## Create a Choropleth map for population in germany

### Scrape Data from wikipedia using Scrapy For the Population in germany by cities

In [1]:
import scrapy
import csv
from scrapy.crawler import CrawlerProcess
from scrapy import Request
from scrapy.crawler import CrawlerRunner


class PopulationSpider(scrapy.Spider):
    name = 'population'
    allowed_domains = ['en.wikipedia.org']
    start_urls = ['https://en.wikipedia.org/wiki/List_of_cities_in_Germany_by_population']

    def parse(self, response):
        density = []
        state = response.xpath('//table/tbody/tr/td[3]/a/@title').extract()
        population = response.xpath('//table/tbody/tr/td[4]/text()').extract()
        for k in range(2,80):
            density.append(response.xpath(('//table/tbody/tr[{k}]/td[8]/text()').format(k = k)).extract_first().split('/')[0])
            
            
        for i in range(len(response.xpath('//*[@id="mw-content-text"]/div/table[1]/tbody/tr/td[2]/a/@title').extract())):
            yield {'State':state[i],'Population':population[i].split(),'density':density[i]}


In [2]:
population_spider = CrawlerProcess({'FEED_URI': 'Population_germany.csv','FEED_FORMAT': 'csv'})
population_spider.crawl(PopulationSpider)
population_spider.start()

2020-06-15 17:27:00 [scrapy.utils.log] INFO: Scrapy 1.4.0 started (bot: scrapybot)
2020-06-15 17:27:00 [scrapy.utils.log] INFO: Overridden settings: {'FEED_FORMAT': 'csv', 'FEED_URI': 'Population_germany.csv'}
2020-06-15 17:27:00 [scrapy.middleware] INFO: Enabled extensions:
['scrapy.extensions.corestats.CoreStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.memusage.MemoryUsage',
 'scrapy.extensions.feedexport.FeedExporter',
 'scrapy.extensions.logstats.LogStats']
2020-06-15 17:27:01 [scrapy.middleware] INFO: Enabled downloader middlewares:
['scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',
 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',
 'scrapy.downloadermiddlewares.retry.RetryMiddleware',
 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',
 'scrapy.downloadermiddlewares.httpcompre

2020-06-15 17:27:01 [scrapy.core.scraper] DEBUG: Scraped from <200 https://en.wikipedia.org/wiki/List_of_cities_in_Germany_by_population>
{'State': 'North Rhine-Westphalia', 'Population': ['245,885'], 'density': '1,529'}
2020-06-15 17:27:01 [scrapy.core.scraper] DEBUG: Scraped from <200 https://en.wikipedia.org/wiki/List_of_cities_in_Germany_by_population>
{'State': 'Saxony-Anhalt', 'Population': ['236,991'], 'density': '1,755'}
2020-06-15 17:27:01 [scrapy.core.scraper] DEBUG: Scraped from <200 https://en.wikipedia.org/wiki/List_of_cities_in_Germany_by_population>
{'State': 'Saxony-Anhalt', 'Population': ['235,723'], 'density': '1,173'}
2020-06-15 17:27:01 [scrapy.core.scraper] DEBUG: Scraped from <200 https://en.wikipedia.org/wiki/List_of_cities_in_Germany_by_population>
{'State': 'Baden-Württemberg', 'Population': ['226,393'], 'density': '1,479'}
2020-06-15 17:27:01 [scrapy.core.scraper] DEBUG: Scraped from <200 https://en.wikipedia.org/wiki/List_of_cities_in_Germany_by_population>
{

In [3]:
import pandas as pd

In [4]:
#read the data using pandas
df4 = pd.read_csv('Population_germany.csv')

In [5]:
df4.head()

Unnamed: 0,State,Population,density
0,Berlin,3520031,3948
1,Hamburg,1787408,2366
2,Bavaria,1450381,4668
3,North Rhine-Westphalia,1060582,2619
4,Hesse,732688,2951


In [6]:
#Check any null values
df4.isna().sum()

State         0
Population    0
density       0
dtype: int64

In [7]:
df4.dtypes

State         object
Population    object
density       object
dtype: object

In [8]:
#Change the type object to integer
df4['Population'] = df4['Population'].str.replace(',', '').astype(int)
df4['density'] = df4['density'].str.replace(',', '').astype(int)

In [9]:
import json
#geo_json can be downloaded from "https://github.com/isellsoap/deutschlandGeoJSON/blob/master/2_bundeslaender/1_sehr_hoch.geo.json"
# load geo_json

with open('germany.geojson') as f:
    german_states = json.load(f)
for i in german_states['features']:
    i['id'] = i['properties']['name']
    

In [10]:
import folium
    
# load the map in folium     
map_choropleth = folium.Map(location=[51.16,10.45], zoom_start=7)

# choropleth map
folium.Choropleth(
    geo_data=german_states,
    name='choropleth',
    data=df4,
    columns=['State','Population'],
    # see folium.Choropleth? for details on key_on
    key_on='feature.id',
    fill_color='PuBuGn',
    fill_opacity=0.5,
    line_opacity=0.5,
    legend_name='Population',
    highlight=True
).add_to(map_choropleth)

# layer control to turn choropleth on or off
folium.LayerControl().add_to(map_choropleth)

map_choropleth.save('map_population_choropleth.html')


# display map
map_choropleth