# Population density

## Download data
Source: https://ec.europa.eu/eurostat/web/gisco/geodata/reference-data/population-distribution-demography/geostat


In [3]:
from urllib import request
from os import path

import os
import shutil

# Download the GEOSTAT 1 km population grid archive to
# data/compressed/popdensity.zip.
data_dir = 'data'
url = 'http://ec.europa.eu/eurostat/cache/GISCO/geodatafiles/GEOSTAT-grid-POP-1K-2011-V2-0-1.zip'
dst = path.join(data_dir, 'compressed', 'popdensity.zip')

# Make sure the target directory exists; open() would otherwise raise
# FileNotFoundError on a fresh checkout.
os.makedirs(path.dirname(dst), exist_ok=True)

# Context-manage the HTTP response so the connection is always closed, and
# stream it to disk in chunks rather than holding the whole archive in memory.
with request.urlopen(url) as res, open(dst, 'wb') as f:
    shutil.copyfileobj(res, f)

## Transform raw data

In [4]:
from os import path
import pandas as pd
import numpy as np
import re

# Convert the raw grid CSV into a compact table with one row per grid cell:
# the population count plus two 4-digit numbers parsed out of GRD_ID.
# The result is written once; if the output file already exists nothing is
# recomputed.
# NOTE(review): nothing visible here extracts the downloaded archive to
# src_name -- presumably that is done manually; confirm.
data_dir = 'data'
src_name = path.join(data_dir, 'popdensity_raw.csv')
dst_name = path.join(data_dir, 'popdensity.csv')
if not path.exists(dst_name):
    # Only GRD_ID and TOT_P are used, so skip every other column at parse
    # time instead of loading and then dropping them.
    df = pd.read_csv(src_name, usecols=['GRD_ID', 'TOT_P'])
    grid_id = df['GRD_ID']

    # int16 tops out at 32767; a cell with more inhabitants than that would
    # overflow, so the population is held as int32.
    pop = df['TOT_P'].astype(np.int32).to_numpy()

    # Vectorised slicing replaces the per-row Python loop: the last four
    # characters, and the four characters ending five from the end (i.e.
    # skipping the single separator character between the two groups).
    # Four-digit values fit comfortably in int16.
    # NOTE(review): the slice offsets suggest GRD_ID looks like
    # '...N<4 digits>E<4 digits>'; if so, 'lat' receives the trailing
    # (E) group and 'lon' the leading (N) group -- confirm this naming is
    # intentional before relying on it.
    lat = grid_id.str[-4:].astype(np.int16).to_numpy()
    lon = grid_id.str[-9:-5].astype(np.int16).to_numpy()

    res = pd.DataFrame({
        'pop': pop,
        'lat': lat,
        'lon': lon,
    })
    res.to_csv(dst_name, index=False)
else:
    print('Transformed data already exists!')