In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import json
import folium

## FUNCTIONS

In [2]:
def json_to_geojson(data, districts):
    # create a geojson from a list of dictionaries
    # containing coordinates with the name of the polygon
    # in our case a polygon is a district
    assert type(data) == list, "The parameter data should be a list of coordinates with a name argument!"
    
    geojson = {
        "type": "FeatureCollection",
        "features": [
        {
            "type": "Feature",
            "geometry" : {
                "type": "Polygon",
                "name": district,
                "coordinates": [[[d["lon"], d["lat"]] for d in data if d['name'] == district]],
                },
            "properties" : {'name': district},
            
         } for district in districts]
    }
    
    return geojson

def parse_inc_tax_data(income_tax):
    income_tax.columns = ['district', 'num', 'inc_tax']
    income_tax = income_tax.dropna()
    income_tax = income_tax[income_tax['district'].str.contains('kerület')]
    income_tax['district'] = [dist.replace("kerület", "kerulet").strip() for dist in income_tax['district'].tolist()]

    for col in ['inc_tax', 'num']:
        income_tax[col] = [float(f.replace(" ", "")) for f in income_tax[col].tolist()]

    income_tax['inc_tax_per_capita'] = income_tax['inc_tax'] * 1000 / income_tax['num']
    income_tax = income_tax.drop(['inc_tax', 'num'], axis = 1)
    return income_tax

## PREPARE POLYGON DATA

In [3]:
# load data
with open('data/district_coords.txt', 'r') as fp:
    district_coords = json.loads(fp.read())

In [4]:
# get a list of districts
districts = list(set([district_coord['name'] for district_coord in district_coords]))

In [5]:
# create a geojson
geojson = json_to_geojson(district_coords, districts)

## ADD DATA USING GEOPANDAS

In [6]:
# create geodataframe and add centroids
gdf = gpd.GeoDataFrame.from_features(geojson)
gdf['centroid_lon'] = gdf['geometry'].centroid.x
gdf['centroid_lat'] = gdf['geometry'].centroid.y
gdf.crs = {'init' :'epsg:4326'}
gdf.head()

Unnamed: 0,geometry,name,centroid_lon,centroid_lat
0,"POLYGON ((19.0172675 47.532714, 19.0173049 47....",III. kerulet,19.034511,47.569678
1,"POLYGON ((19.0345509 47.438895, 19.0352686 47....",XI. kerulet,19.019072,47.460257
2,"POLYGON ((19.1574138 47.4383984, 19.1581105 47...",XVIII. kerulet,19.201757,47.430279
3,"POLYGON ((19.0633281 47.5176955, 19.0635358 47...",VI. kerulet,19.065279,47.509549
4,"POLYGON ((19.2207833 47.4925469, 19.2216553 47...",X. kerulet,19.158684,47.483381


In [7]:
# get income tax per capita by districts
income_tax = pd.read_csv('data/income_tax_data.csv', sep=';')
income_tax = parse_inc_tax_data(income_tax)
income_tax.head()

Unnamed: 0,district,inc_tax_per_capita
1,I. kerulet,617381.865035
2,II. kerulet,788849.72923
3,III. kerulet,484713.119515
4,IV. kerulet,382308.294976
5,V. kerulet,655632.293546


In [8]:
# merge with inc_tax data
gdf_income_tax = gdf.merge(income_tax, left_on='name', right_on='district')
gdf_income_tax.head()

Unnamed: 0,geometry,name,centroid_lon,centroid_lat,district,inc_tax_per_capita
0,"POLYGON ((19.0172675 47.532714, 19.0173049 47....",III. kerulet,19.034511,47.569678,III. kerulet,484713.119515
1,"POLYGON ((19.0345509 47.438895, 19.0352686 47....",XI. kerulet,19.019072,47.460257,XI. kerulet,543127.162188
2,"POLYGON ((19.1574138 47.4383984, 19.1581105 47...",XVIII. kerulet,19.201757,47.430279,XVIII. kerulet,374651.283307
3,"POLYGON ((19.0633281 47.5176955, 19.0635358 47...",VI. kerulet,19.065279,47.509549,VI. kerulet,494609.969605
4,"POLYGON ((19.2207833 47.4925469, 19.2216553 47...",X. kerulet,19.158684,47.483381,X. kerulet,333391.725328


## CREATE A MAP IN FOLIUM

In [9]:
# create a map
m = folium.Map(location=[np.median(gdf_income_tax['centroid_lat'].tolist()), np.median(gdf_income_tax['centroid_lon'].tolist())], tiles='Stamen Toner', zoom_start=12)

## add chloropleth layer
m.choropleth(
    geo_data=geojson,
    name='Income Tax Per Capita',
    data=gdf_income_tax,
    columns=['name', 'inc_tax_per_capita'],
    fill_color='YlGn',
    key_on='feature.geometry.name',
    legend_name='inc_tax_per_capita'
)  

# add markers with basic information
fg = folium.FeatureGroup(name='District Info')
for lat, lon, val, name in zip(gdf_income_tax['centroid_lat'].tolist(), gdf_income_tax['centroid_lon'].tolist(), gdf_income_tax['inc_tax_per_capita'].tolist(), gdf_income_tax['name'].tolist()):
    html = f"""
    <h2>{name}<\h2><br>
    <h4>Income Tax Per Capita: {int(round(val,0))} HUF <\h4>
    """
    fg.add_child(folium.Marker(location=[lat, lon], popup=html))

m.add_child(fg)

# enable layers to be turned in or out
folium.LayerControl().add_to(m)

# save it
m.save('outputs/income_tax_percapita_by_districts.html')

In [10]:
m