### Make a choropleth map using plotly and geojson regions


In [40]:
import json
import numpy as np
import pandas as pd
from fuzzywuzzy import fuzz, process
from matplotlib.colors import Normalize
from matplotlib import cm
from itertools import product
import copy
import requests
from flask import Flask, render_template, redirect, url_for, request
from urllib.request import urlopen
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
import chart_studio.plotly as ply#plotly.plotly as ply

MAPBOX_APIKEY = "Your_Mapbox_API_key"

Load the data and save them to csv

In [2]:
# df = pd.read_csv('DCCV_TAXDISOCCU1_22112018133514795.csv')
# mask = (df['TIME'] == '2017') & (df['Sesso'] == 'totale') & (df['Classe di età'] == '15 anni e più')
# df = df.loc[mask, ['Territorio', 'Value']]
# df.set_index('Territorio', inplace=True)
# df.to_csv('italy_unemployment_2017.csv', index=True, header=True)

Reload the data

In [3]:
#### from tutorial
un = pd.read_csv('italy_unemployment_2017.csv', index_col=0)
un = un['Value']/100.
un.index.name = 'province'
un.head()

province
Torino                  0.093748
Vercelli                0.096164
Biella                  0.071894
Verbano-Cusio-Ossola    0.068392
Novara                  0.111541
Name: Value, dtype: float64

In [66]:
#Chargement des fichiers dans les dataframes
r = requests.get("https://presidentielle2022.conseil-constitutionnel.fr/telechargement/parrainagestotal.json")
decoded_data = r.text.encode().decode('utf-8-sig')
d = json.loads(decoded_data)
#chargement des fichiers dans le dataframe
pt_origin = pd.DataFrame(d)
pt_origin.query('Candidat=="ARTHAUD Nathalie"')
pt_origin.head()
un = pt_origin[["Departement","Candidat"]].query('Candidat=="ARTHAUD Nathalie"')
un = un.groupby(['Departement'])['Candidat'].count()
print(un)
un.set_index('Departement')
un.index.name = 'Departement'


Departement
Ain                         6
Aisne                      22
Allier                      1
Alpes-de-Haute-Provence     2
Ardennes                   17
                           ..
Territoire de Belfort       1
Val-d'Oise                  1
Vaucluse                    1
Vienne                      1
Vosges                     18
Name: Candidat, Length: 75, dtype: int64


AttributeError: 'Series' object has no attribute 'set_index'

Load the geojson

In [31]:
#tutorial
with open('province.geojson',encoding = "utf-8") as f:
     geojson = json.load(f)

FileNotFoundError: [Errno 2] No such file or directory: 'province.geojson'

In [44]:
#Test geomap
with urlopen('https://france-geojson.gregoiredavid.fr/repo/departements.geojson') as response:
    geojson = json.load(response)


Count the number of provinces and get the province name

In [45]:
n_departements = len(geojson['features'])
noms_departements = [geojson['features'][k]['properties']['nom'] for k in range(n_departements)]
print("il y a {} départements ".format(n_departements))

il y a 96 départements 


Center of each province

In [46]:
def get_centers():
    lon, lat =[], []

    for k in range(n_departements):
        geometry = geojson['features'][k]['geometry']

        if geometry['type'] == 'Polygon':
            coords=np.array(geometry['coordinates'][0])
        elif geometry['type'] == 'MultiPolygon':
            coords=np.array(geometry['coordinates'][0][0])

        lon.append(sum(coords[:,0]) / len(coords[:,0]))
        lat.append(sum(coords[:,1]) / len(coords[:,1]))
            
    return lon, lat

Match province names with dataframe metric

In [47]:
def match_regions(list1, list2):
    """
    Match the region names of self.metric with the region names of the geojson
    using fuzzy matching.

    Return a dict with {'old_name': 'new_name'}
    """
    matched = [process.extract(list1[i], list2, limit=1, scorer=fuzz.partial_ratio)[0][0] for i in range(0,len(list1))]

    return {key: value for (key, value) in zip(list1, matched)}

match_dict = match_regions(un.index, noms_departements)

TypeError: expected string or bytes-like object

reindex and rename the dataframe

In [8]:
df_tmp = un.copy()
df_tmp.index = df_tmp.index.map(match_dict) # rename
df_tmp = df_tmp[~df_tmp.index.duplicated(keep=False)] # drop duplicates
                                                      # from fuzzy match gone wrong
#give the same index order as the geojson
df_reindexed = df_tmp.reindex(index = province_names)
df_reindexed.head()

province
Vercelli         0.096164
Novara           0.111541
Torino           0.093748
Cuneo            0.061058
Valle d'Aosta    0.077990
Name: Value, dtype: float64

Make the sources:

In [9]:
def make_sources(downsample = 10):
    sources = []
    geojson_copy = copy.deepcopy(geojson['features'])
    
    for feature in geojson_copy:
        
        if downsample > 0:
            coords = np.array(feature['geometry']['coordinates'][0][0])
            coords = coords[::downsample]
            feature['geometry']['coordinates'] = [[coords]]
            
        sources.append(dict(type = 'FeatureCollection', 
                            features = [feature])
                      )
    return sources

Define the colours, the colorscale and the hover text

In [10]:
def scalarmappable(cmap, cmin, cmax):
        colormap = cm.get_cmap(cmap)
        norm = Normalize(vmin=cmin, vmax=cmax)
        return cm.ScalarMappable(norm=norm, cmap=colormap)
    
def get_scatter_colors(sm, df):
    grey = 'rgba(128,128,128,1)'
    return ['rgba' + str(sm.to_rgba(m, bytes = True, alpha = 1)) if not np.isnan(m) else grey for m in df]

def get_colorscale(sm, df, cmin, cmax):
    xrange = np.linspace(0, 1, len(df))
    values = np.linspace(cmin, cmax, len(df))

    return [[i, 'rgba' + str(sm.to_rgba(v, bytes = True))] for i,v in zip(xrange, values) ]
 
def get_hover_text(df) :
    text_value = (df*100).round(2).astype(str) + "%"
    with_data = '<b>{}</b> <br> {} unemployment rate'
    no_data = '<b>{}</b> <br> no data'
    
    return [with_data.format(p,v) if v != 'nan%' else no_data.format(p) for p,v in zip(df.index, text_value)]

### Ready to make map

Run all the functions

In [11]:
colormap = 'Blues'
cmin = df_reindexed.min()
cmax = df_reindexed.max()

sources = make_sources(downsample=10)
lons, lats = get_centers()

sm = scalarmappable(colormap, cmin, cmax)
scatter_colors = get_scatter_colors(sm, df_reindexed)
colorscale = get_colorscale(sm, df_reindexed, cmin, cmax)
hover_text = get_hover_text(df_reindexed)

tickformat = ".0%"

The scattermapbox data

In [12]:
data = dict(type='scattermapbox',
             lat=lats,
             lon=lons,
             mode='markers',
             text=hover_text,
             marker=dict(size=1,
                         color=scatter_colors,
                         showscale = True,
                         cmin = df_reindexed.min(),
                         cmax = df_reindexed.max(),
                         colorscale = colorscale,
                         colorbar = dict(tickformat = tickformat )
                         ),
             showlegend=False,
             hoverinfo='text'
             )

The borders

In [13]:
layers=([dict(sourcetype = 'geojson',
        source =sources[k],
        below="",
        type = 'line',
        line = dict(width = 1),
        color = 'black',
        ) for k in range(n_provinces)] +

        [dict(sourcetype = 'geojson',
             source =sources[k],
             below="water",
             type = 'fill',
             color = scatter_colors[k],
             opacity=0.8
            ) for k in range(n_provinces)]
        )

The layout

In [14]:
data_url = "http://dati.istat.it/Index.aspx?DataSetCode=DCCV_TAXDISOCCU1#"

layout = dict(title="2017 Unemployment Rate per Italian provinces <br> " + 
                    """using <a href={}>open data</a> by the """.format(data_url) +
                      "Italian National Institute of Statistics",
      autosize=False,
      width=700,
      height=800,
      hovermode='closest',
      hoverdistance = 30,

      mapbox=dict(accesstoken=MAPBOX_APIKEY,
                  layers=layers,
                  bearing=0,
                  center=dict(
                  lat=41.871941,
                  lon=12.567380),
                  pitch=0,
                  zoom=4.9,
                  style = 'light'
            )
      )

In [15]:
fig = dict(data=[data], layout=layout)
iplot(fig)

which gives the plot below ( that I am rendering from an iframe from my plotly account) 

In [16]:
from IPython.display import HTML
HTML('<iframe width="900" height="800" frameborder="0" scrolling="no" src="//plot.ly/~vincenzo.pota/24.embed"></iframe>')


Consider using IPython.display.IFrame instead

