In [2]:
import pandas as pd
import io
import zipfile
import urllib.request
import seaborn as sns
import numpy as np
import folium
from geopy.geocoders import Nominatim

# Download data

In [3]:
data_2021 = "https://www.pole-emploi.org/files/live/sites/peorg/files/documents/Statistiques-et-analyses/Open-data/BMO/Donnees_consolidees_2021.zip"
data_2020 = "https://www.pole-emploi.org/files/live/sites/peorg/files/documents/Statistiques-et-analyses/Open-data/BMO/donnees_consolidees_2020.zip"
data_2019 = "https://www.pole-emploi.org/files/live/sites/peorg/files/documents/Statistiques-et-analyses/Open-data/BMO/donnees_consolidees_2019.zip"
data_2018 = "https://www.pole-emploi.org/files/live/sites/peorg/files/documents/Statistiques-et-analyses/Open-data/BMO/donnees_consolidees_2018.zip"
data_2017 = "https://www.pole-emploi.org/files/live/sites/peorg/files/documents/Statistiques-et-analyses/Open-data/BMO/donnees_consolidees_2017.zip"
data_2016 = "https://www.pole-emploi.org/files/live/sites/peorg/files/documents/Statistiques-et-analyses/Open-data/BMO/donnees_consolidees_2016.zip"
data_2015 = "https://www.pole-emploi.org/files/live/sites/peorg/files/documents/Statistiques-et-analyses/Open-data/BMO/donnees_consolidees_2015.zip"

data_2021 = urllib.request.urlopen(data_2021)
zipfile = zipfile.ZipFile(io.BytesIO(data_2021.read()))
extracted_file = zipfile.open(zipfile.namelist()[0])

In [4]:
data = pd.read_excel(extracted_file, sheet_name=1)
data = data[data.met != "*"].dropna()
data.met = data.met.astype(int)

In [5]:
data.drop(columns=["Code métier BMO", "Famille_met", "BE21", "Dept", "REG", "xmet", "smet"])

Unnamed: 0,annee,Nom métier BMO,Lbl_fam_met,NOMBE21,NomDept,NOM_REG,met
0,2021,Agriculteurs salariés,Autres métiers,BASSIN BASSE-TERRE,Guadeloupe,Guadeloupe,252
1,2021,Agriculteurs salariés,Autres métiers,BASSIN GRANDE-TERRE,Guadeloupe,Guadeloupe,198
2,2021,Agriculteurs salariés,Autres métiers,BASSIN CENTRE,Guadeloupe,Guadeloupe,95
3,2021,Agriculteurs salariés,Autres métiers,MARTINIQUE CENTRE,Martinique,Martinique,143
4,2021,Agriculteurs salariés,Autres métiers,MARTINIQUE NORD ATLANTIQUE,Martinique,Martinique,109
...,...,...,...,...,...,...,...
46587,2021,Formateurs,Fonctions sociales et médico-sociales,LA CASA,Alpes-Maritimes,Provence-Alpes-Côte d'Azur,29
46588,2021,Formateurs,Fonctions sociales et médico-sociales,PAYS D ARLES,Bouches-du-Rhône,Provence-Alpes-Côte d'Azur,85
46589,2021,Formateurs,Fonctions sociales et médico-sociales,EST VAR,Var,Provence-Alpes-Côte d'Azur,103
46590,2021,Formateurs,Fonctions sociales et médico-sociales,HAUT VAR,Var,Provence-Alpes-Côte d'Azur,37


In [25]:
data = data.groupby(["NomDept"]).agg({'met': np.sum})

In [26]:
france = folium.Map(location=[46.8,2], zoom_start=6)

In [27]:
data

Unnamed: 0_level_0,met
NomDept,Unnamed: 1_level_1
Ain,15529
Aisne,18567
Allier,9011
Alpes-Maritimes,50364
Alpes-de-Haute-Provence,9287
...,...
Vendée,40805
Vienne,16951
Vosges,9549
Yonne,9879


In [30]:
geolocator = Nominatim(user_agent="example")
locs = []

for dept in data.index.values:
    try:
        loc = geolocator.geocode(dept).raw
    except:
        print(f"Not working for {dept}")
        loc.append((None, None))
        continue
    print(f"Loc for {dept}: ({loc['lat'], loc['lon']})")
    locs.append((loc['lat'], loc['lon']))

Loc for Ain: (('49.453285449999996', '3.606899003594057'))
Loc for Aisne: (('49.453285449999996', '3.606899003594057'))
Loc for Allier: (('46.36746405', '3.163882848311948'))
Loc for Alpes-Maritimes: (('43.9210587', '7.1790785'))
Loc for Alpes-de-Haute-Provence: (('44.1640832', '6.187851538609079'))
Loc for Ardennes: (('49.69801175', '4.671600518245179'))
Loc for Ardèche: (('44.815194000000005', '4.3986524702343965'))
Loc for Ariège: (('42.9455368', '1.4065544156065486'))
Loc for Aube: (('48.3201921', '4.1905396615047525'))
Loc for Aude: (('43.0542733', '2.512471457499548'))
Loc for Aveyron: (('44.315857449999996', '2.5065697302419823'))
Loc for Bas-Rhin: (('48.5991783', '7.533672856882669'))
Loc for Bouches-du-Rhône: (('43.5424182', '5.034323560504859'))
Loc for Calvados: (('49.09076485', '-0.24139505722798021'))
Loc for Cantal: (('45.0497701', '2.699717567737356'))
Loc for Charente: (('45.6667902', '0.09730504409848517'))
Loc for Charente-Maritime: (('45.73022675', '-0.72128758725637

In [32]:
data["locs"] = locs

In [33]:
data

Unnamed: 0_level_0,met,locs
NomDept,Unnamed: 1_level_1,Unnamed: 2_level_1
Ain,15529,"(49.453285449999996, 3.606899003594057)"
Aisne,18567,"(49.453285449999996, 3.606899003594057)"
Allier,9011,"(46.36746405, 3.163882848311948)"
Alpes-Maritimes,50364,"(43.9210587, 7.1790785)"
Alpes-de-Haute-Provence,9287,"(44.1640832, 6.187851538609079)"
...,...,...
Vendée,40805,"(46.67577325, -1.29144634801388)"
Vienne,16951,"(48.2083537, 16.3725042)"
Vosges,9549,"(48.16378605, 6.382071173595532)"
Yonne,9879,"(47.85512575, 3.6450439257238765)"


In [35]:
for (lat,lon),val in zip(data.locs, data.met):
    folium.Marker((lat,lon), popup="%i" % val).add_to(france)

In [36]:
france