In [26]:
import pandas as pd
import numpy as np
import json
import os
import requests, zipfile, io
import shutil

## Download the data and extract it in the data folder

In [27]:
def parent(current_dir):
    return os.path.abspath(os.path.join(current_dir, os.pardir))

In [28]:
link = "http://data.assemblee-nationale.fr/static/openData/repository/15/amo/deputes_actifs_mandats_actifs_organes/AMO10_deputes_actifs_mandats_actifs_organes_XV.json.zip"

In [29]:
r = requests.get(link)
z = zipfile.ZipFile(io.BytesIO(r.content))
z.extractall(os.path.join(parent(os.getcwd()), 'data'))

## Extract all the deputies infos from the csv files

The next cell scans through all the files with deputies info (they start with 'PA') and extracts the relevant data (name, family name, date of birth, political party...)
At the end we output a list of political parties with their respective code.

### Create the dataframe with the deputies information and the political parties information

In [30]:
column_names = ["code", "sex", "family name", "first name", "date of birth", "activity", "pol party", "dep", "num_dep", "circo"]
df_dep = pd.DataFrame(columns = column_names)
row_index = 0
non_declared = 'PO746314'

for path, subdirs, files in os.walk(os.path.join(parent(os.getcwd()), 'data', 'json')):
    for name in files:
        if name[0:2] == 'PA':
            with open(os.path.join(path, name)) as f:
                data = json.load(f)
                print(data['acteur']['uid']['#text'])
                l = [data['acteur']['uid']['#text']]
                if data['acteur']['etatCivil']['ident']['civ'] == 'M.':
                    l.append('male')
                else:
                    l.append('female')
                l.append(data['acteur']['etatCivil']['ident']['nom'])
                l.append(data['acteur']['etatCivil']['ident']['prenom'])
                l.append(data['acteur']['etatCivil']['infoNaissance']['dateNais'])
                l.append(data['acteur']['profession']['socProcINSEE']['famSocPro'])
                for i in range(len(data['acteur']['mandats']['mandat'])):
                    if data['acteur']['mandats']['mandat'][i]['typeOrgane'] == 'PARPOL':
                        l.append(data['acteur']['mandats']['mandat'][i]['organes']['organeRef'])
                if(len(l) == 6):
                    l.append(non_declared)
                for i in range(len(data['acteur']['mandats']['mandat'])):
                    if((data['acteur']['mandats']['mandat'][i]['typeOrgane'] == 'ASSEMBLEE')&(data['acteur']['mandats']['mandat'][i]['infosQualite']['codeQualite'] == 'membre')):
                        l.append(data['acteur']['mandats']['mandat'][i]['election']['lieu']['departement'])
                        l.append(data['acteur']['mandats']['mandat'][i]['election']['lieu']['numDepartement'])
                        l.append(data['acteur']['mandats']['mandat'][i]['election']['lieu']['numCirco'])
                df_dep.loc[row_index] = l
                row_index = row_index + 1  
df_dep.loc[df_dep['activity'].str.len() < 5, 'activity'] = 'inconnu'
df_dep.info()

PA1008
PA1012
PA1029
PA1198
PA1206
PA1276
PA1327
PA1592
PA1630
PA1695
PA1809
PA1838
PA1874
PA2150
PA2155
PA223837
PA2377
PA2449
PA2492
PA2529
PA266776
PA266788
PA266793
PA266797
PA266808
PA267042
PA267200
PA267204
PA267260
PA267289
PA267306
PA267318
PA267324
PA267330
PA267337
PA267355
PA267378
PA267429
PA267440
PA267450
PA267735
PA267766
PA267780
PA267785
PA267794
PA267901
PA268019
PA2796
PA2828
PA2952
PA2960
PA330008
PA330240
PA330788
PA330909
PA331582
PA331835
PA332228
PA332523
PA332614
PA332747
PA333285
PA333421
PA333818
PA334116
PA334768
PA335054
PA335612
PA335999
PA336175
PA336316
PA336439
PA340343
PA340357
PA342196
PA342240
PA342415
PA342601
PA342935
PA343493
PA345722
PA346054
PA346876
PA405480
PA421348
PA508
PA588884
PA604
PA605036
PA605069
PA605084
PA605131
PA605518
PA605694
PA605963
PA605991
PA606098
PA606171
PA606202
PA606507
PA606639
PA607090
PA607155
PA607193
PA607395
PA607553
PA607595
PA607619
PA608016
PA608172
PA608292
PA608416
PA608641
PA608695
PA608741
PA608826
PA609332

In [31]:
df_dep.sample(20)

Unnamed: 0,code,sex,family name,first name,date of birth,activity,pol party,dep,num_dep,circo
468,PA721678,male,Reda,Robin,1991-05-10,Sans profession déclarée,PO710396,Essonne,91,7
183,PA719024,male,Blanchet,Christophe,1973-04-09,"Artisans, commerçants et chefs d'entreprise",PO684938,Calvados,14,4
289,PA720006,male,Geismar,Luc,1966-11-01,Cadres et professions intellectuelles supérieures,PO684938,Loire-Atlantique,44,5
514,PA722090,female,Pételle,Bénédicte,1971-06-04,Professions Intermédiaires,PO761294,Hauts-de-Seine,92,2
178,PA718978,male,Laqhila,Mohamed,1959-08-03,Cadres et professions intellectuelles supérieures,PO684938,Bouches-du-Rhône,13,11
288,PA719994,female,Kamowski,Catherine,1958-04-08,Cadres et professions intellectuelles supérieures,PO761294,Isère,38,5
480,PA721784,female,Gomez-Bassac,Valérie,1969-07-16,Cadres et professions intellectuelles supérieures,PO761294,Var,83,6
461,PA721616,male,Rupin,Pacôme,1985-01-25,Cadres et professions intellectuelles supérieures,PO761294,Paris,75,7
398,PA720996,male,El Guerrab,M'jid,1983-04-25,Cadres et professions intellectuelles supérieures,PO744864,Français établis hors de France,99,9
347,PA720546,male,Bruneel,Alain,1952-03-07,Retraités,PO684926,Nord,59,16


In [32]:
column_names = ["code", "name", "abreviated_name"]
df_polpar = pd.DataFrame(columns = column_names)
row_index = 0

for path, subdirs, files in os.walk(os.path.join(parent(os.getcwd()), 'data', 'json')):
    for name in files:
        if name[0:2] == 'PO':
            with open(os.path.join(path, name)) as f:
                data = json.load(f)
                if data['organe']['codeType'] == 'PARPOL':
                    l = [data['organe']['uid']]
                    l.append(data['organe']['libelle'])
                    l.append(data['organe']['libelleAbrev'])
                    df_polpar.loc[row_index] = l
                    row_index = row_index + 1 
df_polpar.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 19 entries, 0 to 18
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   code             19 non-null     object
 1   name             19 non-null     object
 2   abreviated_name  19 non-null     object
dtypes: object(3)
memory usage: 608.0+ bytes


## Count the number of members per political parties

In [33]:
#count the members per parties
df_members = df_dep['pol party'].value_counts().to_frame()
#extract the parties with less than 7 members
df_parties_to_replace = df_members.loc[df_members['pol party'] < 7]
#aggregate these parties into the not declared party (code = PO746314)
for party in df_parties_to_replace.index.to_list():
    df_dep.loc[df_dep['pol party'] == party, 'pol party'] = non_declared
    
#recount the number of members with the modification
df_members = df_dep['pol party'].value_counts().to_frame()
#remove the parties with few members
df_polpar = df_polpar.loc[df_polpar['code'].isin(df_members.loc[df_members['pol party'] >= 7].index.to_list())]
#add number of members in the political party dataframe
i = 0
for party in df_dep['pol party'].value_counts().to_frame().index.to_list():
    df_polpar.loc[df_polpar['code'] == party, 'members'] = int(df_dep['pol party'].value_counts().to_frame().values[i][0])
    i = i + 1
df_polpar['members'] = df_polpar['members'].astype(int)

## Add a color for each political party

Let's add a color per political party when we display them

In [34]:
df_polpar['color'] = ['tab:red', 'tab:olive', 'tab:green', 'tab:orange', 'tab:blue',
                      'tab:cyan', 'tab:brown', 'tab:purple', 'tab:pink', 'tab:grey']

Replace political party code by abreviated name in df_dep

In [35]:
update_list = df_polpar.drop(columns = ['name','members', 'color']).values.tolist()
for i in range(len(update_list)):
    df_dep.loc[df_dep['pol party'] == update_list[i][0], 'pol party'] = update_list[i][1]

## Arrange the activity categories

In [36]:
df_dep['activity'].unique()

array(['Cadres et professions intellectuelles supérieures',
       "Artisans, commerçants et chefs d'entreprise",
       'Agriculteurs exploitants', 'Ouvriers', 'Retraités',
       'Professions Intermédiaires', 'Employés',
       'Sans profession déclarée',
       'Autres personnes sans activité professionnelle', 'inconnu'],
      dtype=object)

In [37]:
df_dep.loc[df_dep['activity'] == 'Cadres et professions intellectuelles supérieures', 'activity'] = 'Cadres'
df_dep.loc[df_dep['activity'] == "Artisans, commerçants et chefs d'entreprise", 'activity'] = 'Entrepreneurs'
df_dep.loc[df_dep['activity'] == 'Professions Intermédiaires', 'activity'] = 'Prof. inter.'
df_dep.loc[df_dep['activity'] == 'Agriculteurs exploitants', 'activity'] = 'Agriculteurs'
df_dep.loc[df_dep['activity'] == 'Sans profession déclarée', 'activity'] = 'Non déclaré'
df_dep.loc[df_dep['activity'] == 'Autres personnes sans activité professionnelle', 'activity'] = 'Non déclaré'
df_dep.loc[df_dep['activity'] == 'inconnu', 'activity'] = 'Non déclaré'

In [38]:
df_dep['activity'].unique()

array(['Cadres', 'Entrepreneurs', 'Agriculteurs', 'Ouvriers', 'Retraités',
       'Prof. inter.', 'Employés', 'Non déclaré'], dtype=object)

## save deputy and political parties dataframes as cvs

In [39]:
df_dep.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 570 entries, 0 to 569
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   code           570 non-null    object
 1   sex            570 non-null    object
 2   family name    570 non-null    object
 3   first name     570 non-null    object
 4   date of birth  570 non-null    object
 5   activity       570 non-null    object
 6   pol party      570 non-null    object
 7   dep            570 non-null    object
 8   num_dep        570 non-null    object
 9   circo          570 non-null    object
dtypes: object(10)
memory usage: 49.0+ KB


In [40]:
df_dep.sample(20)

Unnamed: 0,code,sex,family name,first name,date of birth,activity,pol party,dep,num_dep,circo
505,PA722000,male,Buchou,Stéphane,1974-03-14,Cadres,LAREM,Vendée,85,3
248,PA719608,female,Ménard,Emmanuelle,1968-08-15,Cadres,ND,Hérault,34,6
105,PA607553,male,Potier,Dominique,1964-03-17,Agriculteurs,PS,Meurthe-et-Moselle,54,5
149,PA718682,male,Trompille,Stéphane,1982-12-01,Employés,LAREM,Ain,1,4
62,PA333421,male,Pancher,Bertrand,1958-06-05,Cadres,UDRL,Meuse,55,1
272,PA719822,female,Thillaye,Sabine,1959-05-18,Entrepreneurs,MODEM,Indre-et-Loire,37,5
173,PA718926,male,Dharréville,Pierre,1975-06-15,Prof. inter.,PCF,Bouches-du-Rhône,13,13
497,PA721924,female,de Vaucouleurs,Michèle,1964-02-12,Cadres,MODEM,Yvelines,78,7
294,PA720046,female,Dufeu,Audrey,1980-06-03,Cadres,LAREM,Loire-Atlantique,44,8
442,PA721434,female,Lardet,Frédérique,1966-09-01,Cadres,LAREM,Haute-Savoie,74,2


In [41]:
df_polpar.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10 entries, 0 to 18
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   code             10 non-null     object
 1   name             10 non-null     object
 2   abreviated_name  10 non-null     object
 3   members          10 non-null     int32 
 4   color            10 non-null     object
dtypes: int32(1), object(4)
memory usage: 440.0+ bytes


In [42]:
df_polpar.sample(10)

Unnamed: 0,code,name,abreviated_name,members,color
12,PO744858,Régions et peuples solidaires,RPS,11,tab:brown
18,PO761294,La République en Marche,LAREM,270,tab:grey
11,PO744856,La France Insoumise,FI,17,tab:cyan
9,PO710396,Les Républicains,REP,101,tab:blue
16,PO746314,Non déclaré(s),ND,30,tab:pink
0,PO684926,Parti communiste français,PCF,14,tab:red
4,PO684938,Mouvement Démocrate,MODEM,65,tab:orange
1,PO684932,Parti socialiste,PS,25,tab:olive
15,PO744864,"Union des démocrates, radicaux et libéraux",UDRL,24,tab:purple
3,PO684936,Europe Écologie Les Verts,EELV,13,tab:green


In [43]:
df_dep.to_csv(os.path.join(parent(os.getcwd()), 'data') + '\df_dep.csv',index=False)

In [44]:
df_polpar.to_csv(os.path.join(parent(os.getcwd()), 'data') + '\df_polpar.csv',index=False)

# Load data about organs and organizations deputies belong to

In [45]:
column_names = ["code", "type", "name", "abreviated_name"]
df_organe = pd.DataFrame(columns = column_names)
row_index = 0

for path, subdirs, files in os.walk(os.path.join(parent(os.getcwd()), 'data', 'json')):
    for name in files:
        if name[0:2] == 'PO':
            with open(os.path.join(path, name)) as f:
                data = json.load(f)
                l = [data['organe']['uid']]
                l.append(data['organe']['codeType'])
                l.append(data['organe']['libelle'])
                l.append(data['organe']['libelleAbrev'])
                df_organe.loc[row_index] = l
                row_index = row_index + 1 

In [46]:
df_organe.sample(20)

Unnamed: 0,code,type,name,abreviated_name
362,PO733467,GA,France-République dominicaine,DOM
320,PO733247,GA,France-Roumanie,ROU
416,PO744155,MISINFO,Mission d'information sur les mers et océans :...,OCEANS
294,PO733117,GA,France-Chine,CHIN
617,PO773424,MINISTERE,"Ministère auprès de la ministre des armées, ch...",ANC
141,PO60277,ORGEXTPARL,Observatoire national de la sécurité et de l'a...,138
480,PO747032,GE,Les Kurdes,KURDES
97,PO540562,ORGEXTPARL,Conseil d'administration de Campus France,292
252,PO732781,DELEGBUREAU,Délégation chargée des représentants d'intérêt...,REP
471,PO746977,GE,Impact des changements climatiques,IMPACTCLIM


In [47]:
df_organe.to_csv(os.path.join(parent(os.getcwd()), 'data') + '\df_organs.csv',index=False)

In [48]:
column_names = ["code_organe", "code_deputy"]
df_deputies_in_organe = pd.DataFrame(columns = column_names)
row_index = 0

for path, subdirs, files in os.walk(os.path.join(parent(os.getcwd()), 'data', 'json')):
    for name in files:
        if name[0:2] == 'PA':
            with open(os.path.join(path, name)) as f:
                data = json.load(f)
                for i in range(len(data['acteur']['mandats']['mandat'])):
                    l = [data['acteur']['mandats']['mandat'][i]['organes']['organeRef']]
                    l.append(data['acteur']['uid']['#text'])
                    df_deputies_in_organe.loc[row_index] = l
                    row_index = row_index + 1
df_deputies_in_organe.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 16689 entries, 0 to 16688
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   code_organe  16689 non-null  object
 1   code_deputy  16689 non-null  object
dtypes: object(2)
memory usage: 391.1+ KB


In [49]:
df_deputies_in_organe.sample(5)

Unnamed: 0,code_organe,code_deputy
4254,PO746857,PA609726
5945,PO763264,PA718926
11862,PO747271,PA720846
11786,PO717436,PA720814
9534,PO782816,PA720066


In [50]:
df_deputies_in_organe.to_csv(os.path.join(parent(os.getcwd()), 'data') + '\df_deputies_in_organs.csv',index=False)

# Load data about votes

### Download the data and extract them in the data folder

In [51]:
link = 'http://data.assemblee-nationale.fr/static/openData/repository/15/loi/scrutins/Scrutins_XV.json.zip'

In [52]:
r = requests.get(link)
z = zipfile.ZipFile(io.BytesIO(r.content))
z.extractall(os.path.join(parent(os.getcwd()), 'data', 'json', 'vote'))
#move the file in a parent folder
source_dir = os.path.join(parent(os.getcwd()), 'data', 'json', 'vote', 'json')
target_dir = os.path.join(parent(os.getcwd()), 'data', 'json', 'vote')
for file_name in os.listdir(source_dir):
    shutil.move(os.path.join(source_dir, file_name), target_dir)
#remove the source dir
os.rmdir(source_dir)

### Extract all the votes infos from the csv files

The next cell scans through all the files with votes info (they start with 'VT') and extracts the relevant data

In [105]:
def get_vote_row(vote, deputy, type_, spe):
    if(type_ == 'pour'):
        return [vote, deputy, '1', '0', '0', spe]
    if(type_ == 'contre'):
        return [vote, deputy, '0', '1', '0', spe]
    if(type_ == 'abstention'):
        return [vote, deputy, '0', '0', '1', spe]

In [106]:
column_descr = ["code", "date", "type", "titre", "demandeur", "nb votants", "requis", "pour", "contre", "abstentions"]
df_vote_descr = pd.DataFrame(columns = column_descr)
columns_total = ["scrutin", "deputy code", "pour", "contre", "abstentions", "par delegation"]
rows = []
row_index_descr = 0
apply_correction = 0
for path, subdirs, files in os.walk(os.path.join(parent(os.getcwd()), 'data', 'json')):
    for name in files:
        if name[0:2] == 'VT':
            with open(os.path.join(path, name)) as f:
                data = json.load(f)
                l = [data['scrutin']['uid']]
                l.append(data['scrutin']['dateScrutin'])
                l.append(data['scrutin']['typeVote']['libelleTypeVote'])
                l.append(data['scrutin']['titre'])
                l.append(data['scrutin']['demandeur']['texte'])
                l.append(data['scrutin']['syntheseVote']['nombreVotants'])
                l.append(data['scrutin']['syntheseVote']['nbrSuffragesRequis'])
                l.append(data['scrutin']['syntheseVote']['decompte']['pour'])
                l.append(data['scrutin']['syntheseVote']['decompte']['contre'])
                l.append(data['scrutin']['syntheseVote']['decompte']['abstentions'])
                df_vote_descr.loc[row_index_descr] = l
                row_index_descr = row_index_descr + 1
                
                votes = data['scrutin']['ventilationVotes']['organe']['groupes']['groupe']
                for i in range(len(votes)):
                    # vote, acteur code, pour, contre, non votants, abstentions, cause, delegation
                    scrutin = data['scrutin']['uid']
                    
                    nb_pour = nb_contre = nb_abstentions = 0
                    
                    if (votes[i]['vote']['decompteNominatif']['pours'] != None):                          
                        nb_pour = len(votes[i]['vote']['decompteNominatif']['pours']['votant'])
                        if (int(votes[i]['vote']['decompteVoix']['pour']) == 1):
                            nb_pour = 1
                        
                    if (votes[i]['vote']['decompteNominatif']['contres'] != None):
                        nb_contre = len(votes[i]['vote']['decompteNominatif']['contres']['votant'])
                        if (int(votes[i]['vote']['decompteVoix']['contre']) == 1):
                            nb_contre = 1
                            
                    if (votes[i]['vote']['decompteNominatif']['abstentions'] != None):
                        nb_abstentions = len(votes[i]['vote']['decompteNominatif']['abstentions']['votant'])
                        if (int(votes[i]['vote']['decompteVoix']['abstentions']) == 1):
                            nb_abstentions = 1
                    
                    for k in range(nb_pour):
                        if nb_pour == 1:
                            row = (get_vote_row(scrutin, votes[i]['vote']['decompteNominatif']['pours']['votant']['acteurRef'], 
                                                     'pour', votes[i]['vote']['decompteNominatif']['pours']['votant']['parDelegation']))
                            rows.append(row)
                        else:
                            row = (get_vote_row(scrutin, votes[i]['vote']['decompteNominatif']['pours']['votant'][k]['acteurRef'], 
                                                     'pour', votes[i]['vote']['decompteNominatif']['pours']['votant'][k]['parDelegation']))
                            rows.append(row)
                    for l in range(nb_contre):
                        if nb_contre == 1:
                            row = (get_vote_row(scrutin, votes[i]['vote']['decompteNominatif']['contres']['votant']['acteurRef'], 
                                                     'contre', votes[i]['vote']['decompteNominatif']['contres']['votant']['parDelegation']))
                            rows.append(row)
                        else:
                            row = (get_vote_row(scrutin, votes[i]['vote']['decompteNominatif']['contres']['votant'][l]['acteurRef'], 
                                                     'contre', votes[i]['vote']['decompteNominatif']['contres']['votant'][l]['parDelegation']))
                            rows.append(row)
                    for m in range(nb_abstentions):
                        if nb_abstentions == 1:
                            row = (get_vote_row(scrutin, votes[i]['vote']['decompteNominatif']['abstentions']['votant']['acteurRef'], 
                                                     'abstention', votes[i]['vote']['decompteNominatif']['abstentions']['votant']['parDelegation']))
                            rows.append(row)
                        else:
                            row = (get_vote_row(scrutin, votes[i]['vote']['decompteNominatif']['abstentions']['votant'][m]['acteurRef'], 
                                                     'abstention', votes[i]['vote']['decompteNominatif']['abstentions']['votant'][m]['parDelegation']))
                            rows.append(row)
                            
df_vote_total = pd.DataFrame(rows, columns = columns_total)                                                     

In [107]:
df_vote_descr.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3975 entries, 0 to 3974
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   code         3975 non-null   object
 1   date         3975 non-null   object
 2   type         3975 non-null   object
 3   titre        3975 non-null   object
 4   demandeur    3969 non-null   object
 5   nb votants   3975 non-null   object
 6   requis       3975 non-null   object
 7   pour         3975 non-null   object
 8   contre       3975 non-null   object
 9   abstentions  3975 non-null   object
dtypes: object(10)
memory usage: 341.6+ KB


In [108]:
df_vote_total.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 416369 entries, 0 to 416368
Data columns (total 6 columns):
 #   Column          Non-Null Count   Dtype 
---  ------          --------------   ----- 
 0   scrutin         416369 non-null  object
 1   deputy code     416369 non-null  object
 2   pour            416369 non-null  object
 3   contre          416369 non-null  object
 4   abstentions     416369 non-null  object
 5   par delegation  416369 non-null  object
dtypes: object(6)
memory usage: 19.1+ MB


### Change the type of some columns to numerical and drop non relevant columns

In [109]:
df_vote_descr[["nb votants", "requis", "pour", "contre", "abstentions"]] = df_vote_descr[["nb votants", "requis", "pour", "contre", "abstentions"]].apply(pd.to_numeric)

df_vote_total[["pour", "contre", "abstentions"]] = df_vote_total[["pour", "contre", "abstentions"]].apply(pd.to_numeric)

### Check if some categorical columns have too many categories

In [110]:
df_vote_total['par delegation'].value_counts()

false    367570
true      48799
Name: par delegation, dtype: int64

In [111]:
df_vote_total.loc[df_vote_total['par delegation'] == 'false', 'par delegation'] = 0
df_vote_total.loc[df_vote_total['par delegation'] == '0', 'par delegation'] = 0
df_vote_total.loc[df_vote_total['par delegation'] == 'true', 'par delegation'] = 1
df_vote_total['par delegation'].value_counts()

0    367570
1     48799
Name: par delegation, dtype: int64

In [112]:
len(df_vote_descr['demandeur'].unique())

200

### We need to translate the 'demandeur' column into a more usable variable
Let's create a column per political party and assign them 1 if they are part of the demandeur and 0 if not

In [113]:
df_vote_descr['demandeur REP'] = 0
df_vote_descr['demandeur LAREM'] = 0
df_vote_descr['demandeur FI'] = 0
df_vote_descr['demandeur PS'] = 0
df_vote_descr['demandeur EELV'] = 0
df_vote_descr['demandeur MODEM'] = 0
df_vote_descr['demandeur ND'] = 0
df_vote_descr['demandeur RPS'] = 0
df_vote_descr['demandeur UDRL'] = 0
df_vote_descr['demandeur PCF'] = 0
df_vote_descr['demandeur CDP'] = 0
df_vote_descr['demandeur GOV'] = 0
df_vote_descr['demandeur COM SPE'] = 0

In [114]:
df_vote_descr['demandeur'] = df_vote_descr['demandeur'].fillna("a")

In [115]:
df_vote_descr.loc[df_vote_descr['demandeur'].str.contains("Les Républicains"), 'demandeur REP'] = 1
df_vote_descr.loc[df_vote_descr['demandeur'].str.contains("Les Republicains"), 'demandeur REP'] = 1
df_vote_descr.loc[df_vote_descr['demandeur'].str.contains("Nouvelle Gauche"), 'demandeur PS'] = 1
df_vote_descr.loc[df_vote_descr['demandeur'].str.contains("La France insoumise"), 'demandeur FI'] = 1
df_vote_descr.loc[df_vote_descr['demandeur'].str.contains("La République en Marche"), 'demandeur LAREM'] = 1
df_vote_descr.loc[df_vote_descr['demandeur'].str.contains("La Republique en Marche"), 'demandeur LAREM'] = 1
df_vote_descr.loc[df_vote_descr['demandeur'].str.contains("UDI"), 'demandeur UDRL'] = 1
df_vote_descr.loc[df_vote_descr['demandeur'].str.contains("Gauche démocrate et républicaine"), 'demandeur PCF'] = 1
df_vote_descr.loc[df_vote_descr['demandeur'].str.contains("Gauche democrate"), 'demandeur PCF'] = 1
df_vote_descr.loc[df_vote_descr['demandeur'].str.contains("Socialistes et apparentés"), 'demandeur PS'] = 1
df_vote_descr.loc[df_vote_descr['demandeur'].str.contains("Libertés et Territoires"), 'demandeur RPS'] = 1
df_vote_descr.loc[df_vote_descr['demandeur'].str.contains("Conference des Presidents"), 'demandeur CDP'] = 1
df_vote_descr.loc[df_vote_descr['demandeur'].str.contains("Conférence des Présidents"), 'demandeur CDP'] = 1
df_vote_descr.loc[df_vote_descr['demandeur'].str.contains("Mouvement Démocrate"), 'demandeur MODEM'] = 1
df_vote_descr.loc[df_vote_descr['demandeur'].str.contains("Gouvernement"), 'demandeur GOV'] = 1
df_vote_descr.loc[df_vote_descr['demandeur'].str.contains("Commission"), 'demandeur COM SPE'] = 1
df_vote_descr.loc[df_vote_descr['demandeur'].str.contains("Agir Ensemble"), 'demandeur UDRL'] = 1

### Save the dataframes as csv

In [116]:
df_vote_descr.to_csv(os.path.join(parent(os.getcwd()), 'data') + '\df_vote_descr.csv',index=False)
df_vote_total.to_csv(os.path.join(parent(os.getcwd()), 'data') + '\df_vote_total.csv',index=False)