In [3]:
# Scratch check: number of ballot ids two vote sets have in common.
A = {1, 3, 4}
B = {1, 5, 4}
len(A & B)

2

### TODO:
Add a relative weight that takes into account how many ballots two deputies both took part in, i.e.:
weight = (number of ballots where both cast the same vote) / (number of ballots where both voted)

In [1]:
import xml.etree.ElementTree as ET
import urllib.request
import pandas as pd

In [27]:
def open_xml(url : str):
    """Fetch ``url`` and return the raw response body as bytes.

    The connection is closed as soon as the body has been read.
    """
    with urllib.request.urlopen(url) as connection:
        return connection.read()

def get_element_tree(url):
    """Download the XML document at ``url`` and return its parsed root element."""
    return ET.fromstring(open_xml(url))

def set_deputes_df(root_xml_tree, breaker = False, legislature = "www"):
    """Scrape the deputies of one legislature together with their votes and cache them.

    Parameters
    ----------
    root_xml_tree
        Overwritten before it is read: the deputies listing is always downloaded
        again from nosdeputes.fr, so the value passed by the caller has no effect.
    breaker
        Added to an internal counter for every deputy read; collection stops once
        that counter exceeds 10. With ``True`` this keeps only the first 11
        deputies, with ``False`` (default) every deputy is kept.
    legislature : str
        Sub-domain of nosdeputes.fr to query; also used as the prefix of the
        JSON cache file name.

    Returns
    -------
    pandas.DataFrame
        One row per deputy with the columns 'nom', 'slug', 'num_deptmt',
        'groupe_sigle', 'pour', 'contre', 'abstention' and 'loyalty'.

    Side effects
    ------------
    Prints the index and slug of each deputy once its votes have been fetched,
    and writes the frame to ``legislature + "deputes.json"``.
    """
    listing_url = 'https://' + legislature + '.nosdeputes.fr/deputes/xml'
    root_xml_tree = get_element_tree(listing_url)
    info_fields = ('nom', 'slug', 'num_deptmt', 'groupe_sigle')
    ballot_positions = ('pour', 'contre', 'abstention')

    # First pass: one record per deputy, with empty vote containers.
    deputes = []
    collected = 0
    for depute_tag in root_xml_tree:
        record = {field: depute_tag.find(field).text for field in info_fields}
        record.update((position, set()) for position in ballot_positions)
        record['loyalty'] = 0.0
        deputes.append(record)

        collected += breaker
        if collected > 10:
            break

    # Second pass: fill each record in place with the deputy's votes.
    for index, depute in enumerate(deputes):
        get_depute_votes(depute, legislature)
        print(index, depute['slug'])

    deputes_df = pd.DataFrame(deputes)
    deputes_df.to_json(legislature + "deputes.json", orient = 'records')
    return deputes_df

def get_depute_votes(depute : dict, legislature):
    """Download one deputy's votes and store them, with a loyalty ratio, in ``depute``.

    Parameters
    ----------
    depute : dict
        Mutated in place. Must contain 'slug' and a set under each of 'pour',
        'contre' and 'abstention'; the ballot numbers are added to the set that
        matches the deputy's position. 'loyalty' is overwritten with the share
        of recorded ballots on which the deputy's position equals the
        'position_groupe' value of the same entry (0.0 when nothing was recorded).
    legislature : str
        Sub-domain of nosdeputes.fr to query.

    Returns
    -------
    None

    Notes
    -----
    Entries with a missing or unknown position, or with a missing or
    non-numeric ballot number, are skipped instead of raising. Loyal ballots are
    tracked as a set of ballot numbers and the ratio is assigned rather than
    accumulated, so a repeated entry or a second call for the same deputy cannot
    inflate the result.
    """
    vote_choice = ('pour', 'contre', 'abstention')
    xml_data = open_xml('https://'+ legislature + '.nosdeputes.fr/' + depute['slug'] + '/votes/xml')
    root = ET.fromstring(xml_data)

    loyal_ballots = set()
    for vote_tag in root:
        # findtext returns None when the tag is absent, so malformed entries
        # simply fail the membership test below.
        position = vote_tag.findtext('position')
        if position not in vote_choice:
            continue
        try:
            law_rank = int(vote_tag.findtext('scrutin/numero'))
        except (TypeError, ValueError):
            # Ballot number missing (None) or not an integer: entry is unusable.
            continue
        depute[position].add(law_rank)
        if position == vote_tag.findtext('position_groupe'):
            loyal_ballots.add(law_rank)

    total_ballot = len(depute['pour']) + len(depute['contre']) + len(depute['abstention'])
    depute['loyalty'] = len(loyal_ballots) / total_ballot if total_ballot else 0.0
        

def open_deputes_df(legislature = 'www'):
    """Load the cached deputies table from ``legislature + "deputes.json"``.

    The three vote columns come back from JSON as lists, so each of them is
    converted back to a set before the frame is returned.
    """
    loaded_df = pd.read_json(legislature + "deputes.json")
    for vote_column in ('pour', 'contre', 'abstention'):
        loaded_df[vote_column] = loaded_df[vote_column].apply(set)
    return loaded_df

def set_weight(deputes_df: pd.core.frame.DataFrame):
    """Build the table of ordered deputy pairs with their voting-agreement weights.

    Parameters
    ----------
    deputes_df
        Needs the columns 'slug', 'pour', 'contre' and 'abstention'; the three
        vote columns hold sets of ballot numbers.

    Returns
    -------
    pandas.DataFrame
        One row per ordered pair of distinct slugs. Every input column appears
        twice ('_source' / '_target'), along with 'union' (all ballots the
        deputy voted on) and 'occurence' (its size), plus:
        'weight' -- number of ballots on which both deputies cast the same vote;
        'relative-weight' -- 'weight' divided by the number of ballots both
        deputies voted on, or 0 when they share none.

    Progress messages are printed after each of the three stages.
    """
    positions = ('pour', 'contre', 'abstention')

    def ballots_voted(row):
        # Every ballot the deputy took part in, whatever the position.
        return row['pour'] | row['contre'] | row['abstention']

    def same_vote_count(pair):
        # Ballots where source and target chose the same position.
        return sum(len(pair[position + '_source'] & pair[position + '_target'])
                   for position in positions)

    def same_vote_ratio(pair):
        # Normalise by the ballots both deputies voted on.
        shared_ballots = len(pair['union_source'] & pair['union_target'])
        return pair['weight'] / shared_ballots if shared_ballots != 0 else 0

    # Per-deputy working table.
    votes_df = deputes_df[['slug', 'pour', 'contre', 'abstention']].copy()
    votes_df['union'] = votes_df.apply(ballots_voted, axis=1)
    votes_df['occurence'] = votes_df['union'].apply(len)

    # Cartesian product of deputies, without the self-pairs.
    pairs_df = pd.merge(votes_df, votes_df, how ="cross", suffixes=('_source', '_target'))
    pairs_df = pairs_df[pairs_df['slug_source'] != pairs_df['slug_target']]
    print("Relationship dataframe created")

    pairs_df['weight'] = pairs_df.apply(same_vote_count, axis=1)
    print("Weight calculated")

    pairs_df['relative-weight'] = pairs_df.apply(same_vote_ratio, axis=1)
    print("Relative-weight calculated")
    return pairs_df


In [3]:
# Download and parse the ballot (scrutin) listing of legislature 16.
xml_data = open_xml('https://nosdeputes.fr/16/scrutins/xml')
root = ET.fromstring(xml_data)
# Use the number of the last <scrutin> in document order as the ballot count.
# NOTE(review): this equals the real total only if ballots are listed in
# ascending order without gaps — confirm against the feed.
AMOUNT_BALLOT = int(root.findall('scrutin/numero')[-1].text)

print(AMOUNT_BALLOT, "ballot detected")

876 ballot detected


In [33]:
# Legislature identifiers. The selected one is used as the nosdeputes.fr
# sub-domain and as the prefix of the cached JSON files.
legislature_list = ['2007-2012', '2012-2017', '2017-2022']
# Legislature analysed in the cells that read the `legislature` global.
legislature = legislature_list[2]

In [21]:

# Scrape every deputy of the selected legislature and their votes (one HTTP request per deputy).
# NOTE: set_deputes_df downloads the deputies listing itself and overwrites its
# first parameter, so the `root` passed here is not used.
deputes_df = set_deputes_df(root, legislature = legislature)


0 cedric-roussel
1 nadia-hai
2 pascale-fontenel-personne
3 laurent-pietraszewski
4 guillaume-kasbarian
5 cyrille-isaac-sibille
6 guillaume-vuilletet
7 olivier-faure
8 pierre-alain-raphan
9 isabelle-muller-quoy
10 emmanuelle-anthoine
11 anne-christine-lang
12 laurence-vichnievsky
13 olivier-dassault
14 berangere-abba
15 barbara-bessot-ballot
16 laurent-saint-martin
17 carole-bureau-bonnard
18 annaig-le-meur
19 jean-paul-lecoq
20 bertrand-sorre
21 gabriel-serville
22 aurore-berge
23 bruno-duverge
24 olivier-dussopt
25 martine-wonner
26 christian-hutin
27 alain-tourret
28 paul-molac
29 christophe-euzet
30 jean-marc-zulesi
31 damien-abad
32 fabrice-le-vigoureux
33 bertrand-pancher
34 anne-laure-cattelot
35 mohamed-laqhila
36 loic-kervran
37 francois-michel-lambert
38 valerie-oppelt
39 bruno-questel
40 geraldine-bannier
41 xavier-breton
42 caroline-abadie
43 pierre-vatin
44 claire-pitollat
45 meyer-habib
46 philippe-gosselin
47 sylvain-waserman
48 rodrigue-kokouendo
49 cecile-rilhac
50 mari

In [7]:
# Not the last expression of the cell, so this preview is not rendered.
deputes_df.head()
# Cache the deputies table as JSON records. The file name carries no
# legislature prefix, unlike the file written by set_deputes_df.
deputes_df.to_json("deputes.json", orient='records')

In [9]:
# Reload the cached deputies table (vote columns stay lists: no set conversion here).
deputes_df = pd.read_json("deputes.json")
# Not displayed: only the last expression of a cell is rendered.
deputes_df.loc[deputes_df['slug'] == 'marine-le-pen']
# NOTE(review): no visible cell writes 'big.json' (real_test targets
# legislature + '_big.json') — confirm which file is meant.
relation_df = pd.read_json('big.json')
# All relations whose source deputy is marine-le-pen.
relation_df.loc[relation_df['slug_source'] == 'marine-le-pen']


Unnamed: 0,slug_source,slug_target,weight,relative-weight
63986,marine-le-pen,thibault-bazin,63,0.583333
63987,marine-le-pen,jean-victor-castor,53,0.500000
63988,marine-le-pen,benoit-mournet,24,0.143713
63989,marine-le-pen,nicolas-meizonnet,174,0.983051
63990,marine-le-pen,marc-fesneau,0,0.000000
...,...,...,...,...
64579,marine-le-pen,anne-grignon,0,0.000000
64580,marine-le-pen,philippe-guillemard,15,0.185185
64581,marine-le-pen,laurent-esquenet-goxes,20,0.186916
64582,marine-le-pen,agnes-carel,12,0.107143


In [28]:
def real_test(legislature):
    """Compute the pairwise relation table for one legislature and cache a slim copy.

    Loads the cached deputies of ``legislature``, builds the relation table with
    set_weight, and writes the columns 'slug_source', 'slug_target', 'weight'
    and 'relative-weight' to ``legislature + '_big.json'`` as JSON records.

    Returns the full relation table, including the set-valued columns.
    """
    deputes_df = open_deputes_df(legislature)
    relation_df = set_weight(deputes_df)
    final_df = relation_df[['slug_source', 'slug_target', 'weight', 'relative-weight']]
    # The file name literal previously mixed ' and " quotes, which made the cell a SyntaxError.
    final_df.to_json(legislature + '_big.json', orient='records')
    return relation_df

# real_test requires the legislature to load; pass the one selected for this notebook.
relation_df = real_test(legislature)

Relationship dataframe created
Weight calculated
Relative-weight calculated


In [38]:
# Show the row of a single deputy, selected by slug.
deputes_df.loc[(deputes_df['slug'] == 'pascale-fontenel-personne')]

Unnamed: 0,nom,slug,num_deptmt,groupe_sigle,pour,contre,abstention,loyalty
2,Pascale Fontenel-Personne,pascale-fontenel-personne,72,MODEM,"{1536, 1, 2050, 2049, 2058, 2065, 1044, 2071, ...","{1018, 515, 3, 517, 4, 516, 518, 9, 519, 520, ...","{2059, 3738, 3622, 3623, 2986, 4012, 4013, 401...",0.920668


In [29]:
# Look the pair up in both directions. The second half of the condition used to
# repeat the first one (same source/target roles), so the reversed row could
# never match. The undisplayed `relation_df.head(-10)` that preceded this
# lookup had no effect and was dropped.
relation_df.loc[
    ((relation_df['slug_source'] == 'thibault-bazin') & (relation_df['slug_target'] == 'danielle-simonnet'))
    | ((relation_df['slug_source'] == 'danielle-simonnet') & (relation_df['slug_target'] == 'thibault-bazin'))
]

Unnamed: 0,slug_source,pour_source,contre_source,abstention_source,union_source,occurence_source,slug_target,pour_target,contre_target,abstention_target,union_target,occurence_target,weight,relative-weight


In [30]:
# All relations whose source deputy is yael-braun-pivet.
relation_df.loc[relation_df['slug_source'] == 'yael-braun-pivet']

Unnamed: 0,slug_source,pour_source,contre_source,abstention_source,union_source,occurence_source,slug_target,pour_target,contre_target,abstention_target,union_target,occurence_target,weight,relative-weight
194405,yael-braun-pivet,"{1, 2, 4103, 2058, 2059, 2060, 4108, 4109, 206...","{4096, 4097, 4098, 2051, 2052, 4099, 4100, 410...","{64, 79}","{4096, 1, 2, 4097, 4098, 2051, 2052, 4103, 409...",1018,cedric-roussel,"{1, 4103, 4104, 2059, 2060, 4108, 4109, 4110, ...","{3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 4102...","{2784, 3650, 3171, 3172, 3173, 3174, 3175, 317...","{1, 3, 4, 5, 6, 4103, 4104, 7, 8, 2059, 2060, ...",939,315,0.978261
194406,yael-braun-pivet,"{1, 2, 4103, 2058, 2059, 2060, 4108, 4109, 206...","{4096, 4097, 4098, 2051, 2052, 4099, 4100, 410...","{64, 79}","{4096, 1, 2, 4097, 4098, 2051, 2052, 4103, 409...",1018,nadia-hai,"{1536, 1, 1540, 1541, 2054, 1543, 1032, 1544, ...","{2048, 2051, 3, 5, 6, 7, 8, 2052, 2053, 2055, ...","{65, 66, 2279, 780, 826, 159}","{2048, 1, 2051, 3, 5, 2054, 6, 2056, 7, 2058, ...",749,233,0.983122
194407,yael-braun-pivet,"{1, 2, 4103, 2058, 2059, 2060, 4108, 4109, 206...","{4096, 4097, 4098, 2051, 2052, 4099, 4100, 410...","{64, 79}","{4096, 1, 2, 4097, 4098, 2051, 2052, 4103, 409...",1018,pascale-fontenel-personne,"{1536, 1, 2050, 2049, 2058, 2065, 1044, 2071, ...","{1018, 515, 3, 517, 4, 516, 518, 9, 519, 520, ...","{2059, 3738, 3622, 3623, 2986, 4012, 4013, 401...","{1, 2050, 2049, 1018, 3, 4, 2036, 9, 2058, 205...",479,216,0.919149
194408,yael-braun-pivet,"{1, 2, 4103, 2058, 2059, 2060, 4108, 4109, 206...","{4096, 4097, 4098, 2051, 2052, 4099, 4100, 410...","{64, 79}","{4096, 1, 2, 4097, 4098, 2051, 2052, 4103, 409...",1018,laurent-pietraszewski,"{1536, 1, 1540, 1541, 1542, 1543, 1544, 1545, ...","{3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, ...",{101},"{1, 3, 4, 5, 6, 7, 8, 9, 10, 2059, 11, 12, 14,...",640,176,0.983240
194409,yael-braun-pivet,"{1, 2, 4103, 2058, 2059, 2060, 4108, 4109, 206...","{4096, 4097, 4098, 2051, 2052, 4099, 4100, 410...","{64, 79}","{4096, 1, 2, 4097, 4098, 2051, 2052, 4103, 409...",1018,guillaume-kasbarian,"{1, 2, 2054, 4103, 2056, 4104, 2058, 2059, 410...","{4096, 4097, 4098, 2051, 2052, 2053, 4099, 205...","{1666, 1667, 1668, 3725, 270, 3726, 2064, 3735...","{4096, 1, 2, 4097, 4098, 4099, 4100, 4103, 410...",1077,336,0.991150
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195059,yael-braun-pivet,"{1, 2, 4103, 2058, 2059, 2060, 4108, 4109, 206...","{4096, 4097, 4098, 2051, 2052, 4099, 4100, 410...","{64, 79}","{4096, 1, 2, 4097, 4098, 2051, 2052, 4103, 409...",1018,lamia-el-aaraje,"{4098, 4099, 4100, 4101, 4102, 4105, 4106, 410...","{4103, 4104, 4108, 4109, 3858, 3859, 4114, 386...","{3968, 3969, 3906, 3958, 3959, 3960}","{4098, 4099, 4100, 4101, 4102, 4103, 4104, 410...",176,14,0.119658
195060,yael-braun-pivet,"{1, 2, 4103, 2058, 2059, 2060, 4108, 4109, 206...","{4096, 4097, 4098, 2051, 2052, 4099, 4100, 410...","{64, 79}","{4096, 1, 2, 4097, 4098, 2051, 2052, 4103, 409...",1018,bernadette-beauvais,{},{},{},{},0,0,0.000000
195061,yael-braun-pivet,"{1, 2, 4103, 2058, 2059, 2060, 4108, 4109, 206...","{4096, 4097, 4098, 2051, 2052, 4099, 4100, 410...","{64, 79}","{4096, 1, 2, 4097, 4098, 2051, 2052, 4103, 409...",1018,sophie-metadier,"{4249, 4254, 4382, 4257, 4008, 4150, 4279, 415...","{4097, 4258, 3950, 3951, 3952, 3953, 3890, 395...","{4096, 4098, 3855, 3954, 3891, 4090, 3837, 4094}","{4096, 4097, 4098, 3855, 4248, 4249, 4254, 438...",53,19,0.527778
195062,yael-braun-pivet,"{1, 2, 4103, 2058, 2059, 2060, 4108, 4109, 206...","{4096, 4097, 4098, 2051, 2052, 4099, 4100, 410...","{64, 79}","{4096, 1, 2, 4097, 4098, 2051, 2052, 4103, 409...",1018,victor-habert-dassault,"{4101, 3977, 4106, 4107, 4105, 4235, 4236, 398...","{3971, 4102, 4103, 4104, 4108, 4109, 4114, 398...","{4305, 4309, 3855}","{4101, 4102, 4103, 4104, 4105, 4106, 4107, 410...",80,18,0.352941


In [118]:
def test():
    """Run set_weight on three hand-made deputies and display input and result.

    Displays the input frame, then the first rows of the
    'slug_source' / 'slug_target' / 'weight' / 'relative-weight' columns.
    """
    def fake_depute(label, pour, contre, abstention):
        # Same key order as the scraped records, so the columns line up.
        return {"nom": label, "slug": label,
                "num_deptmt": "01", "groupe_sigle": "LREM",
                "pour": pour, 'contre': contre, 'abstention': abstention}

    fixtures = [
        fake_depute("A", {1, 2, 3}, {4, 5, 6}, {7, 8, 9}),
        fake_depute("B", {1, 2}, {4}, {9}),
        fake_depute("C", {10, 3}, {4, 8}, set()),
    ]

    fixtures_df = pd.DataFrame(fixtures)
    display(fixtures_df)
    pairs_df = set_weight(fixtures_df)
    summary_df = pairs_df[['slug_source', 'slug_target', 'weight', 'relative-weight']]
    display(summary_df.head())
test()

Unnamed: 0,nom,slug,num_deptmt,groupe_sigle,pour,contre,abstention
0,A,A,1,LREM,"{1, 2, 3}","{4, 5, 6}","{8, 9, 7}"
1,B,B,1,LREM,"{1, 2}",{4},{9}
2,C,C,1,LREM,"{10, 3}","{8, 4}",{}


Relationship dataframe created
Weight calculated
Relative-weight calculated


Unnamed: 0,slug_source,slug_target,weight,relative-weight
1,A,B,4,1.0
2,A,C,2,0.666667
3,B,A,4,1.0
5,B,C,1,1.0
6,C,A,2,0.666667


### Graph part

In [31]:
#### Parameters
# NOTE(review): none of these three values is read in the visible part of the
# notebook — presumably consumed by a community-detection / layout step
# elsewhere; confirm.
louvain_resolution = 1.15 # Sets the resolution of the Louvain Algorithm
louvain_seed = 1  # presumably the random seed of the Louvain step — TODO confirm
spring_seed = 1  # presumably the random seed of a spring layout — TODO confirm

In [122]:
# Needs deputes_df and relation_df from the cells above (the recorded NameError
# comes from running this cell before they existed).
display(deputes_df.head(-10))
# The relation table names its columns 'slug_source' / 'slug_target'; there is
# no 'source' column.
relation_df['slug_source'].unique()

NameError: name 'deputes_df' is not defined

In [39]:
##### Create a graph from a pandas dataframe
import networkx as nx
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

deputes_df = open_deputes_df(legislature)

# Keep the relations whose relative weight is at or above the median. The
# explicit copy makes relation_graph independent of relation_df, so the column
# assignments below never write through a filtered slice.
relative_weight = relation_df['relative-weight']
relation_graph = relation_df.loc[relative_weight >= relative_weight.quantile(q=0.50)].copy()

# Edge weight written to the graph: relative weight rescaled to [1, 10], one decimal.
# This replaces the raw co-vote count stored under 'weight' in relation_graph only.
scaler = MinMaxScaler(feature_range = (1,10))
relation_graph['weight'] = scaler.fit_transform(relation_graph[['relative-weight']])
relation_graph['weight'] = relation_graph['weight'].round(1)

# Build the graph from the relation rows.
G = nx.from_pandas_edgelist(relation_graph, 'slug_source', 'slug_target', 'weight')

# Attach 'groupe_sigle' and the rescaled 'loyalty' to the matching nodes through
# slug-keyed dictionaries. The scaler is refit here on the loyalty column.

groupe_sigle = dict(zip(deputes_df['slug'], deputes_df['groupe_sigle']))
deputes_df['loyalty'] = scaler.fit_transform(deputes_df[['loyalty']])
loyalty = dict(zip(deputes_df['slug'], deputes_df['loyalty']))

nx.set_node_attributes(G, groupe_sigle, 'groupe_sigle')
nx.set_node_attributes(G, loyalty, 'loyalty')

# Export the graph in GEXF format.
nx.write_gexf(G, legislature + ".gexf")

In [173]:
# Summary statistics of the 'relative-weight' column of the filtered edge table.
# NOTE(review): the recorded output ranges from 1 to 10, which looks like
# rescaled values from another kernel state — re-check after a clean run.
relation_graph['relative-weight'].describe()

count    165826.000000
mean          6.747024
std           3.246197
min           1.000000
25%           3.000000
50%           8.000000
75%          10.000000
max          10.000000
Name: relative-weight, dtype: float64

In [197]:
# Show the row of a single deputy, selected by slug.
deputes_df.loc[deputes_df['slug'] == 'sebastien-chenu']

Unnamed: 0,nom,slug,num_deptmt,groupe_sigle,pour,contre,abstention,loyalty
304,Sébastien Chenu,sebastien-chenu,59,RN,"[512, 514, 3, 515, 5, 6, 7, 9, 10, 11, 17, 18,...","[4, 8, 13, 15, 144, 20, 151, 155, 156, 157, 40...","[129, 139, 12, 140, 14, 142, 143, 652, 148, 21...",10.0


In [73]:
# Political group acronym of every deputy.
deputes_df['groupe_sigle']

0         LR
1        GDR
2        REN
3         RN
4      MODEM
       ...  
594    MODEM
595      REN
596    MODEM
597      HOR
598    MODEM
Name: groupe_sigle, Length: 599, dtype: object

In [74]:
# Summary statistics of the 'relative-weight' column of the relation table.
# NOTE(review): the recorded output (90 rows, max 41) does not look like the
# ratio computed by set_weight — presumably from another kernel state; re-check
# after a clean run.
relation_df['relative-weight'].describe()

count    90.000000
mean     16.753043
std      13.536893
min       1.000000
25%       4.849447
50%      14.467828
75%      31.768585
max      41.000000
Name: relative-weight, dtype: float64