In [None]:
import json
import pandas as pd 

# Read the raw poll data (JSON) into memory; the following cells iterate over it.
with open('../input_data/data_presidentielle_2022_nspp.json', mode='r', encoding="utf-8") as json_file:
    liste_sondages_nspp = json.load(json_file)

In [None]:
# Flatten the nested poll structure into one record per (poll, hypothesis),
# keeping only the first round ('Premier tour').
first_tour_scores = []

for sondage in liste_sondages_nspp:
    # Poll-level metadata, repeated on every hypothesis record of this poll.
    poll_meta = {
        'nom_institut': sondage["nom_institut"],
        'debut_enquete': sondage["debut_enquete"],
        'fin_enquete': sondage["fin_enquete"],
        'echantillon': sondage["echantillon"],
    }
    for tour in sondage["tours"]:
        if tour['tour'] != 'Premier tour':  # we restrict ourselves to the first round
            continue
        for hypothesis in tour['hypotheses']:
            hypo_name = hypothesis['hypothese']
            # Keep only the three fields used downstream for each candidate.
            scores = [
                {
                    'candidat': entry['candidat'],
                    'parti': entry['parti'],
                    'intentions': entry['intentions'],
                }
                for entry in hypothesis['candidats']
            ]
            first_tour_scores.append({**poll_meta, 'hypothese': hypo_name, 'scores': scores})

first_tour_scores

# 1st version: with candidates' names


In [None]:
# Every candidate name that appears in at least one first-round hypothesis.
unique_candidates = {
    entry["candidat"]
    for poll in first_tour_scores
    for entry in poll['scores']
}
unique_candidates

In [None]:

# Build one flat row per hypothesis: poll metadata followed by one column per candidate.
rows_candidates = []
for poll in first_tour_scores:
    row = {
        'Company': poll['nom_institut'],
        'collectPeriodFrom': poll['debut_enquete'],
        'collectPeriodTo': poll['fin_enquete'],
        'n': poll['echantillon'],
        'Hypothese': poll['hypothese'],
    }
    # Default every known candidate to None so all rows share the same columns.
    row.update(dict.fromkeys(unique_candidates))
    for entry in poll['scores']:
        name = entry["candidat"]
        if name in unique_candidates:
            row[name] = entry['intentions']
    rows_candidates.append(row)

rows_candidates

# TODO: group candidates by party

In [None]:
# One row per hypothesis, plus a 'Total' column summing all candidate scores.
df_candidates = pd.DataFrame(rows_candidates)
candidate_columns = list(unique_candidates)
df_candidates['Total'] = df_candidates[candidate_columns].sum(axis=1)

# Sanity check: count and display the hypotheses whose scores do not add up to exactly 100.
# TODO: look at entries such as id 20220118_0121r_ow whose total is 0 because
# their scores are split into 2 parts in the JSON.
candidates_off_total = df_candidates.loc[df_candidates['Total'] != 100]
print(len(candidates_off_total))
candidates_off_total

In [None]:
# Export the per-candidate table (without the DataFrame index) as UTF-8 CSV.
candidates_csv_path = '../output_data/df_candidates.csv'
df_candidates.to_csv(candidates_csv_path, index=False, encoding='utf-8')

# 2nd version: with party names

In [None]:
# Every non-empty party label attached to a candidate in a first-round hypothesis.
unique_parties = {
    party
    for poll in first_tour_scores
    for entry in poll['scores']
    for party in entry['parti']
    if party != ''
}
unique_parties

In [None]:
# Manual candidate -> party mapping, used when the data gives no single party.
# "Small" candidates like Montebourg or Taubira are assigned their own party.
link_candidate_party = {
    'Arnaud Montebourg': 'Montebourg',
    'Jean Lassalle': 'Résistons',
    'Eric Zemmour': 'Reconquête',
    'Christiane Taubira': 'Taubira',
    'Jean-Christophe Lagarde': 'UDI',
    'Jean-Luc Mélenchon': 'France insoumise',
    'Anne Hidalgo': 'Parti socialiste',
    'Yannick Jadot': 'EE-LV',
    'Valérie Pécresse': 'Les Républicains',
    'Xavier Bertrand': 'Les Républicains',
    'Bruno Retailleau': 'Les Républicains',
}
# Make sure every manually assigned party also gets its own column later on.
unique_parties.update(link_candidate_party.values())
unique_parties


# TODO: there is an issue when several candidates belong to the same party (e.g. Montebourg, Taubira and Hidalgo for the Parti socialiste); one possible choice is to group them together.
# In fact, is this really an issue as long as every candidate has a party? This needs more thought.

In [None]:

# Build one flat row per hypothesis: poll metadata followed by one column per party,
# each holding the summed voting intentions of that party's candidates.
rows_parties = []
for tour in first_tour_scores:
    row = {'Company':tour['nom_institut'], 'collectPeriodFrom':tour['debut_enquete'], 'collectPeriodTo':tour['fin_enquete'], 'n':tour['echantillon'], 'Hypothese': tour['hypothese']}
    for party in unique_parties:
        row[party] = 0
    for candidate in tour['scores']:
        # Work on a local variable: overwriting candidate["parti"] would silently
        # alter first_tour_scores and change the result of re-running earlier cells.
        parties = candidate["parti"]
        if not parties or parties == [''] or len(parties) > 1:  # no party or several
            mapped_party = link_candidate_party.get(candidate["candidat"])
            if mapped_party is None:  # parties we will need to add by hand
                print(candidate, candidate["candidat"])
                print("no party")
                # Skip instead of raising KeyError: the score is left out of the row,
                # so the row's total will fall short of 100 and can be spotted downstream.
                continue
            parties = [mapped_party]
        for party in parties:
            if party in unique_parties:
                row[party] += candidate['intentions']
    rows_parties.append(row)

#rows_parties

In [None]:
# One row per hypothesis, plus a 'Total' column summing all party scores.
df_parties = pd.DataFrame(rows_parties)
party_columns = list(unique_parties)
df_parties['Total'] = df_parties[party_columns].sum(axis=1)

# Sanity check: number of hypotheses whose party scores do not add up to exactly 100.
parties_off_total = df_parties.loc[df_parties['Total'] != 100]
print(len(parties_off_total))
df_parties

In [None]:
# Export the per-party table (without the DataFrame index) as UTF-8 CSV.
parties_csv_path = '../output_data/df_parties.csv'
df_parties.to_csv(parties_csv_path, index=False, encoding='utf-8')