In [1]:
import luigi
import pandas as pd
import os
import logging



Création du dossier pour stocker les données nettoyées

In [2]:
# Vérifiez si le dossier n'existe pas déjà

import shutil


dossier = 'CleanData'

if not os.path.exists(dossier):
    # Créez le dossier
    os.makedirs(dossier, mode=0o777)
    print("Dossier créé avec succès.")


In [3]:
import sys

class ReadDataCanton(luigi.Task):
    file_path = luigi.Parameter()


    def output(self):
        return luigi.LocalTarget("./CleanData/canton.csv")

    def run(self):
        
        df = pd.read_csv(self.file_path, sep=";")
        cantons = pd.DataFrame(columns=df.columns[0:25])
        df_partis = pd.read_csv('./RawData/Partis.csv', sep=",")

        for index, row in df.iterrows():
            #print(f"index : {index} \n row : {row[0:18]} \n\n")
            row1 = pd.Series(row[0:18], index=df.columns[0:18])

            for i in range(0, 10):
                # Ligne 2
                row2 = pd.Series(row.iloc[18 + i * 7:18 + ((i + 1) * 7)])

                df_row2 = pd.DataFrame([row2])
                
                df_row2_renamed = pd.DataFrame(df_row2.values, columns=['N°Panneau','Sexe','Nom','Prenom','Voix','% Voix/Ins','% Voix/Exp'])

                # Fusionner les deux lignes
                merged_row = pd.concat([row1, df_row2_renamed.squeeze()], axis=0)

                # Convertir la ligne fusionnée en DataFrame avec une seule ligne
                merged_row_df = pd.DataFrame([merged_row])

                # Ajouter la ligne fusionnée au DataFrame
                cantons = pd.concat([cantons, merged_row_df], ignore_index=True)

                #print(len(cantons))

        cantons = pd.merge(cantons, df_partis, on=['Nom', 'Prenom'], how='left')

        print(cantons.duplicated(['Nom', 'Prenom']).sum())
        print(df_partis.duplicated(['Nom', 'Prenom']).sum())

        #display(cantons)
        cantons.to_csv(self.output().path, index=False)




In [4]:
class ExtractCandidats(luigi.Task):
    file_path = luigi.Parameter()

    def requires(self):
        return ReadDataCanton(file_path="./RawData/canton_t1_2017.csv")
    
    def output(self):
        return luigi.LocalTarget("./CleanData/candidats.csv")

    def run(self):
                
        df = pd.read_csv(self.file_path, sep=",")
       
        display(df);

        df = df.loc[:, ["Prenom", "Nom"]]
        df = df.drop_duplicates()
        
        df.to_csv(self.output().path, index=False)

In [5]:
class ExtractDelinquance(luigi.Task):
    file_path = luigi.Parameter()

    def output(self):
        return luigi.LocalTarget("./CleanData/delinquance.csv")

    def run(self):
                        
        df = pd.read_csv(self.file_path, sep=";")
        # Faites quelque chose avec le dataframe, par exemple, l'afficher

        df.to_csv(self.output().path, index=False)

In [6]:
class ReadAllData(luigi.Task):
    def requires(self):
        return [ExtractCandidats(file_path="./CleanData/canton.csv"), 
                ExtractDelinquance(file_path="./RawData/Donnees_delinquance.csv")]
    
    def run(self):
        print("lancement")

    def output(self):
        return luigi.LocalTarget('result.txt')

In [7]:

config = luigi.configuration.get_config()
config.set('core', 'no_lock', 'False')

dossier = "./CleanData/"
Restart = True

if Restart is True:
    # Parcourir tous les fichiers du dossier
    for fichier in os.listdir(dossier):
        chemin_fichier = os.path.join(dossier, fichier)
        # Supprimer le fichier
        os.remove(chemin_fichier)

luigi.build([ReadAllData()], local_scheduler=False, no_lock=True)

DEBUG: Checking if ReadAllData() is complete
DEBUG: Checking if ExtractCandidats(file_path=./CleanData/canton.csv) is complete
DEBUG: Checking if ExtractDelinquance(file_path=./RawData/Donnees_delinquance.csv) is complete
INFO: Informed scheduler that task   ReadAllData__99914b932b   has status   PENDING
INFO: Informed scheduler that task   ExtractDelinquance___RawData_Donnee_8bfbeed715   has status   PENDING
DEBUG: Checking if ReadDataCanton(file_path=./RawData/canton_t1_2017.csv) is complete
INFO: Informed scheduler that task   ExtractCandidats___CleanData_cant_f691398620   has status   PENDING
INFO: Informed scheduler that task   ReadDataCanton___RawData_canton_95a4352c04   has status   PENDING
INFO: Done scheduling tasks
INFO: Running Worker with 1 processes
DEBUG: Asking scheduler for work...
DEBUG: Pending tasks: 4
INFO: [pid 16272] Worker Worker(salt=8936263000, workers=1, host=DESKTOP-PBH195U, username=Marin, pid=16272) running   ExtractDelinquance(file_path=./RawData/Donnees_d

20889
0


Unnamed: 0,Code du departement,Libelle du departement,Code du canton,Libelle du canton,Inscrits,Abstentions,% Abs/Ins,Votants,% Vot/Ins,Blancs,...,N°Panneau,Sexe,Nom,Prenom,Voix,% Voix/Ins,% Voix/Exp,Tendance,Code Tendance,Parti polititque
0,1,Ain,1,Amberieu-en-Bugey,19922,3909,1962,16013,8038,353,...,2,F,LE PEN,Marine,4408,2213,2837,d'extrême droite,1,RN
1,1,Ain,1,Amberieu-en-Bugey,19922,3909,1962,16013,8038,353,...,9,M,MeLENCHON,Jean-Luc,3196,1604,2057,d'extrême gauche,2,LFI
2,1,Ain,1,Amberieu-en-Bugey,19922,3909,1962,16013,8038,353,...,3,M,MACRON,Emmanuel,3125,1569,2011,gauche,3,LREM
3,1,Ain,1,Amberieu-en-Bugey,19922,3909,1962,16013,8038,353,...,11,M,FILLON,Francois,2483,1246,1598,droite,4,LR
4,1,Ain,1,Amberieu-en-Bugey,19922,3909,1962,16013,8038,353,...,1,M,DUPONT-AIGNAN,Nicolas,883,443,568,droite,4,DLF
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20895,ZZ,Francais etablis hors de France,1,Canton fictif,1264113,704383,5572,559730,4428,3169,...,1,M,DUPONT-AIGNAN,Nicolas,8837,07,159,droite,4,DLF
20896,ZZ,Francais etablis hors de France,1,Canton fictif,1264113,704383,5572,559730,4428,3169,...,10,M,ASSELINEAU,Francois,5578,044,101,droite,4,UPR
20897,ZZ,Francais etablis hors de France,1,Canton fictif,1264113,704383,5572,559730,4428,3169,...,6,M,POUTOU,Philippe,3414,027,062,d'extrême gauche,2,NPA
20898,ZZ,Francais etablis hors de France,1,Canton fictif,1264113,704383,5572,559730,4428,3169,...,8,M,LASSALLE,Jean,2530,02,046,droite,4,RES


INFO: [pid 16272] Worker Worker(salt=8936263000, workers=1, host=DESKTOP-PBH195U, username=Marin, pid=16272) done      ExtractCandidats(file_path=./CleanData/canton.csv)
DEBUG: 1 running tasks, waiting for next task to finish
INFO: Informed scheduler that task   ExtractCandidats___CleanData_cant_f691398620   has status   DONE
DEBUG: Asking scheduler for work...
DEBUG: Pending tasks: 1
INFO: [pid 16272] Worker Worker(salt=8936263000, workers=1, host=DESKTOP-PBH195U, username=Marin, pid=16272) running   ReadAllData()
INFO: [pid 16272] Worker Worker(salt=8936263000, workers=1, host=DESKTOP-PBH195U, username=Marin, pid=16272) done      ReadAllData()
DEBUG: 1 running tasks, waiting for next task to finish
INFO: Informed scheduler that task   ReadAllData__99914b932b   has status   DONE
DEBUG: Asking scheduler for work...
DEBUG: Done
DEBUG: There are no more tasks to run at this time
INFO: Worker Worker(salt=8936263000, workers=1, host=DESKTOP-PBH195U, username=Marin, pid=16272) was stopped. 

lancement


True