# Qu'y a-t-il dans ce notebook ?

Permet l'analyse des contextes des fichiers CSV.

Fichiers utilisés : form_fem_(\*)_bis.csv (entrée) ; les résultats filtrés sont écrits dans form_fem_(\*)_ter.csv

Le but est d'enlever les contextes où la forme est utilisée comme adjectif (plus précisément, où elle n'est pas étiquetée comme nom par spaCy).

In [None]:
import pandas as pd
import spacy
import csv

In [None]:
# Load the spaCy pipeline "fr_core_news_lg" once; `nlp` is the module-level
# parser that pop_row calls on every context string.
nlp = spacy.load("fr_core_news_lg")

## Définition des fonctions

In [None]:
def extract_contexts(data_frame):

    """ Build one context string per row from the three "Unnamed" columns
    -> data_frame : DataFrame holding the columns "Unnamed: 0", "Unnamed: 1"
       and "Unnamed: 2"
    <- list of contexts (strings) : [context1, context2, context3 ...]
       each context is the three cells of a row joined by single spaces
    """

    columns = ["Unnamed: 0", "Unnamed: 1", "Unnamed: 2"]
    parts = data_frame[columns]
    left, middle, right = (parts[name] for name in columns)
    # Element-wise concatenation, row by row.
    joined = left + " " + middle + " " + right
    return list(joined)


In [None]:
def get_word_forms(data_frame):

    """ List the word forms stored as column labels
    -> data_frame : DataFrame
    <- list of word forms : every column label after the first three columns
    """

    # The first three columns are skipped; the remaining labels are the forms.
    return list(data_frame.columns[3:])

In [None]:
def is_noun(word_to_check):

    """ Check whether a word is tagged as a noun
    -> word_to_check : token-like object exposing a `pos_` attribute
    <- bool : True if `pos_` is "NOUN", False otherwise
    """

    # Return the comparison itself so that non-nouns yield an explicit False
    # instead of falling off the end of the function and returning None.
    return word_to_check.pos_ == "NOUN"

In [None]:
def extract_wanted_form(word_context):

    """ Tell whether a word is used as something other than a noun
    -> word_context : token-like object, passed as is to is_noun
    <- bool : True if the word is NOT a noun, False if it is a noun
    """

    # Negate directly so the result is always a real boolean (the previous
    # form returned None, not False, for nouns).
    return not is_noun(word_context)

In [None]:
def pop_row(data_frame, context_list, word_list):

    """ Drop the rows in which the word form is not a noun. Rows are deleted inplace

    Each context is parsed with `nlp`; its third token (doc[2]) is compared
    with the known word forms. When it equals one of them and
    extract_wanted_form reports that it is not a noun, the row is removed.

    -> data_frame : DataFrame to filter (modified in place)
    -> context_list : list of SN contexts, one per row, in row order
    -> word_list : list of word forms
    <- None
    """

    known_forms = set(word_list)
    labels_to_drop = []

    # Pair every context with the real index label of its row instead of a
    # positional counter, so the right row is removed even when the index is
    # not 0..n-1 (for instance after earlier drops).
    for label, context in zip(data_frame.index, context_list):
        doc = nlp(context)
        target = doc[2]
        if target.text in known_forms and extract_wanted_form(target):
            labels_to_drop.append(label)

    # Drop once, after the loop, so the frame is not mutated while its index
    # is being iterated.
    if labels_to_drop:
        data_frame.drop(labels_to_drop, axis=0, inplace=True)

## Tri pour le fichier "form_fem_candidate_bis.csv"

In [None]:
# Load the "candidate" table, build one context string per row and read the
# word forms from the column labels (every column after the first three).
data = pd.read_csv("form_fem_candidate_bis.csv")
contexts = extract_contexts(data)
forms = get_word_forms(data)
print(forms)
# Remove, in place, the rows whose matched form is not tagged as a noun.
pop_row(data, contexts, forms)
# Save the filtered table; index=False is not passed, so the index is written too.
data.to_csv("form_fem_candidate_ter.csv")

In [None]:
# Manual second-pass filtering: drop a hard-coded list of rows by index label.

data.drop([23,36,64,76,85,96,105, 106, 107,108,125], axis=0, inplace=True)
print(data.shape)

In [None]:
# Write the current state of `data` to the "candidate" output file,
# overwriting any file already saved under that name.
data.to_csv("form_fem_candidate_ter.csv")

## Tri pour le fichier "form_fem_citoyenne_bis.csv"

In [None]:
# Load the "citoyenne" table, build one context string per row and read the
# word forms from the column labels (every column after the first three).
data = pd.read_csv("form_fem_citoyenne_bis.csv")
contexts = extract_contexts(data)
forms = get_word_forms(data)
print(forms)
# Remove, in place, the rows whose matched form is not tagged as a noun.
pop_row(data, contexts, forms)
# Save the filtered table; index=False is not passed, so the index is written too.
data.to_csv("form_fem_citoyenne_ter.csv")

## Tri pour le fichier "form_fem_depute_bis.csv"

In [None]:
# Load the "depute" table, build one context string per row and read the
# word forms from the column labels (every column after the first three).
data = pd.read_csv("form_fem_depute_bis.csv")
contexts = extract_contexts(data)
forms = get_word_forms(data)
print(forms)
# Remove, in place, the rows whose matched form is not tagged as a noun.
pop_row(data, contexts, forms)
# Save the filtered table; index=False is not passed, so the index is written too.
data.to_csv("form_fem_depute_ter.csv")

## Tri pour le fichier "form_fem_elue_bis.csv"

In [None]:
# Load the "elue" table, build one context string per row and read the word
# forms from the column labels (every column after the first three).
data = pd.read_csv("form_fem_elue_bis.csv")
# A bare `data.shape` is only displayed when it is the last expression of a
# cell, so print it explicitly to see the size before filtering.
print(data.shape)
contexts = extract_contexts(data)
forms = get_word_forms(data)
print(forms)
# Remove, in place, the rows whose matched form is not tagged as a noun.
pop_row(data, contexts, forms)
# Size after filtering, for comparison with the value printed above.
print(data.shape)
# Save the filtered table; index=False is not passed, so the index is written too.
data.to_csv("form_fem_elue_ter.csv")

## Tri pour le fichier "form_fem_migrante_bis.csv"

In [None]:
# Load the "migrante" table, build one context string per row and read the
# word forms from the column labels (every column after the first three).
data = pd.read_csv("form_fem_migrante_bis.csv")
# A bare `data.shape` is only displayed when it is the last expression of a
# cell, so print it explicitly to see the size before filtering.
print(data.shape)
contexts = extract_contexts(data)
forms = get_word_forms(data)
print(forms)
# Remove, in place, the rows whose matched form is not tagged as a noun.
pop_row(data, contexts, forms)
# Additionally drop a hard-coded list of rows by index label. This raises
# KeyError if one of these labels was already removed by pop_row.
data.drop([4, 8, 10, 14, 15 , 16, 17, 18], axis=0, inplace=True)
# Save the filtered table; index=False is not passed, so the index is written too.
data.to_csv("form_fem_migrante_ter.csv")

## Tri pour le fichier "form_fem_representante_bis.csv"

In [None]:
# Load the "representante" table, build one context string per row and read
# the word forms from the column labels (every column after the first three).
data = pd.read_csv("form_fem_representante_bis.csv")
# A bare `data.shape` is only displayed when it is the last expression of a
# cell, so print it explicitly to see the size before filtering.
print(data.shape)
contexts = extract_contexts(data)
forms = get_word_forms(data)
print(forms)
# Remove, in place, the rows whose matched form is not tagged as a noun.
pop_row(data, contexts, forms)
# Size after filtering, for comparison with the value printed above.
print(data.shape)
# Save the filtered table; index=False is not passed, so the index is written too.
data.to_csv("form_fem_representante_ter.csv")