In [1]:
from readability import Readability
import LIWC_spanish as LIWC
import nltk
nltk.download('vader_lexicon')  
import numpy as np
from nltk.sentiment import SentimentIntensityAnalyzer
sia = SentimentIntensityAnalyzer()
from collections import Counter
import math
import re
import pandas as pd

import requests
req = requests.get('https://raw.githubusercontent.com/JULIELab/XANEW/master/Ratings_Warriner_et_al.csv')
anew = req.text.split('\n')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/sarmad/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [2]:
anew_dict = {}
for i in range(1,len(anew)):
  row = anew[i].split(',')
  if len(row)>1:
    anew_dict[row[1]]=[float(row[2]),float(row[5]),float(row[8])]    # VAD

In [3]:
def anew_extract(text,anew_dict):
  words = text.split(' ')
  D = {}
  for w in words:
    if w in anew_dict:
      if 'valence' not in D:
        D['valence']=[]
      if 'arousal' not in D:
        D['arousal']=[]
      if 'dominance' not in D:
        D['dominance']=[]
      D['valence'].append(anew_dict[w][0])
      D['arousal'].append(anew_dict[w][1])
      D['dominance'].append(anew_dict[w][1])
  D_new = {}
  for f in D:
    D_new[f] = np.mean(D[f])
  return(D_new)

In [4]:
def whiteSpacePerChar(text):
    '''
    Total White Space/Total character
    '''
    totChar=len(text)
    totNoSpace=len(text.replace(' ',''))
    totSpace=totChar-totNoSpace

    return(float(totSpace)/float(totChar))

def digitPerChar(text):
    '''
    total digits/character
    '''
    totChar=len(text)
    NoDigit=text
    for i in range(0,10):
        NoDigit=NoDigit.replace(str(i),'')
    totNoDigit=len(NoDigit)
    totDigit=totChar-totNoDigit
    return(float(totDigit)/float(totChar))

def charLen(text):
    return(len(text)/len(text.split(' ')))

def TTR(text):
  return(len(set(text.split(' ')))/len(text.split(' ')))

def honore(text):
    words=text.split(' ')
    unique=Counter(words)
    num=float(len(words))
    cnt=0
    for w in unique:
        if unique[w]==1:
            cnt=cnt+1
    denom=1-(float(cnt)/float(len(unique)))
    R=100*math.log10(num/denom)
    return(R)

def sichel(text):
    words=text.split(' ')
    unique=Counter(words)
    num=float(len(words))
    cnt=0
    for w in unique:
        if unique[w]==2:
            cnt=cnt+1

    S=float(cnt)/float(len(unique))
    return(S)

def brunet(text):
    words=text.split(' ')
    a=0.172
    W= len(words)** (len(set(words)) **a)
    return(W)



def hapaxLegomena(text):
    '''
    unnormalized
    '''
    words=text.split(' ')
    unique=Counter(words)
    cnt=0
    for w in unique:
        if unique[w]==1:
            cnt=cnt+1

    return(cnt/len(words))

def hapaxDislogemna(text):
    '''
    unnormalized
    '''
    words=text.split(' ')
    unique=Counter(words)
    cnt=0
    for w in unique:
        if unique[w]==2:
            cnt=cnt+1

    return(cnt/len(words))

In [5]:
def safe_apply(func, arg):
    try:
        return func(arg)
    except:
        return np.nan

def complexity_index(text):
  R = {}
  R['hapaxDislogemna']= safe_apply(hapaxDislogemna, text)
  R['hapaxLegomena']= safe_apply(hapaxLegomena, text)
  R['brunet']= safe_apply(brunet, text)
  R['sichel']= safe_apply(sichel, text)
  R['honore'] = safe_apply(honore, text)
  R['TTR'] = safe_apply(TTR, text)
  R['chars'] = safe_apply(charLen, text)
  R['digits'] = safe_apply(digitPerChar, text)
  R['whiteSpace'] = safe_apply(whiteSpacePerChar, text)
  return(R)

In [6]:
def readability_index(text):
  R = {}
  try:
    r = Readability(text)

    R['FKI']=r.flesch_kincaid().score
    R['FKR']=r.flesch().score
    R['GunningFog']=r.gunning_fog().score
    R['CLI']=r.coleman_liau().score
    R['DaleChall']=r.dale_chall().score
    R['ARI']=r.ari().score
    R['linsear']=r.linsear_write().score
    R['spache']=r.spache().score
  
  except:
    R['FKI']= np.nan
    R['FKR']= np.nan
    R['GunningFog']= np.nan
    R['CLI']= np.nan
    R['DaleChall']= np.nan
    R['ARI']= np.nan
    R['linsear']= np.nan
    R['spache']= np.nan

  return(R)

In [7]:
df = pd.read_csv('preprocessed_data.csv')
df

Unnamed: 0,candidate,source,date,text
0,Patricia Bullrich,rionegro,2023-08-15,el ciclo de río negro «vértices» sigue hoy en ...
1,Javier Milei,rionegro,2023-08-15,el ciclo de río negro «vértices» sigue hoy en ...
2,Sergio Tomas Massa,rionegro,2023-09-11,impuesto a las ganancias: massa anunciaría sub...
3,Guillermo Moreno,rionegro,2022-06-14,¿qué hacer en vacaciones de inverno en río neg...
4,Guillermo Moreno,rionegro,2022-11-06,racing le ganó a boca en un partido caliente y...
...,...,...,...,...
145537,Horacio Rodriguez Larreta,youtube,2023-08-01,"agustina kampfer entrevistó a césar biondini, ..."
145538,Patricia Bullrich,youtube,2023-08-01,"agustina kampfer entrevistó a césar biondini, ..."
145539,Alejandro Biondini,youtube,2023-07-31,entrevista a césar biondini en &quot;somos pm&...
145540,Patricia Bullrich,youtube,2023-07-31,entrevista a césar biondini en &quot;somos pm&...


In [10]:
from pandarallel import pandarallel

pandarallel.initialize(progress_bar=True, nb_workers=32)

INFO: Pandarallel will run on 32 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [8]:
# group the data in such a way that every candidate's data is in a single row
df = df.groupby(['candidate', 'source', 'date'])['text'].apply(lambda x: '\n'.join(x)).reset_index()
df

Unnamed: 0,candidate,source,date,text
0,Alejandro Biondini,c5n,2023-06-27,los precandidatos presidenciales firmarán un c...
1,Alejandro Biondini,c5n,2023-06-29,la interna menos pensada: el verborrágico sant...
2,Alejandro Biondini,c5n,2023-09-05,"quién es lucía montenegro, una de las anfitrio..."
3,Alejandro Biondini,clarin,2023-01-12,duro intercambio de insultos entre rodolfo tai...
4,Alejandro Biondini,clarin,2023-09-29,elecciones 2023: quiénes son los candidatos pr...
...,...,...,...,...
23660,Sergio Tomas Massa,youtube,2023-10-05,rompecabezas - patricia bullrich pugna por ent...
23661,Sergio Tomas Massa,youtube,2023-10-06,¿cuáles son los temas del segundo debate presi...
23662,Sergio Tomas Massa,youtube,2023-10-07,bullrich y melconian manipulan las palabras de...
23663,Sergio Tomas Massa,youtube,2023-10-08,javier milei la rompe en la mesa de mirtha leg...


In [11]:
# pandarallel apply function
df_temp = df['text'].parallel_apply(lambda x: pd.Series(LIWC.liwc().getLIWCCount(x)))
df_temp.columns = [f'liwc_{col}' for col in df_temp.columns]

# Directly assign to columns in original dataframe
df[df_temp.columns] = df_temp
print('Done!')
df

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=740), Label(value='0 / 740'))), HB…

Done!


Unnamed: 0,candidate,source,date,text,liwc_WC,liwc_Funct,liwc_TotPron,liwc_PronPer,liwc_Yo,liwc_Nosotro,...,liwc_Trabajo,liwc_Logro,liwc_Placer,liwc_Hogar,liwc_Dinero,liwc_Relig,liwc_Muerte,liwc_Asentir,liwc_NoFluen,liwc_Relleno
0,Alejandro Biondini,c5n,2023-06-27,los precandidatos presidenciales firmarán un c...,304,94,22,14,0,0,...,6,4,7,1,0,2,0,0,0,0
1,Alejandro Biondini,c5n,2023-06-29,la interna menos pensada: el verborrágico sant...,334,128,22,17,0,1,...,6,6,5,1,0,1,0,0,0,0
2,Alejandro Biondini,c5n,2023-09-05,"quién es lucía montenegro, una de las anfitrio...",303,126,34,24,0,0,...,10,7,4,1,1,2,0,1,0,0
3,Alejandro Biondini,clarin,2023-01-12,duro intercambio de insultos entre rodolfo tai...,503,226,50,34,0,0,...,2,3,2,0,6,1,0,1,0,0
4,Alejandro Biondini,clarin,2023-09-29,elecciones 2023: quiénes son los candidatos pr...,683,315,73,54,0,2,...,16,21,4,2,3,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23660,Sergio Tomas Massa,youtube,2023-10-05,rompecabezas - patricia bullrich pugna por ent...,2027160,1033402,319290,173296,26298,9939,...,41930,30295,15080,5307,21248,4688,2764,13725,7823,8
23661,Sergio Tomas Massa,youtube,2023-10-06,¿cuáles son los temas del segundo debate presi...,942731,480983,148660,81588,13875,3662,...,19197,13244,6695,2034,9705,1533,971,6016,5829,7
23662,Sergio Tomas Massa,youtube,2023-10-07,bullrich y melconian manipulan las palabras de...,543566,276183,85852,47572,7645,2382,...,10779,8061,3776,1288,5327,1236,596,3482,2944,1
23663,Sergio Tomas Massa,youtube,2023-10-08,javier milei la rompe en la mesa de mirtha leg...,558254,284541,88755,50304,9294,2625,...,10794,8196,4004,1404,4774,1057,879,4182,3518,4


In [12]:
df_temp = df['text'].parallel_apply(lambda x: pd.Series(anew_extract(x,anew_dict)))
df_temp.columns = [f'anew_{col}' for col in df_temp.columns]

# Directly assign to columns in original dataframe
df[df_temp.columns] = df_temp
print('Done!')
df

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=740), Label(value='0 / 740'))), HB…

Done!


Unnamed: 0,candidate,source,date,text,liwc_WC,liwc_Funct,liwc_TotPron,liwc_PronPer,liwc_Yo,liwc_Nosotro,...,liwc_Hogar,liwc_Dinero,liwc_Relig,liwc_Muerte,liwc_Asentir,liwc_NoFluen,liwc_Relleno,anew_valence,anew_arousal,anew_dominance
0,Alejandro Biondini,c5n,2023-06-27,los precandidatos presidenciales firmarán un c...,304,94,22,14,0,0,...,1,0,2,0,0,0,0,4.518333,4.235000,4.235000
1,Alejandro Biondini,c5n,2023-06-29,la interna menos pensada: el verborrágico sant...,334,128,22,17,0,1,...,1,0,1,0,0,0,0,4.076667,3.720000,3.720000
2,Alejandro Biondini,c5n,2023-09-05,"quién es lucía montenegro, una de las anfitrio...",303,126,34,24,0,0,...,1,1,2,0,1,0,0,5.178333,3.998333,3.998333
3,Alejandro Biondini,clarin,2023-01-12,duro intercambio de insultos entre rodolfo tai...,503,226,50,34,0,0,...,0,6,1,0,1,0,0,4.126250,4.156667,4.156667
4,Alejandro Biondini,clarin,2023-09-29,elecciones 2023: quiénes son los candidatos pr...,683,315,73,54,0,2,...,2,3,1,0,1,0,0,3.649474,4.473158,4.473158
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23660,Sergio Tomas Massa,youtube,2023-10-05,rompecabezas - patricia bullrich pugna por ent...,2027160,1033402,319290,173296,26298,9939,...,5307,21248,4688,2764,13725,7823,8,4.933192,4.046524,4.046524
23661,Sergio Tomas Massa,youtube,2023-10-06,¿cuáles son los temas del segundo debate presi...,942731,480983,148660,81588,13875,3662,...,2034,9705,1533,971,6016,5829,7,4.958307,4.054976,4.054976
23662,Sergio Tomas Massa,youtube,2023-10-07,bullrich y melconian manipulan las palabras de...,543566,276183,85852,47572,7645,2382,...,1288,5327,1236,596,3482,2944,1,4.981144,4.053256,4.053256
23663,Sergio Tomas Massa,youtube,2023-10-08,javier milei la rompe en la mesa de mirtha leg...,558254,284541,88755,50304,9294,2625,...,1404,4774,1057,879,4182,3518,4,4.963453,4.052129,4.052129


In [16]:
df_temp = df['text'].parallel_apply(lambda x: pd.Series(readability_index(x)))
df_temp.columns = [f'readability_{col}' for col in df_temp.columns]

# Directly assign to columns in original dataframe
df[df_temp.columns] = df_temp
print('Done!')
df

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=740), Label(value='0 / 740'))), HB…

Done!


Unnamed: 0,candidate,source,date,text,liwc_WC,liwc_Funct,liwc_TotPron,liwc_PronPer,liwc_Yo,liwc_Nosotro,...,anew_arousal,anew_dominance,readability_FKI,readability_FKR,readability_GunningFog,readability_CLI,readability_DaleChall,readability_ARI,readability_linsear,readability_spache
0,Alejandro Biondini,c5n,2023-06-27,los precandidatos presidenciales firmarán un c...,304,94,22,14,0,0,...,4.235000,4.235000,15.104849,16.532329,19.667409,13.886486,19.703333,12.417099,14.235294,11.371086
1,Alejandro Biondini,c5n,2023-06-29,la interna menos pensada: el verborrágico sant...,334,128,22,17,0,1,...,3.720000,3.720000,18.228275,7.767797,23.608392,15.391758,20.159244,17.181580,21.307692,12.314594
2,Alejandro Biondini,c5n,2023-09-05,"quién es lucía montenegro, una de las anfitrio...",303,126,34,24,0,0,...,3.998333,3.998333,17.979325,11.174388,24.077637,12.531772,19.833203,15.331382,22.083333,12.444405
3,Alejandro Biondini,clarin,2023-01-12,duro intercambio de insultos entre rodolfo tai...,503,226,50,34,0,0,...,4.156667,4.156667,14.214992,35.261932,18.046427,9.306642,20.036674,11.961521,17.363636,12.484314
4,Alejandro Biondini,clarin,2023-09-29,elecciones 2023: quiénes son los candidatos pr...,683,315,73,54,0,2,...,4.473158,4.473158,14.170838,31.127016,19.296066,11.533353,20.149402,12.556861,16.790323,12.205578
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23660,Sergio Tomas Massa,youtube,2023-10-05,rompecabezas - patricia bullrich pugna por ent...,2027160,1033402,319290,173296,26298,9939,...,4.046524,4.046524,1465.616445,-3840.555246,1545.876569,10.147150,208.748202,1921.695902,2692.103025,550.695927
23661,Sergio Tomas Massa,youtube,2023-10-06,¿cuáles son los temas del segundo debate presi...,942731,480983,148660,81588,13875,3662,...,4.054976,4.054976,578.841698,-1470.651380,612.388606,9.682718,92.904257,754.899460,1045.717600,221.702500
23662,Sergio Tomas Massa,youtube,2023-10-07,bullrich y melconian manipulan las palabras de...,543566,276183,85852,47572,7645,2382,...,4.053256,4.053256,399.109416,-991.271646,423.202006,9.867510,69.460963,518.361604,722.623810,154.974914
23663,Sergio Tomas Massa,youtube,2023-10-08,javier milei la rompe en la mesa de mirtha leg...,558254,284541,88755,50304,9294,2625,...,4.052129,4.052129,1022.221857,-2655.368072,1079.110007,9.802989,150.810545,1338.259618,1859.655502,386.188693


In [19]:
df_temp = df['text'].parallel_apply(lambda x: pd.Series(complexity_index(x)))
df_temp.columns = [f'complexity_{col}' for col in df_temp.columns]

# Directly assign to columns in original dataframe
df[df_temp.columns] = df_temp
print('Done!')
df

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=740), Label(value='0 / 740'))), HB…

Done!


Unnamed: 0,candidate,source,date,text,liwc_WC,liwc_Funct,liwc_TotPron,liwc_PronPer,liwc_Yo,liwc_Nosotro,...,readability_spache,complexity_hapaxDislogemna,complexity_hapaxLegomena,complexity_brunet,complexity_sichel,complexity_honore,complexity_TTR,complexity_chars,complexity_digits,complexity_whiteSpace
0,Alejandro Biondini,c5n,2023-06-27,los precandidatos presidenciales firmarán un c...,304,94,22,14,0,0,...,11.371086,0.101974,0.427632,1.178178e+06,0.171271,303.298198,0.595395,6.388158,0.014418,0.156025
1,Alejandro Biondini,c5n,2023-06-29,la interna menos pensada: el verborrágico sant...,334,128,22,17,0,1,...,12.314594,0.065868,0.482036,1.992702e+06,0.107843,319.990818,0.610778,6.580838,0.011829,0.151501
2,Alejandro Biondini,c5n,2023-09-05,"quién es lucía montenegro, una de las anfitrio...",303,126,34,24,0,0,...,12.444405,0.062706,0.504950,1.280686e+06,0.101064,321.153243,0.620462,6.359736,0.003633,0.156720
3,Alejandro Biondini,clarin,2023-01-12,duro intercambio de insultos entre rodolfo tai...,503,226,50,34,0,0,...,12.484314,0.057769,0.428287,1.223384e+07,0.106227,337.343837,0.543825,5.934263,0.003693,0.168177
4,Alejandro Biondini,clarin,2023-09-29,elecciones 2023: quiénes son los candidatos pr...,683,315,73,54,0,2,...,12.205578,0.064422,0.322108,4.007622e+07,0.141935,337.153989,0.453880,6.005857,0.021209,0.166260
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23660,Sergio Tomas Massa,youtube,2023-10-05,rompecabezas - patricia bullrich pugna por ent...,2027160,1033402,319290,173296,26298,9939,...,550.695927,0.004350,0.008171,3.077130e+40,0.177842,648.307098,0.024461,5.434710,0.003792,0.184002
23661,Sergio Tomas Massa,youtube,2023-10-06,¿cuáles son los temas del segundo debate presi...,942731,480983,148660,81588,13875,3662,...,221.702500,0.006720,0.017535,5.722434e+36,0.163621,621.595035,0.041069,5.354154,0.003930,0.186771
23662,Sergio Tomas Massa,youtube,2023-10-07,bullrich y melconian manipulan las palabras de...,543566,276183,85852,47572,7645,2382,...,154.974914,0.010159,0.021310,2.518326e+33,0.196174,596.511706,0.051786,5.391959,0.003978,0.185461
23663,Sergio Tomas Massa,youtube,2023-10-08,javier milei la rompe en la mesa de mirtha leg...,558254,284541,88755,50304,9294,2625,...,386.188693,0.009602,0.014627,4.680500e+32,0.219158,592.291922,0.043812,5.378504,0.003585,0.185925


In [20]:
print("done")

done


In [21]:
def get_polarity(text):
  # for each sentence in text calculate the polarity score and then take the average of all the sentences, return the avg of compound, positive, negative, and neutral scores
    scores = [sia.polarity_scores(s) for s in nltk.sent_tokenize(text)]
    avg_scores = {}
    # avging scores for all the sentences
    for key in scores[0].keys():
        avg_scores[key] = np.mean([score[key] for score in scores])
    return(avg_scores)


df_temp = df['text'].parallel_apply(lambda x: pd.Series(get_polarity(x)))
df_temp.columns = [f'polarity_{col}' for col in df_temp.columns]

# # Directly assign to columns in original dataframe
df[df_temp.columns] = df_temp
print('Done!')
df

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=740), Label(value='0 / 740'))), HB…

  df[df_temp.columns] = df_temp
  df[df_temp.columns] = df_temp
  df[df_temp.columns] = df_temp


Done!


Unnamed: 0,candidate,source,date,text,liwc_WC,liwc_Funct,liwc_TotPron,liwc_PronPer,liwc_Yo,liwc_Nosotro,...,complexity_sichel,complexity_honore,complexity_TTR,complexity_chars,complexity_digits,complexity_whiteSpace,polarity_neg,polarity_neu,polarity_pos,polarity_compound
0,Alejandro Biondini,c5n,2023-06-27,los precandidatos presidenciales firmarán un c...,304,94,22,14,0,0,...,0.171271,303.298198,0.595395,6.388158,0.014418,0.156025,0.027529,0.972471,0.000000,-0.040865
1,Alejandro Biondini,c5n,2023-06-29,la interna menos pensada: el verborrágico sant...,334,128,22,17,0,1,...,0.107843,319.990818,0.610778,6.580838,0.011829,0.151501,0.008000,0.992000,0.000000,-0.007900
2,Alejandro Biondini,c5n,2023-09-05,"quién es lucía montenegro, una de las anfitrio...",303,126,34,24,0,0,...,0.101064,321.153243,0.620462,6.359736,0.003633,0.156720,0.000000,0.995833,0.004167,0.030100
3,Alejandro Biondini,clarin,2023-01-12,duro intercambio de insultos entre rodolfo tai...,503,226,50,34,0,0,...,0.106227,337.343837,0.543825,5.934263,0.003693,0.168177,0.019455,0.976409,0.004136,-0.049609
4,Alejandro Biondini,clarin,2023-09-29,elecciones 2023: quiénes son los candidatos pr...,683,315,73,54,0,2,...,0.141935,337.153989,0.453880,6.005857,0.021209,0.166260,0.005387,0.994613,0.000000,-0.019097
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23660,Sergio Tomas Massa,youtube,2023-10-05,rompecabezas - patricia bullrich pugna por ent...,2027160,1033402,319290,173296,26298,9939,...,0.177842,648.307098,0.024461,5.434710,0.003792,0.184002,0.048578,0.944422,0.007034,-0.735121
23661,Sergio Tomas Massa,youtube,2023-10-06,¿cuáles son los temas del segundo debate presi...,942731,480983,148660,81588,13875,3662,...,0.163621,621.595035,0.041069,5.354154,0.003930,0.186771,0.056370,0.937365,0.006224,-0.439020
23662,Sergio Tomas Massa,youtube,2023-10-07,bullrich y melconian manipulan las palabras de...,543566,276183,85852,47572,7645,2382,...,0.196174,596.511706,0.051786,5.391959,0.003978,0.185461,0.040629,0.945467,0.010107,-0.327413
23663,Sergio Tomas Massa,youtube,2023-10-08,javier milei la rompe en la mesa de mirtha leg...,558254,284541,88755,50304,9294,2625,...,0.219158,592.291922,0.043812,5.378504,0.003585,0.185925,0.043560,0.948388,0.008067,-0.666806


In [22]:
print("calculated")

calculated


In [23]:
df.to_csv('featured_extracted.csv', index=False)

In [24]:
print("all done collected")

all done collected


In [25]:
candidates = {
    'Myriam Bregman' :    ['Myriam Bregman', 'Miriam Bregman', 'Miriam Breman', 'Myriam Breman', 'Myriam', 'Miriam',],
    'Gabriel Solano' :    ['Gabriel Solano', 'Solano',],
    'Alejandro Biondini' :    ['Alejandro Biondini', 'Biondini',],
    'Juan Schiaretti' :    ['Juan Schiaretti', 'Juan Schriareti', 'Schiareti', 'Squiareti',],
    'Horacio Rodriguez Larreta'  :    ['Horacio Rodriguez Larreta', 'Larreta', 'Lareta', 'Sombrilla', 'Pelado',],
    'Patricia Bullrich' :    ['Patricia Bullrich', 'Bullrich', 'Patricia Bulrich', 'Bulrich', 'Patricia', 'Pato', 'Montonera',],
    'Javier Milei' :    ['Javier Milei', 'Miley', 'Mi ley', 'Milaw', 'Leon', 'Peluca',],
    'Nazareno Etchepare' :    ['Nazareno Etchepare', 'Etchepare',],
    'Ramiro Vasena' :    ['Ramiro Vasena', 'Ramiro basena', 'Basena', 'Vasena',],
    'Manuela Castañeira' :    ['Manuela Castañeira', 'Castañeira', 'Manuela Castanieira', 'Castanieira',],
    'Raul Albarracin' :    ['Raul Albarracin', 'Albarracin', 'Albaracin',],
    'Raul Castells' :    ['Raul Castells', 'Raul Castels', 'Castells', 'Castels',],
    'Santiago Cuneo' :    ['Santiago Cuneo', 'Cuneo',],
    'Jesus Escobar' :    ['Jesus Escobar', 'Escobar',],
    'Marcelo Ramal' :    ['Marcelo Ramal', 'Ramal',],
    'Guillermo Moreno' :    ['Guillermo Moreno', 'Moreno',],
    'Oscar Giardinelli' :    ['Oscar Giardinelli', 'Oscar Giardineli', 'Giardinelli', 'Giardineli',],
    'Martin Ayerbe Ortiz'  :    ['Martin Ayerbe Ortiz', ],
    'Reina Ibañez' :    ['Reina Ibañez', 'Reina Xiomara Ibañez', ],
    'Andres Passamonti' :    ['Andres Passamonti', 'Passamonti', 'Pasamonti', ],
    'Sergio Tomas Massa'  :    ['Sergio Tomas Massa', 'Sergio Massa', 'Sergio Masa', 'Masa', 'Massa', 'Panqueque'],
    'Juan Grabois' :    ['Juan Grabois', 'Grabois', 'Gravois', ],
    'Julio Barbaro' :    ['Julio Barbaro', 'Barbaro', ],
    'Eliodoro Martinez' :    ['Eliodoro Martinez', ],
    'Jorge Oliver' :    ['Jorge Oliver', ],
    'Carolina Bartolini' :    ['Carolina Bartolini', 'Barotilin', ],
    'Paula Arias' :    ['Paula Arias'],
}

df

Unnamed: 0,candidate,source,date,text,liwc_WC,liwc_Funct,liwc_TotPron,liwc_PronPer,liwc_Yo,liwc_Nosotro,...,complexity_sichel,complexity_honore,complexity_TTR,complexity_chars,complexity_digits,complexity_whiteSpace,polarity_neg,polarity_neu,polarity_pos,polarity_compound
0,Alejandro Biondini,c5n,2023-06-27,los precandidatos presidenciales firmarán un c...,304,94,22,14,0,0,...,0.171271,303.298198,0.595395,6.388158,0.014418,0.156025,0.027529,0.972471,0.000000,-0.040865
1,Alejandro Biondini,c5n,2023-06-29,la interna menos pensada: el verborrágico sant...,334,128,22,17,0,1,...,0.107843,319.990818,0.610778,6.580838,0.011829,0.151501,0.008000,0.992000,0.000000,-0.007900
2,Alejandro Biondini,c5n,2023-09-05,"quién es lucía montenegro, una de las anfitrio...",303,126,34,24,0,0,...,0.101064,321.153243,0.620462,6.359736,0.003633,0.156720,0.000000,0.995833,0.004167,0.030100
3,Alejandro Biondini,clarin,2023-01-12,duro intercambio de insultos entre rodolfo tai...,503,226,50,34,0,0,...,0.106227,337.343837,0.543825,5.934263,0.003693,0.168177,0.019455,0.976409,0.004136,-0.049609
4,Alejandro Biondini,clarin,2023-09-29,elecciones 2023: quiénes son los candidatos pr...,683,315,73,54,0,2,...,0.141935,337.153989,0.453880,6.005857,0.021209,0.166260,0.005387,0.994613,0.000000,-0.019097
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23660,Sergio Tomas Massa,youtube,2023-10-05,rompecabezas - patricia bullrich pugna por ent...,2027160,1033402,319290,173296,26298,9939,...,0.177842,648.307098,0.024461,5.434710,0.003792,0.184002,0.048578,0.944422,0.007034,-0.735121
23661,Sergio Tomas Massa,youtube,2023-10-06,¿cuáles son los temas del segundo debate presi...,942731,480983,148660,81588,13875,3662,...,0.163621,621.595035,0.041069,5.354154,0.003930,0.186771,0.056370,0.937365,0.006224,-0.439020
23662,Sergio Tomas Massa,youtube,2023-10-07,bullrich y melconian manipulan las palabras de...,543566,276183,85852,47572,7645,2382,...,0.196174,596.511706,0.051786,5.391959,0.003978,0.185461,0.040629,0.945467,0.010107,-0.327413
23663,Sergio Tomas Massa,youtube,2023-10-08,javier milei la rompe en la mesa de mirtha leg...,558254,284541,88755,50304,9294,2625,...,0.219158,592.291922,0.043812,5.378504,0.003585,0.185925,0.043560,0.948388,0.008067,-0.666806
