In [None]:
#%pip install textblob
#%pip install deep_translator

In [8]:
import os
import pandas as pd
from textblob import TextBlob
from deep_translator import GoogleTranslator

In [13]:
# Getting the path and the file from the data raw directory
ruta_archivo = os.path.join('..', "data", "raw", "youtube_comments_with_cleaned_ts.csv")
df_sentiment_base = pd.read_csv(ruta_archivo)

In [14]:
def translate_text(text, target_language='en'):
    try:
        # Inicializa el traductor
        translator = GoogleTranslator(target_lang=target_language)
        # Traduce el texto
        translation = translator.translate(text)
        return translation
    except Exception as e:
        print(f"Error al traducir el texto '{text}': {e}")
        return None

In [15]:
df_sentiment_base.shape

(86549, 19)

In [16]:
# CREO LA PRIMERA DIVISION DEL DATAFRAME PARA QUE SE PUEDA TRADUCIR
subset_1 = df_sentiment_base.iloc[0:9999]

In [None]:
# Pido que se cree una nueva columna "translated_text" para la traducción de la columna "comment". 
subset_1['translated_text'] = subset_1['comment'].apply(lambda x: translate_text(x, target_language='en'))

# Da error en aquellos comentarios que tienen una extensión superior a 5000 caracteres. 

In [20]:
# Chequeo que funciono
subset_1.tail(3)

Unnamed: 0,comment_id,comment,user_id,user_name,comment_time,comment_likes,total_reply_count,is_top_level_comment,video_title,channel_title,video_published_at,video_views,video_likes,relacion_evento,evento,tipo_evento,condiciones_cuenta,duration_timedelta,duration_seconds,translated_text
0,UgwZJGlfpFvs-r_l-hB4AaABAg,Esta vieja esta más loca cómo una cabra..😂😂😂,UC7s8hGLBfkLOzpho73qs4FQ,@siulenlo8728,2024-08-15 02:14:26+00:00,0,0,True,MILEI SE CRUZÓ MAL CON PERIODISTA QUE NO PARÓ ...,El Peluca Milei,2024-01-26 12:00:37+00:00,628428,23596,No,Primer paro nacional,Desfavorable,A favor,0 days 00:41:21,2481.0,This old lady is as crazy as a goat..😂😂😂
1,UgyEy8OvhoFZrm5GJdd4AaABAg,Cuanto tiempo?,UCAHJ7ysRA9VnBb7JsHBcYPQ,@Raul-bb3nc,2024-05-18 04:07:18+00:00,0,0,True,MILEI SE CRUZÓ MAL CON PERIODISTA QUE NO PARÓ ...,El Peluca Milei,2024-01-26 12:00:37+00:00,628428,23596,No,Primer paro nacional,Desfavorable,A favor,0 days 00:41:21,2481.0,How long?


In [21]:
# "Activo" el analisis de sentimientos. Al final solo uso polarity, que indica sentimientos, y no subjectivity que indica que tan objetivo
#o subjetivo es lo que se dice. 
def get_subjectivity(text):
    return TextBlob(text).sentiment.subjectivity

def get_polarity(text):
    return TextBlob(text).sentiment.polarity

In [None]:
##El analisis se da ppr un rango de -1 a 1. Cuando es 0 es un valor neutral.
##Por eso, defino una función que tome esos valores y los convierta a una clasificación nominal
def analisis(score):
    if score < 0:
        return 'Negativo'
    elif score  > 0:
        return 'Positivo'
    else:
        return 'Neutral'

In [None]:
# Corrijo para  evitar errores en el procesamiento tras la traducción. Los valores None pasan a string vacio
subset_1['translated_text'] = subset_1['translated_text'].fillna('')

# Ahora analiza la polaridad en ingles y sí encuentra un resultado. Si lo analizo en español solo da neutrales
subset_1['polarity'] = subset_1['translated_text'].apply(get_polarity)
subset_1.head(1)

In [None]:
##Aplico la función que nominaliza los resultados. 
subset_1['Sentimiento'] = subset_1['polarity'].apply(analisis)

In [None]:
##Chequeo los resultados
sentimiento_comentario = subset_1.value_counts(subset='Sentimiento')
denominador = subset_1.shape[0]
sentimiento_comentario/denominador

In [22]:
##Guardo el subset

# Relative destination for all the notebooks subset_1
directorio_destino = os.path.join('..', "data", "processed")
subset_1_destino = os.path.join(directorio_destino, 'subset_1_score.csv')

# Saving the combined DataFrame to a CSV file with UTF-8 encoding
subset_1.to_csv(subset_1_destino, index=False, encoding='utf-8')

In [8]:
##REPITO LO ANTERIOR PERO CON UN NUEVO SUBSET
subset_2 = df_sentiment_base.iloc[10000:19999]

In [9]:
subset_2.shape

(9999, 19)

In [None]:
subset_2['translated_text'] = subset_2['comment'].apply(lambda x: translate_text(x, target_language='en'))
subset_2['translated_text'] = subset_2['translated_text'].fillna('')
subset_2['polarity'] = subset_2['translated_text'].apply(get_polarity)
subset_2['Sentimiento'] = subset_2['polarity'].apply(analisis)

In [19]:
sentimiento_comentario = subset_2.value_counts(subset='Sentimiento')
denominador = subset_2.shape[0]
sentimiento_comentario/denominador

Sentimiento
Positivo    0.425043
Neutral     0.375538
Negativo    0.199420
Name: count, dtype: float64

In [20]:
subset_2.head(3)

Unnamed: 0,comment_id,comment,user_id,user_name,comment_time,comment_likes,total_reply_count,is_top_level_comment,video_title,channel_title,...,video_likes,relacion_evento,evento,tipo_evento,condiciones_cuenta,duration_timedelta,duration_seconds,translated_text,polarity,Sentimiento
10000,UgxLZTkUeUn3rvFBMdN4AaABAg,Pobre Argentina....llevará años recuperar este...,UCCjbWaOkV-l9AYjYLVNy2SA,@estelalopez5667,2024-05-18 00:03:26+00:00,0,1,True,MILEI ESTALLÓ EN ESPAÑA EN PLENA PRESENTACIÓN ...,El Peluca Milei,...,22256,Si,Crisis diplomatica España,Desfavorable,A favor,0 days 00:53:43,3223.0,Poor Argentina...it will take years to recover...,-0.182812,Negativo
10001,UgxLZTkUeUn3rvFBMdN4AaABAg.A3ZCRiq1NFPA3ZL6g8FJ7u,La riqueza de Argentina os la robaron durante ...,UC8I9xFPBRRTh_eIjcguU1Tg,@Aguamarina2011,2024-05-18 01:19:12+00:00,0,0,False,MILEI ESTALLÓ EN ESPAÑA EN PLENA PRESENTACIÓN ...,El Peluca Milei,...,22256,Si,Crisis diplomatica España,Desfavorable,A favor,0 days 00:53:43,3223.0,Argentina's wealth was stolen from you for mor...,0.25,Positivo
10002,Ugx9kP6_S8DPJ3TfiC14AaABAg,Idolo mi presidente ❤,UCmbkdIwg--_En03OyPho0LA,@roxanavaldez6669,2024-05-18 00:03:19+00:00,0,0,True,MILEI ESTALLÓ EN ESPAÑA EN PLENA PRESENTACIÓN ...,El Peluca Milei,...,22256,Si,Crisis diplomatica España,Desfavorable,A favor,0 days 00:53:43,3223.0,I idolize my president ❤,0.0,Neutral


In [21]:
##Guardo el subset

# Relative destination for subset_2
subset_2_destino = os.path.join(directorio_destino, 'subset_2_score.csv')

# Saving the combined DataFrame to a CSV file with UTF-8 encoding
subset_2.to_csv(subset_2_destino, index=False, encoding='utf-8')

In [10]:
subset_3 = df_sentiment_base.iloc[20000:29999]
subset_3['translated_text'] = subset_3['comment'].apply(lambda x: translate_text(x, target_language='en'))

In [13]:
subset_3['translated_text'] = subset_3['translated_text'].fillna('')
subset_3['polarity'] = subset_3['translated_text'].apply(get_polarity)
subset_3['Sentimiento'] = subset_3['polarity'].apply(analisis)
subset_3.head(2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset_3['translated_text'] = subset_3['translated_text'].fillna('')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset_3['polarity'] = subset_3['translated_text'].apply(get_polarity)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset_3['Sentimiento'] = subset_3['polarity'].apply(analisis)


Unnamed: 0,comment_id,comment,user_id,user_name,comment_time,comment_likes,total_reply_count,is_top_level_comment,video_title,channel_title,...,video_likes,relacion_evento,evento,tipo_evento,condiciones_cuenta,duration_timedelta,duration_seconds,translated_text,polarity,Sentimiento
20000,UgzT_aJwGQhdE-5PNR54AaABAg,Y los gritos?,UCN_nVM1YZFylFLSW4dqBh9Q,@Vallhansa,2024-06-03 00:26:02+00:00,0,0,True,MILEI A LOS GRITOS EN MITRE TRAS EL ESCÁNDALO ...,El Peluca Milei,...,10204,No,Crisis alimentos Capital Humano,Desfavorable,A favor,0 days 00:48:36,2916.0,And the screams?,0.0,Neutral
20001,UgyQCpxofFX1sxUFyWB4AaABAg,Que Dios. Le de mucha paciencia mi presidente....,UCQgEZJ_4IPGuN5yu5s3DdDQ,@claudiabanegas5156,2024-06-03 00:25:32+00:00,17,0,True,MILEI A LOS GRITOS EN MITRE TRAS EL ESCÁNDALO ...,El Peluca Milei,...,10204,No,Crisis alimentos Capital Humano,Desfavorable,A favor,0 days 00:48:36,2916.0,"May God give you a lot of patience, my preside...",0.0,Neutral


In [14]:
sentimiento_comentario = subset_3.value_counts(subset='Sentimiento')
denominador = subset_3.shape[0]
sentimiento_comentario/denominador

Sentimiento
Positivo    0.448345
Neutral     0.379238
Negativo    0.172417
Name: count, dtype: float64

In [15]:
##Guardo el subset

# Relative destination for subset_3
subset_3_destino = os.path.join(directorio_destino, 'subset_3_score.csv')

# Saving the combined DataFrame to a CSV file with UTF-8 encoding
subset_3.to_csv(subset_3_destino, index=False, encoding='utf-8')

In [28]:
favorable = subset_3[subset_3['tipo_evento'] == 'Favorable']
sentimiento_comentario = favorable.value_counts(subset='Sentimiento')
denominador = favorable.shape[0]
sentimiento_comentario/denominador

Sentimiento
Positivo    0.535318
Neutral     0.315236
Negativo    0.149445
Name: count, dtype: float64

In [29]:
desfavorable = subset_3[subset_3['tipo_evento'] == 'Desfavorable']
sentimiento_comentario = desfavorable.value_counts(subset='Sentimiento')
denominador = favorable.shape[0]
sentimiento_comentario/denominador

Sentimiento
Positivo    0.398132
Neutral     0.336836
Negativo    0.206071
Name: count, dtype: float64

In [30]:
neutral = subset_3[subset_3['tipo_evento'] == 'Neutral']
sentimiento_comentario = neutral.value_counts(subset='Sentimiento')
denominador = neutral.shape[0]
sentimiento_comentario/denominador

Sentimiento
Positivo    0.432125
Neutral     0.400809
Negativo    0.167066
Name: count, dtype: float64

In [31]:
afavor = subset_3[subset_3['condiciones_cuenta'] == 'A favor']
sentimiento_comentario = afavor.value_counts(subset='Sentimiento')
denominador = afavor.shape[0]
sentimiento_comentario/denominador

Sentimiento
Positivo    0.448345
Neutral     0.379238
Negativo    0.172417
Name: count, dtype: float64

In [34]:
libro = subset_3[subset_3['evento'] == 'Presentación de libro']
sentimiento_comentario = libro.value_counts(subset='Sentimiento')
denominador = libro.shape[0]
sentimiento_comentario/denominador

Sentimiento
Positivo    0.535318
Neutral     0.315236
Negativo    0.149445
Name: count, dtype: float64

In [5]:
subset_4 = df_sentiment_base.iloc[30000:39999]
subset_4['translated_text'] = subset_4['comment'].apply(lambda x: translate_text(x, target_language='en'))

In [None]:
subset_4['translated_text'] = subset_4['translated_text'].fillna('')
subset_4['polarity'] = subset_4['translated_text'].apply(get_polarity)
subset_4['Sentimiento'] = subset_4['polarity'].apply(analisis)

In [12]:
subset_4.tail(4)

Unnamed: 0,comment_id,comment,user_id,user_name,comment_time,comment_likes,total_reply_count,is_top_level_comment,video_title,channel_title,...,video_likes,relacion_evento,evento,tipo_evento,condiciones_cuenta,duration_timedelta,duration_seconds,translated_text,polarity,Sentimiento
39995,UgytB5sbtpcUcdvRq9d4AaABAg,Milei sin lugar a dudas debilitó en el día de ...,UCGKK5rFtErdlIBR1BI13u7Q,@carlosvilabarea1367,2024-04-24 13:21:59+00:00,0,0,True,Marcha universitaria: los números detrás del r...,LA NACION,...,919,Si,Marcha federal universitaria,Desfavorable,Neutral,0 days 00:53:45,3225.0,Milei has undoubtedly weakened the agreements ...,0.117963,Positivo
39996,UgwNfZ_HmAUKpJNQsnR4AaABAg,Acuerdencen que a Milei lo votaron 14 millones,UCcpWvYIzAATlRqLUH5tx-1w,@celinaortiz9624,2024-04-24 13:19:38+00:00,0,0,True,Marcha universitaria: los números detrás del r...,LA NACION,...,919,Si,Marcha federal universitaria,Desfavorable,Neutral,0 days 00:53:45,3225.0,Remember that 14 million people voted for Milei,0.0,Neutral
39997,Ugz6TrBMZhaocUMh3qd4AaABAg,Dejen de decir que los votantes de Milei estab...,UCcpWvYIzAATlRqLUH5tx-1w,@celinaortiz9624,2024-04-24 13:16:37+00:00,0,0,True,Marcha universitaria: los números detrás del r...,LA NACION,...,919,Si,Marcha federal universitaria,Desfavorable,Neutral,0 days 00:53:45,3225.0,Stop saying that Milei's voters were at the ma...,-0.083333,Negativo
39998,UgwQtr104ygioxYsmn14AaABAg,"Antes de que se vaya el cabezón del estudio, m...",UCDISQvn25PqXGmAtU556-cQ,@tonysantocastano7742,2024-04-24 13:15:20+00:00,0,0,True,Marcha universitaria: los números detrás del r...,LA NACION,...,919,Si,Marcha federal universitaria,Desfavorable,Neutral,0 days 00:53:45,3225.0,"Before the big head leaves the studio, make su...",-0.02,Negativo


In [13]:
sentimiento_comentario = subset_4.value_counts(subset='Sentimiento')
denominador = subset_4.shape[0]
sentimiento_comentario/denominador

Sentimiento
Positivo    0.442144
Neutral     0.369237
Negativo    0.188619
Name: count, dtype: float64

In [14]:
##Guardo el subset

# Relative destination for subset_3
subset_4_destino = os.path.join(directorio_destino, 'subset_4_score.csv')

# Saving the combined DataFrame to a CSV file with UTF-8 encoding
subset_4.to_csv(subset_4_destino, index=False, encoding='utf-8')

In [16]:
marcha = subset_4[subset_4['evento'] == 'Marcha federal universitaria']
sentimiento_comentario = marcha.value_counts(subset='Sentimiento')
denominador = marcha.shape[0]
sentimiento_comentario/denominador

Sentimiento
Positivo    0.402271
Neutral     0.398685
Negativo    0.199044
Name: count, dtype: float64

In [17]:
subset_5 = df_sentiment_base.iloc[40000:49999]
subset_5.shape

(9999, 19)

In [18]:
subset_5.head(2)

Unnamed: 0,comment_id,comment,user_id,user_name,comment_time,comment_likes,total_reply_count,is_top_level_comment,video_title,channel_title,video_published_at,video_views,video_likes,relacion_evento,evento,tipo_evento,condiciones_cuenta,duration_timedelta,duration_seconds
40000,UgwH3OqMwF221Bz6CT14AaABAg,El gobierno y los universitarios fueron &quot;...,UC4_wwdbSqElQX73Oqax8Vtg,@i.p.8421,2024-04-24 13:03:51+00:00,0,0,True,Marcha universitaria: los números detrás del r...,LA NACION,2024-04-24 01:15:42+00:00,53334,919,Si,Marcha federal universitaria,Desfavorable,Neutral,0 days 00:53:45,3225.0
40001,UgwDRthVQPbxz5q6or54AaABAg,Si no hay auditoría como pueden decir si el au...,UCZc7IIBUs6CHw_Tj0o-VHog,@jorgegarcia-rf9pk,2024-04-24 12:54:13+00:00,2,1,True,Marcha universitaria: los números detrás del r...,LA NACION,2024-04-24 01:15:42+00:00,53334,919,Si,Marcha federal universitaria,Desfavorable,Neutral,0 days 00:53:45,3225.0


In [None]:
subset_5['translated_text'] = subset_5['comment'].apply(lambda x: translate_text(x, target_language='en'))

In [25]:
subset_5['translated_text'] = subset_5['translated_text'].fillna('')
subset_5['polarity'] = subset_5['translated_text'].apply(get_polarity)
subset_5['Sentimiento'] = subset_5['polarity'].apply(analisis)
subset_5.tail(2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset_5['translated_text'] = subset_5['translated_text'].fillna('')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset_5['polarity'] = subset_5['translated_text'].apply(get_polarity)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset_5['Sentimiento'] = subset_5['polarity'].apply(analisis)


Unnamed: 0,comment_id,comment,user_id,user_name,comment_time,comment_likes,total_reply_count,is_top_level_comment,video_title,channel_title,...,video_likes,relacion_evento,evento,tipo_evento,condiciones_cuenta,duration_timedelta,duration_seconds,translated_text,polarity,Sentimiento
49997,UgyGHMqyiqeH4QvFgu14AaABAg,"Me hace acordar a mi ex pastor evangélico, nos...",UClLBOFpUbijtH1J6P8OoZuw,@richardordenanz8294,2024-05-23 02:16:53+00:00,4,8,True,EN VIVO | Habla Javier Milei en la presentació...,LA NACION,...,5116,Si,Presentación de libro,Favorable,Neutral,0 days 03:24:25,12265.0,"It reminds me of my former evangelical pastor,...",0.0,Neutral
49998,UgyGHMqyiqeH4QvFgu14AaABAg.A3lJgZOOwonA3lKNxzxDMd,La diferencia es que esto es ciencia politica ...,UC1nZe3l_PnegPOpNhpJwj_A,@anapichinini9658,2024-05-23 02:22:57+00:00,0,0,False,EN VIVO | Habla Javier Milei en la presentació...,LA NACION,...,5116,Si,Presentación de libro,Favorable,Neutral,0 days 03:24:25,12265.0,The difference is that this is exact political...,0.2125,Positivo


In [26]:
sentimiento_comentario = subset_5.value_counts(subset='Sentimiento')
denominador = subset_5.shape[0]
sentimiento_comentario/denominador

Sentimiento
Positivo    0.391139
Neutral     0.385639
Negativo    0.223222
Name: count, dtype: float64

In [27]:
marcha = subset_5[subset_5['evento'] == 'Marcha federal universitaria']
sentimiento_comentario = marcha.value_counts(subset='Sentimiento')
denominador = marcha.shape[0]
sentimiento_comentario/denominador

Sentimiento
Neutral     0.388407
Positivo    0.371073
Negativo    0.240520
Name: count, dtype: float64

In [28]:
##Guardo el subset

# Relative destination for subset_3
subset_5_destino = os.path.join(directorio_destino, 'subset_5_score.csv')

# Saving the combined DataFrame to a CSV file with UTF-8 encoding
subset_5.to_csv(subset_5_destino, index=False, encoding='utf-8')

In [8]:
subset_6 = df_sentiment_base.iloc[50000:59999]
subset_6['translated_text'] = subset_6['comment'].apply(lambda x: translate_text(x, target_language='en'))

In [11]:
subset_6['translated_text'] = subset_6['translated_text'].fillna('')
subset_6['polarity'] = subset_6['translated_text'].apply(get_polarity)
subset_6['Sentimiento'] = subset_6['polarity'].apply(analisis)
subset_6.tail(2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset_6['translated_text'] = subset_6['translated_text'].fillna('')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset_6['polarity'] = subset_6['translated_text'].apply(get_polarity)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset_6['Sentimiento'] = subset_6['polarity'].apply(analisis)


Unnamed: 0,comment_id,comment,user_id,user_name,comment_time,comment_likes,total_reply_count,is_top_level_comment,video_title,channel_title,...,video_likes,relacion_evento,evento,tipo_evento,condiciones_cuenta,duration_timedelta,duration_seconds,translated_text,polarity,Sentimiento
59997,Ugxht07MM5EwvIk98BN4AaABAg,Como si el problema de las universidades fuera...,UCm46Nui3sGhN2YGSH0pzaAw,@malesan9670,2024-05-17 18:13:38+00:00,0,0,True,¡QUÉ JUGADORA! Estudiante dejó PEDALEANDO EN E...,Revolución Popular Noticias,...,4810,Si,Marcha federal universitaria,Desfavorable,En contra,0 days 00:07:47,467.0,As if the problem of universities were foreign...,0.05,Positivo
59998,Ugw02ZpEMWe8ZPfVFZZ4AaABAg,"Los extranjeros que estudian pagan IVA, como t...",UCFeziiiUqJG-92lcuq8aNsQ,@lai1038,2024-05-14 09:09:02+00:00,0,0,True,¡QUÉ JUGADORA! Estudiante dejó PEDALEANDO EN E...,Revolución Popular Noticias,...,4810,Si,Marcha federal universitaria,Desfavorable,En contra,0 days 00:07:47,467.0,"Foreigners who study pay VAT, like everyone el...",-0.03125,Negativo


In [12]:
sentimiento_comentario = subset_6.value_counts(subset='Sentimiento')
denominador = subset_6.shape[0]
sentimiento_comentario/denominador

Sentimiento
Neutral     0.431643
Positivo    0.368637
Negativo    0.199720
Name: count, dtype: float64

In [13]:
marcha = subset_6[subset_6['evento'] == 'Marcha federal universitaria']
sentimiento_comentario = marcha.value_counts(subset='Sentimiento')
denominador = marcha.shape[0]
sentimiento_comentario/denominador

Sentimiento
Neutral     0.497685
Positivo    0.273148
Negativo    0.229167
Name: count, dtype: float64

In [14]:
##Guardo el subset

# Relative destination for subset_3
subset_6_destino = os.path.join(directorio_destino, 'subset_6_score.csv')

# Saving the combined DataFrame to a CSV file with UTF-8 encoding
subset_6.to_csv(subset_6_destino, index=False, encoding='utf-8')

In [None]:
subset_7 = df_sentiment_base.iloc[60000:79999]
subset_7['translated_text'] = subset_7['comment'].apply(lambda x: translate_text(x, target_language='en'))

In [17]:
subset_7['translated_text'] = subset_7['translated_text'].fillna('')
subset_7['polarity'] = subset_7['translated_text'].apply(get_polarity)
subset_7['Sentimiento'] = subset_7['polarity'].apply(analisis)
subset_7.tail(2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset_7['translated_text'] = subset_7['translated_text'].fillna('')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset_7['polarity'] = subset_7['translated_text'].apply(get_polarity)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset_7['Sentimiento'] = subset_7['polarity'].apply(analisis)


Unnamed: 0,comment_id,comment,user_id,user_name,comment_time,comment_likes,total_reply_count,is_top_level_comment,video_title,channel_title,...,video_likes,relacion_evento,evento,tipo_evento,condiciones_cuenta,duration_timedelta,duration_seconds,translated_text,polarity,Sentimiento
79997,UgyUuTU0m2M4rQjYumx4AaABAg,La mayoria de la gente que estaba en esa &quot...,UCRx0pnszHUQkvi5CLkxqflQ,@LightSpark.,2024-04-24 22:08:57+00:00,1,0,True,"Juan Grabois: ""La Marcha Federal Universitaria...",El Destape,...,454,Si,Marcha federal universitaria,Desfavorable,En contra,0 days 00:18:55,1135.0,Most of the people who were in that &quot;&quo...,0.158333,Positivo
79998,UgytqvgiiDluaVTc8kJ4AaABAg,"<a href=""https://www.youtube.com/watch?v=gsgxo...",UCZYNzXq8eBQIL76RPh9XIrw,@albertooscardominguez685,2024-04-24 22:04:38+00:00,4,0,True,"Juan Grabois: ""La Marcha Federal Universitaria...",El Destape,...,454,Si,Marcha federal universitaria,Desfavorable,En contra,0 days 00:18:55,1135.0,"<a href=""https://www.youtube.com/watch?v=gsgxo...",0.0,Neutral


In [19]:
sentimiento_comentario = subset_7.value_counts(subset='Sentimiento')
denominador = subset_7.shape[0]
sentimiento_comentario/denominador

Sentimiento
Neutral     0.434622
Positivo    0.357018
Negativo    0.208360
Name: count, dtype: float64

In [20]:
##Guardo el subset

# Relative destination for subset_3
subset_7_destino = os.path.join(directorio_destino, 'subset_7_score.csv')

# Saving the combined DataFrame to a CSV file with UTF-8 encoding
subset_7.to_csv(subset_7_destino, index=False, encoding='utf-8')

In [27]:
subset_8 = df_sentiment_base.iloc[80000:86549]
subset_8.tail (2)

Unnamed: 0,comment_id,comment,user_id,user_name,comment_time,comment_likes,total_reply_count,is_top_level_comment,video_title,channel_title,video_published_at,video_views,video_likes,relacion_evento,evento,tipo_evento,condiciones_cuenta,duration_timedelta,duration_seconds
86547,Ugy_3POupZ7OnaHxYW94AaABAg,"Si te tiran veneno , q no esté vencido?! Es e...",UCUmxUOorTMXQZd0y5y7mL2Q,@inesguilon9505,2024-06-13 20:49:26+00:00,16,0,True,"EN PIE, A PESAR DE TODO | Editorial de Darío V...",El Destape,2024-06-13 20:30:02+00:00,9031,546,Si,Aprobación total Ley bases,Favorable,En contra,0 days 00:17:33,1053.0
86548,UgxkQ3hf2dIY0Cl1qI94AaABAg,Dario no hay nada mejor que un pueblo pobre e ...,UCxEDb3DAUXbIxV8M4ynX4cA,@user-mq5gb4xj3x,2024-06-13 20:46:50+00:00,18,0,True,"EN PIE, A PESAR DE TODO | Editorial de Darío V...",El Destape,2024-06-13 20:30:02+00:00,9031,546,Si,Aprobación total Ley bases,Favorable,En contra,0 days 00:17:33,1053.0


In [None]:
subset_8['translated_text'] = subset_8['comment'].apply(lambda x: translate_text(x, target_language='en'))

In [None]:
subset_8['translated_text'] = subset_8['translated_text'].fillna('')
subset_8['polarity'] = subset_8['translated_text'].apply(get_polarity)
subset_8['Sentimiento'] = subset_8['polarity'].apply(analisis)
subset_8.tail(2)

In [31]:
sentimiento_comentario = subset_8.value_counts(subset='Sentimiento')
denominador = subset_8.shape[0]
sentimiento_comentario/denominador

Sentimiento
Neutral     0.403573
Positivo    0.340510
Negativo    0.255917
Name: count, dtype: float64

In [32]:
##Guardo el subset

# Relative destination for subset_3
subset_8_destino = os.path.join(directorio_destino, 'subset_8_score.csv')

# Saving the combined DataFrame to a CSV file with UTF-8 encoding
subset_8.to_csv(subset_8_destino, index=False, encoding='utf-8')

In [38]:
##Uno los diferentes subsets en la base original
dfscore = pd.concat([subset_1, subset_2, subset_3, subset_4, subset_5, subset_6, subset_7, subset_8], axis=0, ignore_index=True)

In [39]:
dfscore.shape

(86542, 22)

In [40]:
dfscore.head(10)

Unnamed: 0,comment_id,comment,user_id,user_name,comment_time,comment_likes,total_reply_count,is_top_level_comment,video_title,channel_title,...,video_likes,relacion_evento,evento,tipo_evento,condiciones_cuenta,duration_timedelta,duration_seconds,translated_text,polarity,Sentimiento
0,UgwZJGlfpFvs-r_l-hB4AaABAg,Esta vieja esta más loca cómo una cabra..😂😂😂,UC7s8hGLBfkLOzpho73qs4FQ,@siulenlo8728,2024-08-15 02:14:26+00:00,0,0,True,MILEI SE CRUZÓ MAL CON PERIODISTA QUE NO PARÓ ...,El Peluca Milei,...,23596,No,Primer paro nacional,Desfavorable,A favor,0 days 00:41:21,2481.0,This old lady is as crazy as a goat..😂😂😂,-0.25,Negativo
1,UgyEy8OvhoFZrm5GJdd4AaABAg,Cuanto tiempo?,UCAHJ7ysRA9VnBb7JsHBcYPQ,@Raul-bb3nc,2024-05-18 04:07:18+00:00,0,0,True,MILEI SE CRUZÓ MAL CON PERIODISTA QUE NO PARÓ ...,El Peluca Milei,...,23596,No,Primer paro nacional,Desfavorable,A favor,0 days 00:41:21,2481.0,How long?,-0.05,Negativo
2,UgwHEuRpsRRYPMdUIgl4AaABAg,DEMENTE,UCpkOnRIXHKSxUTB_o670KUQ,@DanielaMagnetto,2024-05-10 15:27:31+00:00,0,0,True,MILEI SE CRUZÓ MAL CON PERIODISTA QUE NO PARÓ ...,El Peluca Milei,...,23596,No,Primer paro nacional,Desfavorable,A favor,0 days 00:41:21,2481.0,DEMENTED,0.0,Neutral
3,UgxwuSoMHOuFhLJq6HR4AaABAg,Metete las remeras y los gorros en el Q lo 😂,UCpkOnRIXHKSxUTB_o670KUQ,@DanielaMagnetto,2024-05-10 15:11:56+00:00,0,0,True,MILEI SE CRUZÓ MAL CON PERIODISTA QUE NO PARÓ ...,El Peluca Milei,...,23596,No,Primer paro nacional,Desfavorable,A favor,0 days 00:41:21,2481.0,Put your shirts and hats in your Q lo 😂,0.0,Neutral
4,UgycBcRrtu-sx9tHI-N4AaABAg,Que hermoso querido presidente.,UC3a1y33-3H-9J4MEq-fnh8g,@JojoRD-1023,2024-05-04 00:24:26+00:00,0,0,True,MILEI SE CRUZÓ MAL CON PERIODISTA QUE NO PARÓ ...,El Peluca Milei,...,23596,No,Primer paro nacional,Desfavorable,A favor,0 days 00:41:21,2481.0,"How beautiful, dear president.",0.85,Positivo
5,UgwYAvyfqDyz7Aj1kA54AaABAg,Con que pago el avion,UCGcFFZunCnsLrZOcorrB6mQ,@susanaramon3592,2024-04-29 01:19:14+00:00,0,0,True,MILEI SE CRUZÓ MAL CON PERIODISTA QUE NO PARÓ ...,El Peluca Milei,...,23596,No,Primer paro nacional,Desfavorable,A favor,0 days 00:41:21,2481.0,With what do I pay for the plane?,0.0,Neutral
6,UgzS7nHSWpviBBgTgPd4AaABAg,Y tu una comunista !!,UC7S5-M1YlKOnpPBBbUhjhtg,@iosefkuba6961,2024-04-23 17:20:09+00:00,0,0,True,MILEI SE CRUZÓ MAL CON PERIODISTA QUE NO PARÓ ...,El Peluca Milei,...,23596,No,Primer paro nacional,Desfavorable,A favor,0 days 00:41:21,2481.0,And you are a communist!!,0.0,Neutral
7,Ugy-hkUxFg2jI7oW90J4AaABAg,domada,UClH0QE3-ZMTDIB5jlXffsrA,@juan1068,2024-04-06 23:35:28+00:00,0,0,True,MILEI SE CRUZÓ MAL CON PERIODISTA QUE NO PARÓ ...,El Peluca Milei,...,23596,No,Primer paro nacional,Desfavorable,A favor,0 days 00:41:21,2481.0,tamed,0.0,Neutral
8,UgzwM35Pu5-zYupfYRR4AaABAg,Apúrate a vender todo el merchandising porque ...,UCbksXCGbyxQn4i_IlUKXGpA,@diegosuarez5620,2024-03-19 02:24:17+00:00,0,0,True,MILEI SE CRUZÓ MAL CON PERIODISTA QUE NO PARÓ ...,El Peluca Milei,...,23596,No,Primer paro nacional,Desfavorable,A favor,0 days 00:41:21,2481.0,Hurry up and sell all the merchandise because ...,0.05,Positivo
9,UgzQOH3Zo7vHQZkpId94AaABAg,Los extranjeros que tienen mitad de su poblaci...,UCWQAR-Po804dPlxSEQVRjLA,@juanpablorondeau5342,2024-03-09 16:59:31+00:00,0,0,True,MILEI SE CRUZÓ MAL CON PERIODISTA QUE NO PARÓ ...,El Peluca Milei,...,23596,No,Primer paro nacional,Desfavorable,A favor,0 days 00:41:21,2481.0,Foreigners who have half of their population l...,0.311111,Positivo


In [41]:
sentimiento_comentario = dfscore.value_counts(subset='Sentimiento')
denominador = dfscore.shape[0]
sentimiento_comentario/denominador

Sentimiento
Neutral     0.398107
Positivo    0.395646
Negativo    0.206247
Name: count, dtype: float64

In [42]:
afavor = dfscore[dfscore['condiciones_cuenta'] == 'A favor']
sentimiento_comentario = afavor.value_counts(subset='Sentimiento')
denominador = afavor.shape[0]
sentimiento_comentario/denominador

Sentimiento
Positivo    0.436151
Neutral     0.375739
Negativo    0.188111
Name: count, dtype: float64

In [43]:
encontra = dfscore[dfscore['condiciones_cuenta'] == 'En contra']
sentimiento_comentario = encontra.value_counts(subset='Sentimiento')
denominador = encontra.shape[0]
sentimiento_comentario/denominador

Sentimiento
Neutral     0.429491
Positivo    0.343021
Negativo    0.227488
Name: count, dtype: float64

In [48]:
eventoneg = dfscore[dfscore['tipo_evento'] == 'Desfavorable']
sentimiento_comentario = eventoneg.value_counts(subset='Sentimiento')
denominador = eventoneg.shape[0]
sentimiento_comentario/denominador

Sentimiento
Neutral     0.400371
Positivo    0.377973
Negativo    0.221655
Name: count, dtype: float64

In [49]:
eventopos = dfscore[dfscore['tipo_evento'] == 'Favorable']
sentimiento_comentario = eventopos.value_counts(subset='Sentimiento')
denominador = eventopos.shape[0]
sentimiento_comentario/denominador

Sentimiento
Positivo    0.416734
Neutral     0.389157
Negativo    0.194109
Name: count, dtype: float64

In [None]:
##Guardo el subset

# Relative destination for subset_3
dfscore_destino = os.path.join(directorio_destino, 'dfscore.csv')

# Saving the combined DataFrame to a CSV file with UTF-8 encoding
dfscore.to_csv(dfscore_destino, index=False, encoding='utf-8')