In [26]:
import pandas as pd
# from deep_translator import GoogleTranslator # traductor
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer # analyzer
from nltk.corpus import stopwords

# Fetch the NLTK resources used here: the VADER lexicon for the sentiment
# analyzer and the stop-word corpus.
for nltk_resource in ('vader_lexicon', 'stopwords'):
    nltk.download(nltk_resource)

# Read the user reviews from the CSV file
df_exp_revs = pd.read_csv('./datasets/aus_user_revs.csv')

# English stop words, kept in a set
stop_words = set(stopwords.words('english'))

# Create the SentimentIntensityAnalyzer instance
sia = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\octav\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\octav\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [27]:
import re

# Compiled once; the character class lists the code-point ranges that
# remove_emoticons deletes.
_EMOTICON_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # Emoticons (faces)
    "\U0001F300-\U0001F5FF"  # Miscellaneous Symbols and Pictographs
    "\U0001F680-\U0001F6FF"  # Transport and Map Symbols
    "\U0001F700-\U0001F77F"  # Alchemical Symbols
    "\U0001F780-\U0001F7FF"  # Geometric Shapes Extended
    "\U0001F800-\U0001F8FF"  # Supplemental Arrows-C
    "\U0001F900-\U0001F9FF"  # Supplemental Symbols and Pictographs
    "\U0001FA00-\U0001FA6F"  # Chess Symbols
    "\U0001FA70-\U0001FAFF"  # Symbols and Pictographs Extended-A
    "\U0001F200-\U0001F251"  # part of Enclosed Ideographic Supplement
    "]+",
    flags=re.UNICODE,
)


def remove_emoticons(text2):
    """Return ``text2`` with all emoji/pictograph characters removed.

    Every run of characters falling in the ranges of ``_EMOTICON_PATTERN`` is
    replaced by the empty string; all other characters are left untouched.

    Parameters
    ----------
    text2 : str
        Text to clean.

    Returns
    -------
    str
        The text without the matched characters.
    """
    return _EMOTICON_PATTERN.sub('', text2)

In [29]:
def get_sentiment_value(text):
    """Classify the sentiment of a review with VADER.

    Emoji are stripped with ``remove_emoticons`` and the remaining text is
    scored by the module-level ``sia`` analyzer; the compound score is then
    bucketed into three classes.

    The text is deliberately NOT tokenized or stop-word filtered before being
    scored. NLTK's English stop-word list contains negations and intensifiers
    ("not", "no", "very", "but", ...) that VADER's rules rely on, so removing
    them makes a phrase such as "not good" score like "good".

    Parameters
    ----------
    text : object
        Review text. Any value that is not a ``str`` (for example NaN) is
        treated as missing.

    Returns
    -------
    int or None
        ``0`` when the compound score is below -0.1 (negative), ``2`` when it
        is above 0.1 (positive), ``1`` otherwise (neutral), and ``None`` when
        ``text`` is not a string.
    """
    if not isinstance(text, str):
        return None  # missing / non-text value

    compound_score = sia.polarity_scores(remove_emoticons(text))['compound']

    if compound_score < -0.1:  # negative sentiment
        return 0
    if compound_score > 0.1:  # positive sentiment
        return 2
    return 1  # neutral sentiment
    

In [30]:
# Label every review with get_sentiment_value and store the labels in a new column
sentiment_labels = df_exp_revs['review'].apply(get_sentiment_value)
df_exp_revs['sentiment_analysis'] = sentiment_labels

# Write the DataFrame, including the new column, to a new CSV file (index omitted)
df_exp_revs.to_csv('./datasets/aus_user_revs_with_sentiment.csv', index=False)

In [50]:
# Preview five reviews next to their labels; the fixed seed makes the sample repeatable
df_exp_revs.loc[:, ['review', 'sentiment_analysis']].sample(n=5, random_state=44574849)

Unnamed: 0,review,sentiment_analysis
4167,wuuuuu i got this ♥♥♥♥♥♥♥ game free see ya in ...,2.0
29846,I got this with the bundle of Worms (tm) games...,0.0
1896,You get to play as Ron Pearlman.What more do y...,2.0
6436,"Alright, i'll be completely honest here.i had ...",2.0
34057,I've only just walked around and run through s...,2.0


In [51]:
# Show the full text of the review stored under index label 29846
df_exp_revs.loc[29846, 'review']

"I got this with the bundle of Worms (tm) games a while back - after a bit of research it turns out it's made by Ubisoft for multiple platforms back in the early 2000's (Don't believe the time on record that steam provides, as I managed to find an original copy for the Gamecube recently. I regret that).This really shows - many Ubisoft games that used lisenced franchises like Disney or other big names at the time often had significanly lower quality in-comparison to their own IP's while also putting a 'spin' on whatever popular game mechanics were being used for these 'outsourced' titles (I.e. a Donald Duck themed Crash Bandicoot with a modified health system ran poorly and is easily beaten by the 3D Rayman titles).Ubisoft's 'spin' this time is the momentum of your movement being mixed with Worms (tm) weapons to create a clunky puzzle game that has the main difficulty being setting up your 'answer' to the challenges that it provides instead of the actual puzzles themselves.Also, Worms g

In [20]:
from deep_translator import GoogleTranslator
import pandas as pd

# Small demo frame: Spanish phrases in "transcript", placeholder strings in "hello"
df = pd.DataFrame({
    "transcript": ["hola mundo", "vives la vida como un rockstar?", "color del mar"],
    "hello": ["nan", "nan", "nan"],
})

# Translator object configured for Spanish -> English
translator = GoogleTranslator(source="es", target="en")

# Series.apply calls translator.translate on each "transcript" value; the
# results overwrite the placeholders in the "hello" column.
df['hello'] = df['transcript'].apply(translator.translate)

print(df)

                        transcript                              hello
0                       hola mundo                        Hello World
1  vives la vida como un rockstar?  Do you live life like a rockstar?
2                    color del mar                   color of the sea


In [None]:
from deep_translator import GoogleTranslator
import pandas as pd

# Translate text of any length by sending it to the translator in chunks
def translate_text(text, translator, max_length=4999):
    """Translate ``text`` chunk by chunk and return the joined result.

    The text is cut into consecutive slices of at most ``max_length``
    characters, each slice is passed to ``translator.translate``, and the
    translated slices are concatenated without a separator.

    The default is 4999 rather than 5000 because deep_translator's length
    validation appears to require strictly fewer than 5000 characters, so a
    chunk of exactly 5000 would be rejected.
    NOTE(review): confirm the exact limit against the installed version.

    Slices are cut at fixed offsets, so a word lying on a chunk boundary is
    split between two requests.

    Parameters
    ----------
    text : str
        Text to translate. An empty string yields an empty string without
        calling the translator.
    translator : object
        Any object exposing ``translate(str) -> str``.
    max_length : int, optional
        Maximum number of characters per request.

    Returns
    -------
    str
        Concatenation of the translated chunks.

    Raises
    ------
    ValueError
        If ``max_length`` is smaller than 1.
    """
    if max_length < 1:
        raise ValueError("max_length must be a positive integer")

    chunks = (text[start:start + max_length]
              for start in range(0, len(text), max_length))
    return ''.join(translator.translate(chunk) for chunk in chunks)

# Load the CSV file with the reviews
df = pd.read_csv('./datasets/aus_user_revs.csv')

# Translator into English. The source language is auto-detected instead of
# being fixed to Spanish: the reviews displayed earlier in this notebook are
# written in English, so they are not all Spanish.
translator = GoogleTranslator(source="auto", target="en")


def _translate_review(review):
    """Translate one review; return missing or blank values unchanged.

    Non-string values (e.g. NaN) and whitespace-only strings are passed
    through as they are, so the text column is not filled with a numeric
    placeholder.
    """
    if isinstance(review, str) and review.strip():
        return translate_text(review, translator)
    return review


# Translate every review into the "transcript" column
df['transcript'] = df['review'].apply(_translate_review)


# Save the DataFrame with the translations (index omitted)
df.to_csv('./datasets/aus_user_revs_translated.csv', index=False)