In [15]:
!pip install vaderSentiment

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from nltk.corpus import wordnet
import nltk
nltk.download('wordnet')


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

# Paso 1: Recopilación y preparación de los datos


In [16]:
# Read the IMDB review CSV (path presumably points at a mounted Google Drive
# folder -- confirm the mount happens in an earlier cell), then pull out the
# review text column and the sentiment label column.
dataset = pd.read_csv(
    "/content/drive/MyDrive/Colab/Colab Archive/2020-07-20/BERT_sentiment_IMDB_Dataset.csv"
)
documents, labels = dataset["review"], dataset["sentiment"]

# Paso 2: Construcción del diccionario de palabras emocionales utilizando WordNet


In [17]:
# Collect the name of every WordNet lemma that has at least one antonym.
# NOTE(review): this is the notebook's proxy for "emotional" words; the set is
# not referenced by any later cell visible here.
emotional_words = {
    lemma.name()
    for synset in wordnet.all_synsets()
    for lemma in synset.lemmas()
    if lemma.antonyms()
}

# Paso 3: Construcción de una matriz de documento de palabras utilizando TF-IDF


In [18]:
# Build a TF-IDF vectorizer with its default settings (no arguments are
# overridden) and fit/encode the review texts in a single pass.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(documents)

# Paso 4: Función de peso (no se implementa en este código)


# Pasos 5 y 6: SVD (descomposición en valores singulares) y reducción de dimensionalidad mediante LSA


In [19]:
# Project the TF-IDF matrix onto 2 latent components (LSA).
# TruncatedSVD's default solver is randomized, so pin the seed; otherwise the
# projection can differ between runs of the notebook.
lsa = TruncatedSVD(n_components=2, random_state=42)
X_lsa = lsa.fit_transform(X)

# Paso 7: Cálculo de la puntuación emocional del texto utilizando VADER


In [20]:
# Single VADER analyzer instance; classifySentiment reads this module-level
# name each time it is called.
analyzer = SentimentIntensityAnalyzer()

def classifySentiment(text, threshold=0.0):
    """Label ``text`` as positive or negative from its VADER compound score.

    Args:
        text: The text to score. It is passed unchanged to
            ``analyzer.polarity_scores``; no preprocessing is applied here.
        threshold: Lowest compound score that is labelled ``"positivo"``.
            The default of ``0.0`` keeps the original rule, under which a
            score of exactly 0 counts as positive. A stricter cutoff such as
            ``0.05`` (the value suggested in the VADER README) can be passed
            to stop near-neutral text from being reported as positive.

    Returns:
        A ``(text, label)`` tuple: the input text unchanged, and the label
        ``"positivo"`` or ``"negativo"``.
    """
    # Only the aggregate "compound" entry of the VADER result is used.
    compound = analyzer.polarity_scores(text)["compound"]
    label = "positivo" if compound >= threshold else "negativo"
    return text, label

# Paso 8: Interpretación y análisis de los resultados

In [21]:
# Classify one hand-written review, then print the LSA projection of the
# corpus followed by the review and its predicted label.
review_text = (
    "Avengers: Infinity War at least had the good taste to abstain from Jeremy Renner. "
    "No such luck in Endgame."
)
text_representation, predicted_label = classifySentiment(review_text)

# sep="\n" puts the matrix on its own line, exactly as two separate prints would.
print("\nMatriz término-documento reducida con LSA:", X_lsa, sep="\n")
print("\nTexto: ", review_text)
print("\nSentimiento del texto:", predicted_label)


Matriz término-documento reducida con LSA:
[[ 0.34163666  0.0193787 ]
 [ 0.34967024  0.06477418]
 [ 0.33901193  0.0357882 ]
 ...
 [ 0.3903842  -0.01720172]
 [ 0.33053485 -0.0820675 ]
 [ 0.41518012 -0.03906431]]

Texto:  Avengers: Infinity War at least had the good taste to abstain from Jeremy Renner. No such luck in Endgame.

Sentimiento del texto: negativo
