In [None]:
#Enfoque basado en modelo de SentiWordNet o SenticNet para español
#Añadiendo reglas heurísticas (negación en ventana de máximo 5 tokens, intensificadores y atenuadores)
#Compara y evalúa cíticamente el rendimiento del modelo sin y con reglas heurísticas (lexicón y lexicón + reglas)
#Para comparar y evaluar, usa un dataset de evaluación: InterTASS, Multilingual Amazon Review sentiment...

In [None]:
#Análisis_del_sentimiento_y_la_connotación_Aprendizaje_basado_en_el_lexicón
#Apartado "Lexicones de polaridad" utiliza el lexicón de SentiWordNet y añade reglas heurísticas, pero es en inglés
#Cuidado porque en español se hace con Spacy en lugar de con NLTK
#SentiWordNet en español en el apartado "Análisis de sentimiento basado en aspecto", dentro de "Lexicones de polaridad"

Ahora vamos a usar SentiWordNet en español. Cargaremos el lexicón.

In [None]:
import csv
import spacy

In [None]:
!python -m spacy download es_core_news_sm

Collecting es-core-news-sm==3.7.0
  Downloading https://github.com/explosion/spacy-models/releases/download/es_core_news_sm-3.7.0/es_core_news_sm-3.7.0-py3-none-any.whl (12.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.9/12.9 MB[0m [31m53.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: es-core-news-sm
Successfully installed es-core-news-sm-3.7.0
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('es_core_news_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
# Cargar el modelo de spaCy para español
nlp_es = spacy.load("es_core_news_sm")

# Función para cargar el lexicón de SentiWordNet en español.
def load_spanish_sentiwordnet(filepath):
    """
    Carga el lexicón en español desde un archivo tab-delimitado con las columnas:
    pos, word_en, word_sp, positive, negative, objective, index, synset, meaning.
    Se utiliza la columna 'word_sp' (convertida a minúsculas) como clave.
    Devuelve un diccionario: {word_sp.lower(): (pos_tag, positive, negative, objective)}.
    """
    lexicon = {}
    with open(filepath, encoding="utf-8") as f:
        reader = csv.DictReader(f, delimiter="\t")
        for row in reader:
            word_sp = row["word_sp"].strip().replace('_', ' ')
            if word_sp == "":
                continue
            try:
                pos_tag = row["pos"].strip().lower()
                pos_score = float(row["positive"].replace("+", ""))
                neg_score = float(row["negative"].replace("-", ""))
                obj_score = float(row["objective"])
            except Exception:
                pos_score, neg_score, obj_score = 0.0, 0.0, 0.0
                pos_tag = ""
            key = word_sp.lower()
            lexicon[key] = (pos_tag, pos_score, neg_score, obj_score)
    return lexicon

In [8]:
# Cargar el lexicón en español desde el mismo cuaderno
# La ruta en la que he colocado el archivo es "/content/sentiwordnet_es.tsv"
es_sentiwordnet = '/content/sentiwordnet_es.tsv'

In [19]:
# Cargar el lexicón (ajusta la ruta al archivo de SentiWordNet en español)
spanish_sentiwordnet = load_spanish_sentiwordnet(es_sentiwordnet)
spanish_sentiwordnet

{'barrer con': ('v', 0.125, 0.0, 0.875),
 'ganar': ('v', 0.375, 0.0, 0.625),
 'abalizar': ('v', 0.125, 0.0, 0.875),
 'ligarse': ('v', 0.125, 0.0, 0.875),
 'unirse': ('v', 0.125, 0.0, 0.875),
 'seguir': ('v', 0.375, 0.0, 0.625),
 'raptar': ('v', 0.0, 0.125, 0.875),
 'cautivar': ('v', 0.125, 0.0, 0.875),
 'deambular': ('v', 0.125, 0.0, 0.875),
 'platyhelminthes': ('n', 0.125, 0.0, 0.875),
 'anomiidae': ('n', 0.125, 0.0, 0.875),
 'pluvialis': ('n', 0.125, 0.0, 0.875),
 'género pluvialis': ('n', 0.125, 0.0, 0.875),
 'kogia': ('n', 0.125, 0.0, 0.875),
 'considerar': ('v', 0.25, 0.0, 0.75),
 'vanessa': ('n', 0.125, 0.0, 0.875),
 'nesokia': ('n', 0.125, 0.0, 0.875),
 'género nesokia': ('n', 0.125, 0.0, 0.875),
 'socorrer': ('v', 0.125, 0.0, 0.875),
 'diaforético': ('a', 0.125, 0.0, 0.875),
 'vacacionar': ('v', 0.125, 0.0, 0.875),
 'sudorífico': ('a', 0.125, 0.0, 0.875),
 'lactogenico': ('a', 0.125, 0.0, 0.875),
 'casa': ('n', 0.125, 0.0, 0.875),
 'celda': ('n', 0.125, 0.0, 0.875),
 'minarete'

In [20]:
def map_spacy_to_wn(lexicon, spacy_to_wn):
#Mapea las etiquetas de Spacy a las etiquetas de wn

    new_lexicon = {}

    for word, (spacy_tag, pos_score, neg_score, obj_score) in lexicon.items():
        if spacy_tag in spacy_to_wn:
            # Obtener la etiqueta en WordNet
            wn_tag = spacy_to_wn[spacy_tag]
            new_lexicon[word] = (wn_tag, pos_score, neg_score, obj_score)
        else:
            # Mantener la etiqueta si no hay mapeo
            new_lexicon[word] = (spacy_tag, pos_score, neg_score, obj_score)

    return new_lexicon

# Diccionario para mapear las etiquetas de spaCy a las etiquetas esperadas en el lexicón
# Asumimos que en el lexicón se utilizan 'n' (noun), 'v' (verb), 'a' (adjective) y 'r' (adverb).
spacy_to_wn = {
    "n": "NOUN",
    "v": "VERB",
    "a": "ADJ",
    "r": "ADV"
}

In [21]:
spanish_sentiwordnet_to_wn = map_spacy_to_wn(spanish_sentiwordnet,spacy_to_wn)
spanish_sentiwordnet_to_wn

{'barrer con': ('VERB', 0.125, 0.0, 0.875),
 'ganar': ('VERB', 0.375, 0.0, 0.625),
 'abalizar': ('VERB', 0.125, 0.0, 0.875),
 'ligarse': ('VERB', 0.125, 0.0, 0.875),
 'unirse': ('VERB', 0.125, 0.0, 0.875),
 'seguir': ('VERB', 0.375, 0.0, 0.625),
 'raptar': ('VERB', 0.0, 0.125, 0.875),
 'cautivar': ('VERB', 0.125, 0.0, 0.875),
 'deambular': ('VERB', 0.125, 0.0, 0.875),
 'platyhelminthes': ('NOUN', 0.125, 0.0, 0.875),
 'anomiidae': ('NOUN', 0.125, 0.0, 0.875),
 'pluvialis': ('NOUN', 0.125, 0.0, 0.875),
 'género pluvialis': ('NOUN', 0.125, 0.0, 0.875),
 'kogia': ('NOUN', 0.125, 0.0, 0.875),
 'considerar': ('VERB', 0.25, 0.0, 0.75),
 'vanessa': ('NOUN', 0.125, 0.0, 0.875),
 'nesokia': ('NOUN', 0.125, 0.0, 0.875),
 'género nesokia': ('NOUN', 0.125, 0.0, 0.875),
 'socorrer': ('VERB', 0.125, 0.0, 0.875),
 'diaforético': ('ADJ', 0.125, 0.0, 0.875),
 'vacacionar': ('VERB', 0.125, 0.0, 0.875),
 'sudorífico': ('ADJ', 0.125, 0.0, 0.875),
 'lactogenico': ('ADJ', 0.125, 0.0, 0.875),
 'casa': ('NOUN'

Ahora vamos a probar el lexicón de SentiWordNet en español para calcular el sentimiento.

Hasta ahora hemos hecho un mapeo entre las etiquetas gramaticales ya incluídas en el lexicón de SentiWordNet a las etiquetas que usa WordNet, centrándonos únicamente en categorías léxicas (nombres, verbos, adjetivos, adverbios). Después, vamos a lematizar los tokens, esto es, quitar cualquier afijo para obtener la raíz de los mismos. Buscaremos el lema en WordNet y obtendremos el significado del primero, que es el más común. Por último, ofrecemos dos maneras de calcular el sentimiento:

1. Mediante la suma de la resta del sentimiento positivo y negativo de cada synset por palabra.
2. Mediante la media de la suma de los sentimientos positivos, negativos y objetivos de todas las palabras, escogiendo el valor más alto de los tres sentimientos.

En el primer caso, consideraremos la misma escala de valores que vimos en VADER:
si s es superior o igual a 0.05, positivo; si s es inferior o igual a -0.05, negativo; en el resto de los casos, neutro.

En el segundo, calcularemos la media de la suma de los scores positivos, negativos y objetivos y tomaremos como polaridad aquel de los tres que tenga un valor mayor.

Ahora vamos a crear una función para cada una de los métodos para calcular el sentimiento y vamos a aplicar tal función al dataset anterior, reutilizando para ello el código del que nos servimos anteriormente para cargar el dataset, visualizarlo, manipularlo, etc.

In [None]:
def compute_sentiment_sum(text):

  doc = nlp_es(text)
  tokenized_sent = [token.text for token in doc]
  print(tokenized_sent)

  pos_tagged_tokens = nltk.pos_tag(tokenized_sent)
  print(pos_tagged_tokens)

####
  pos_score = 0.0
  neg_score = 0.0
  sentiment_value = 0.0
  sentiment_label = ""
  for word, tag in pos_tagged_tokens:
    print(word)
    wn_tag = map_spacy_to_wn(spanish_sentiwordnet,spacy_to_wn)
    print(wn_tag)
    if wn_tag == None:
     continue

    lemma = lemmatizer.lemmatize(word, pos=wn_tag)
    print(lemma)
    if not lemma:
      continue

    swn_synsets = list(swn.senti_synsets(lemma, pos=wn_tag))
    print(swn_synsets)
    if len(swn_synsets) == 0:
      continue
    pos_score = swn_synsets[0].pos_score()
    print(pos_score)
    neg_score =  swn_synsets[0].neg_score()
    print(neg_score)
    # podemos probar a calcular la media de la suma de los synsets por palabra y ver si arroja mejores resultados
    #pos_score = sum(synset.pos_score() for synset in swn_synsets)
    #neg_score = sum(synset.neg_score() for synset in swn_synsets)
    sentiment_value += pos_score - neg_score
    print(sentiment_value)

  if sentiment_value >= 0.05:
    sentiment_label = "positive"
  elif sentiment_value <= -0.05:
    sentiment_label = "negative"
  else:
    sentiment_label = "neutral"

  return sentiment_label

def compute_sentiment_mean(text):

  tokenized_sent = nltk.word_tokenize(text)
  pos_tagged_tokens = nltk.pos_tag(tokenized_sent)
  #english_stopwords = set(stopwords.words("english"))
  #pos_tagged_tokens = [word for word in pos_tagged_tokens if word[0] not in english_stopwords]

  pos_score = 0.0
  neg_score = 0.0
  obj_score = 0.0
  num_lemas_con_synset = 0
  sentiment_label = "neutral"

  for word, tag in pos_tagged_tokens:

    wn_tag = penn_to_wn(tag)
    if wn_tag == None:
     continue

    lemma = lemmatizer.lemmatize(word, pos=wn_tag)
    if not lemma:
      continue

    swn_synsets = list(swn.senti_synsets(lemma, pos=wn_tag))
    if len(swn_synsets) == 0:
      continue
    # podemos probar a calcular la media de la suma de los synsets por palabra y ver si arroja mejores resultados
    #pos_score += sum(synset.pos_score() for synset in swn_synsets) / len(swn_synsets)
    #neg_score += sum(synset.neg_score() for synset in swn_synsets) / len(swn_synsets)
    #obj_score += sum(synset.obj_score() for synset in swn_synsets) / len(swn_synsets)
    pos_score += swn_synsets[0].pos_score()
    neg_score += swn_synsets[0].neg_score()
    obj_score += swn_synsets[0].obj_score()
    num_lemas_con_synset += 1

  if num_lemas_con_synset > 0:
    pos_score_mean = pos_score / num_lemas_con_synset
    neg_score_mean = neg_score / num_lemas_con_synset
    obj_score_mean = obj_score / num_lemas_con_synset
    max_value = max(pos_score_mean, neg_score_mean, obj_score_mean)

    if max_value == pos_score_mean:
      sentiment_label = "positive"
    elif max_value == neg_score_mean:
      sentiment_label = "negative"

  return sentiment_label

In [None]:
text ="Hoy me he levantado alegre"
#doc = nlp_es(text)
#doc

In [None]:
valor_sentimiento = compute_sentiment_sum(text)
print(valor_sentimiento)

['John', 'is', 'nice', 'and', 'is', 'my', 'new', 'best', 'friend']
[('John', 'NNP'), ('is', 'VBZ'), ('nice', 'JJ'), ('and', 'CC'), ('is', 'VBZ'), ('my', 'PRP$'), ('new', 'JJ'), ('best', 'JJS'), ('friend', 'NN')]
John
n
John
[SentiSynset('toilet.n.01'), SentiSynset('john.n.02'), SentiSynset('john.n.03'), SentiSynset('whoremaster.n.01'), SentiSynset('john.n.05')]
0.0
0.0
0.0
is
v
be
[SentiSynset('be.v.01'), SentiSynset('be.v.02'), SentiSynset('be.v.03'), SentiSynset('exist.v.01'), SentiSynset('be.v.05'), SentiSynset('equal.v.01'), SentiSynset('constitute.v.01'), SentiSynset('be.v.08'), SentiSynset('embody.v.02'), SentiSynset('be.v.10'), SentiSynset('be.v.11'), SentiSynset('be.v.12'), SentiSynset('cost.v.01')]
0.375
0.25
0.125
nice
a
nice
[SentiSynset('nice.a.01'), SentiSynset('decent.s.01'), SentiSynset('nice.s.03'), SentiSynset('dainty.s.04'), SentiSynset('courteous.s.01')]
3.25
0.375
3.0
and
None
is
v
be
[SentiSynset('be.v.01'), SentiSynset('be.v.02'), SentiSynset('be.v.03'), SentiSyns

In [None]:
valor_sentimiento_mean = compute_sentiment_mean(text)
print(valor_sentimiento_mean)

In [None]:
#Función para formatear los datos del dataset (prepararlos)
def format_data(data):

  selected_data = data.iloc[:, [0, -1]]
  selected_data.columns = ['polarity_value', 'tweet_text']
  # Cambiar 0, 2, 4 a negativo, neutral y positivo respecticamente
  labels = {0: 'negative', 2: 'neutral', 4: 'positive'}
  selected_data['polarity_value'] = selected_data['polarity_value'].map(labels)

  # Retornar solo las dos columnas que nos interesan
  return selected_data[['tweet_text', 'polarity_value']]

In [None]:
import pandas as pd

#Cargamos dataset para comprobar los resultados
data_url = "https://raw.githubusercontent.com/keitazoumana/VADER_sentiment-Analysis/main/data/testdata.manual.2009.06.14.csv"
sentiment_data = pd.read_csv(data_url, header=None)

# Aplicar la transformación con la función anterior que definimos más arriba
data = format_data(sentiment_data)
data.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_data['polarity_value'] = selected_data['polarity_value'].map(labels)


Unnamed: 0,tweet_text,polarity_value
0,@stellargirl I loooooooovvvvvveee my Kindle2. ...,positive
1,Reading my kindle2... Love it... Lee childs i...,positive
2,"Ok, first assesment of the #kindle2 ...it fuck...",positive
3,@kenburbary You'll love your Kindle2. I've had...,positive
4,@mikefish Fair enough. But i have the Kindle2...,positive
5,@richardebaker no. it is too big. I'm quite ha...,positive
6,Fuck this economy. I hate aig and their non lo...,negative
7,Jquery is my new best friend.,positive
8,Loves twitter,positive
9,how can you not love Obama? he makes jokes abo...,positive


In [None]:
# Ejecutar las predicciones y obtener nuevas columna con dichas predicciones
data["sentiwordnet_sum_prediction"] = data["tweet_text"].apply(compute_sentiment_sum)
data["sentiwordnet_mean_prediction"] = data["tweet_text"].apply(compute_sentiment_mean)

# Mostrar 5 filas aleatorias del dataset
data.sample(5)

[1;30;43mSe han truncado las últimas 5000 líneas del flujo de salida.[0m
n
http
[SentiSynset('hypertext_transfer_protocol.n.01')]
0.0
0.0
0.0
:
None
//codylindley.com/jqueryselectors/
n
//codylindley.com/jqueryselectors/
[]
['How', 'to', 'implement', 'a', 'news', 'ticker', 'with', 'jQuery', 'and', 'ten', 'lines', 'of', 'code', 'http', ':', '//bit.ly/CZnFJ']
[('How', 'WRB'), ('to', 'TO'), ('implement', 'VB'), ('a', 'DT'), ('news', 'NN'), ('ticker', 'NN'), ('with', 'IN'), ('jQuery', 'NN'), ('and', 'CC'), ('ten', 'JJ'), ('lines', 'NNS'), ('of', 'IN'), ('code', 'NN'), ('http', 'NN'), (':', ':'), ('//bit.ly/CZnFJ', 'NN')]
How
None
to
None
implement
v
implement
[SentiSynset('implement.v.01'), SentiSynset('enforce.v.01'), SentiSynset('follow_through.v.02')]
0.0
0.0
0.0
a
None
news
n
news
[SentiSynset('news.n.01'), SentiSynset('news.n.02'), SentiSynset('news_program.n.01'), SentiSynset('news.n.04'), SentiSynset('newsworthiness.n.01')]
0.0
0.0
0.0
ticker
n
ticker
[SentiSynset('heart.n.02'), S

Unnamed: 0,tweet_text,polarity_value,sentiwordnet_sum_prediction,sentiwordnet_mean_prediction
398,@sportsguy33 Time Warner = epic fail,negative,neutral,neutral
162,My wrist still hurts. I have to get it looked ...,positive,negative,neutral
302,Now I can see why Dave Winer screams about lac...,negative,positive,neutral
5,@richardebaker no. it is too big. I'm quite ha...,positive,positive,neutral
13,Watchin Espn..Jus seen this new Nike Commerica...,positive,positive,neutral


In [None]:
from sklearn.metrics import classification_report

In [None]:
# Mostrar el informe de clasificación para cada uno de los métodos
print("Resultados del primer método de la suma:")
print(classification_report(data['polarity_value'], data['sentiwordnet_sum_prediction']))
print()
print("Resultados del segundo método de la media:")
print(classification_report(data['polarity_value'], data['sentiwordnet_mean_prediction']))

Resultados del primer método de la suma:
              precision    recall  f1-score   support

    negative       0.69      0.47      0.56       177
     neutral       0.51      0.37      0.43       139
    positive       0.50      0.76      0.61       182

    accuracy                           0.55       498
   macro avg       0.57      0.54      0.53       498
weighted avg       0.57      0.55      0.54       498


Resultados del segundo método de la media:
              precision    recall  f1-score   support

    negative       1.00      0.01      0.01       177
     neutral       0.28      1.00      0.44       139
    positive       1.00      0.01      0.01       182

    accuracy                           0.28       498
   macro avg       0.76      0.34      0.15       498
weighted avg       0.80      0.28      0.13       498



Vamos a añadir algunas reglas heurísticas que tengan en cuenta la negación, intensificadores y atenuadores.

In [None]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk import pos_tag
from nltk.stem import WordNetLemmatizer
from nltk.corpus import sentiwordnet as swn

# Descargas necesarias
nltk.download("punkt")
nltk.download("averaged_perceptron_tagger")
nltk.download("wordnet")
nltk.download("stopwords")

# Inicializador del lematizador
lemmatizer = WordNetLemmatizer()

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
# Lista de palabras de negación
negations = {"not", "no", "never", "n't", "none", "cannot", "doesn't", "don't", "neither", "nor"}

# Diccionario de modificadores de intensidad (incluye intensificadores y atenuadores)
intensity_modifiers = {
    # Intensificadores (factor > 1)
    "very": 1.5,
    "extremely": 1.8,
    "quite": 1.2,
    "really": 1.3,
    "absolutely": 1.8,
    "so": 1.2,
    "highly": 1.4,
    "incredibly": 1.5,
    # Atenuadores (factor < 1)
    "slightly": 0.8,
    "barely": 0.5,
    "hardly": 0.5,
    "somewhat": 0.7,
    "marginally": 0.6,
}

def compute_sentiment_sum(text):
    tokenized_sent = word_tokenize(text)
    pos_tagged_tokens = pos_tag(tokenized_sent)

    sentiment_value = 0.0

    for i, (word, tag) in enumerate(pos_tagged_tokens):
        wn_tag = penn_to_wn(tag)
        if wn_tag is None:
            continue
        lemma = lemmatizer.lemmatize(word, pos=wn_tag)
        if not lemma:
            continue
        swn_synsets = list(swn.senti_synsets(lemma, pos=wn_tag))
        if len(swn_synsets) == 0:
            continue

        pos_score = swn_synsets[0].pos_score()
        neg_score = swn_synsets[0].neg_score()
        word_sentiment = pos_score - neg_score

        # Heurística de Negación: ventana de 5 tokens a la izquierda
        window_start = max(0, i - 5)
        window = pos_tagged_tokens[window_start:i]
        if any(tok[0].lower() in negations for tok in window):
            word_sentiment = -word_sentiment

        # Heurística de Modificadores de Intensidad/Atenuación:
        if i > 0:
            prev_word = pos_tagged_tokens[i-1][0].lower()
            if prev_word in intensity_modifiers:
                word_sentiment *= intensity_modifiers[prev_word]

        sentiment_value += word_sentiment

    if sentiment_value >= 0.05:
        sentiment_label = "positive"
    elif sentiment_value <= -0.05:
        sentiment_label = "negative"
    else:
        sentiment_label = "neutral"

    return sentiment_label

def compute_sentiment_mean(text):
    tokenized_sent = word_tokenize(text)
    pos_tagged_tokens = pos_tag(tokenized_sent)

    pos_score_total = 0.0
    neg_score_total = 0.0
    obj_score_total = 0.0
    synset_count = 0

    for i, (word, tag) in enumerate(pos_tagged_tokens):
        wn_tag = penn_to_wn(tag)
        if wn_tag is None:
            continue
        lemma = lemmatizer.lemmatize(word, pos=wn_tag)
        if not lemma:
            continue
        swn_synsets = list(swn.senti_synsets(lemma, pos=wn_tag))
        if len(swn_synsets) == 0:
            continue

        pos_score = swn_synsets[0].pos_score()
        neg_score = swn_synsets[0].neg_score()
        obj_score = swn_synsets[0].obj_score()

        # Heurística de Negación: ventana de 5 tokens
        window_start = max(0, i - 5)
        window = pos_tagged_tokens[window_start:i]
        if any(tok[0].lower() in negations for tok in window):
            pos_score, neg_score = neg_score, pos_score  # Intercambia para reflejar la inversión

        # Heurística de Modificadores de Intensidad/Atenuación:
        if i > 0:
            prev_word = pos_tagged_tokens[i-1][0].lower()
            if prev_word in intensity_modifiers:
                multiplier = intensity_modifiers[prev_word]
                pos_score *= multiplier
                neg_score *= multiplier
                obj_score *= multiplier

        pos_score_total += pos_score
        neg_score_total += neg_score
        obj_score_total += obj_score
        synset_count += 1

    if synset_count == 0:
        return "neutral"

    pos_score_mean = pos_score_total / synset_count
    neg_score_mean = neg_score_total / synset_count
    obj_score_mean = obj_score_total / synset_count
    max_value = max(pos_score_mean, neg_score_mean, obj_score_mean)
    if max_value == pos_score_mean:
        sentiment_label = "positive"
    elif max_value == neg_score_mean:
        sentiment_label = "negative"
    else:
        sentiment_label = "neutral"

    return sentiment_label


In [None]:
# Ejemplo de uso:
text_example = "I do not really like this movie. It is not good at all, but it is extremely boring."
print("Sentiment (sum):", compute_sentiment_sum(text_example))
print("Sentiment (mean):", compute_sentiment_mean(text_example))

Sentiment (sum): negative
Sentiment (mean): neutral


In [None]:
# Ejecutar las predicciones y obtener nuevas columna con dichas predicciones
data["sentiwordnet_sum_improved_prediction"] = data["tweet_text"].apply(compute_sentiment_sum)
data["sentiwordnet_mean_improved_prediction"] = data["tweet_text"].apply(compute_sentiment_mean)

# Mostrar 5 filas aleatorias del dataset
data.sample(5)

Unnamed: 0,tweet_text,polarity_value,sentiwordnet_sum_prediction,sentiwordnet_mean_prediction,sentiwordnet_disambiguation_sum_prediction,sentiwordnet_disambiguation_mean_prediction,sentiwordnet_sum_improved_prediction,sentiwordnet_mean_improved_prediction
93,"just got back from church, and I totally hate ...",negative,negative,neutral,positive,neutral,negative,neutral
250,LEbron james got in a car accident i guess..ju...,negative,negative,neutral,positive,neutral,negative,neutral
426,Saved money by opting for grocery store trip a...,neutral,positive,neutral,neutral,neutral,positive,neutral
104,is Twitter's connections API broken? Some twee...,negative,neutral,neutral,positive,neutral,neutral,neutral
134,I saw Night at the Museum: Battle of the Swith...,neutral,positive,neutral,positive,neutral,positive,neutral


In [None]:
# Mostrar el informe de clasificación para cada uno de los métodos
print("Resultados del primer método de la suma:")
print(classification_report(data['polarity_value'], data['sentiwordnet_sum_improved_prediction']))
print()
print("Resultados del segundo método de la media:")
print(classification_report(data['polarity_value'], data['sentiwordnet_mean_improved_prediction']))

Resultados del primer método de la suma:
              precision    recall  f1-score   support

    negative       0.69      0.53      0.60       177
     neutral       0.51      0.37      0.43       139
    positive       0.52      0.75      0.62       182

    accuracy                           0.57       498
   macro avg       0.57      0.55      0.55       498
weighted avg       0.58      0.57      0.56       498


Resultados del segundo método de la media:
              precision    recall  f1-score   support

    negative       1.00      0.01      0.01       177
     neutral       0.28      1.00      0.44       139
    positive       1.00      0.01      0.01       182

    accuracy                           0.28       498
   macro avg       0.76      0.34      0.15       498
weighted avg       0.80      0.28      0.13       498

