# Opinion Detection - Unsupervised

In [None]:
import pandas as pd
import numpy as np
import text_normalizer as tn
import nltk
# Fetch the NLTK resources imported in this notebook: the movie review
# corpus, WordNet, and SentiWordNet.
nltk.download('movie_reviews')
nltk.download('wordnet')
from nltk.corpus import wordnet as wn
nltk.download('sentiwordnet')
from nltk.corpus import sentiwordnet as swn

import warnings
# Silence every warning category for the rest of the session.
warnings.filterwarnings('ignore')

### <font color='orange'>Load and Preprocess Data</font>

In [2]:
from nltk.corpus import movie_reviews as mr
from random import shuffle, seed

# Seed the RNG before shuffling so the document order is reproducible.
seed(42)

# One (raw review text, label) pair per corpus file; the label is 1 for the
# 'pos' category and 0 for any other category.
documents = [
    (mr.raw(file_id), int(category == 'pos'))
    for category in mr.categories()
    for file_id in mr.fileids(category)
]
shuffle(documents)

In [3]:
# The first 1600 shuffled documents are used for training, the rest for testing.
df_train = documents[:1600]
df_test = documents[1600:]

# Separate each (text, label) pair into parallel lists.
X_train = [text for text, _label in df_train]
y_train = [label for _text, label in df_train]

X_test = [text for text, _label in df_test]
y_test = [label for _text, label in df_test]

# Run both splits through the same text_normalizer pipeline.
X_train_cleaned = tn.normalize_corpus(X_train)
X_test_cleaned = tn.normalize_corpus(X_test)

### <font color='orange'>Find best coefficients for POS scores</font>

In [None]:
import os
from getpass import getpass

from textserver import TextServer

# Credentials must not live in the notebook source. They are read from the
# TEXTSERVER_USER / TEXTSERVER_PASSWORD environment variables; when a variable
# is unset or empty, the value is requested interactively instead (the
# password prompt does not echo).
ts_user = os.environ.get('TEXTSERVER_USER') or input('TextServer user: ')
ts_password = os.environ.get('TEXTSERVER_PASSWORD') or getpass('TextServer password: ')

# Client for the 'senses' service; used by the prediction code further down.
ts = TextServer(ts_user, ts_password, 'senses')
# Quick call to check that the client and credentials work.
ts.senses('Hello')

In [7]:
default_coef = {tag : 1 for tag in ["NN", "JJ", "VB", "RB"]}
def sentiwordnet_predict(review, coef = default_coef):
    """Classify a review as positive (1) or negative (0) with SentiWordNet.

    The review is annotated by the module-level TextServer client ``ts``.
    Every annotated token that carries a synset id contributes
    ``coef[tag] * (pos_score - neg_score)`` of its SentiWordNet entry; the
    sum is divided by the number of contributing tokens and rounded to three
    decimals.

    Args:
        review: Review text, passed unchanged to ``ts.senses``.
        coef: Mapping from tag to weight. Defaults to a weight of 1 for
            "NN", "JJ", "VB" and "RB". The mapping is only read.

    Returns:
        1 if the normalized score is >= 0, otherwise 0. A review in which no
        token has a synset has a score of 0 and therefore returns 1.

    Raises:
        KeyError: If a token with a synset has a tag that is not in ``coef``.
    """
    annotated_text = ts.senses(review)
    # Only the first element of the response is read; fields 2 and 4 of each
    # row are taken as the tag and the synset id.
    # NOTE(review): if the service returns fine-grained tags (e.g. "NNS"),
    # coef[tag] below raises KeyError - confirm the tag set against TextServer.
    tagged_synsets = [(row[2], row[4]) for row in annotated_text[0]]

    final_score = 0
    token_count = 0
    for tag, synset_id in tagged_synsets:
        # 'N/A' marks a token without a synset; it does not count.
        if synset_id == 'N/A':
            continue

        # The id is parsed as an 8-digit offset, one separator character,
        # then the part-of-speech letter.
        offset = int(synset_id[:8])
        pos = synset_id[9]
        synset = wn.synset_from_pos_and_offset(pos, offset)
        senti_synset = swn.senti_synset(synset.name())

        final_score += coef[tag] * (senti_synset.pos_score() - senti_synset.neg_score())
        token_count += 1

    # Nothing was scored: avoid dividing by zero and treat the score as 0,
    # which falls on the positive side of the ">= 0" rule below.
    if token_count == 0:
        return 1

    # Rounding happens before the comparison, so scores that round to zero
    # are classified as positive.
    norm_final_score = round(float(final_score) / token_count, 3)
    return 1 if norm_final_score >= 0 else 0


=== word: happy ===
Positive score:  0.875
Negative score:  0.0
Objective score:  0.125


In [None]:
from sklearn.metrics import accuracy_score
class Sentiwordnet_coef:
    """Scores ``sentiwordnet_predict`` for one set of per-tag coefficients.

    Each constructor argument is the weight applied to tokens with the
    corresponding tag ("NN", "JJ", "VB", "RB"); all default to 1.
    """

    def __init__(self, NN_coef = 1, JJ_coef = 1, VB_coef = 1, RB_coef = 1):
        # Keys match the tags used to index the coefficients in sentiwordnet_predict.
        self._coef = {"NN" : NN_coef, "JJ" : JJ_coef, "VB" : VB_coef, "RB" : RB_coef}

    def score(self, X, y):
        """Return the accuracy of the predictions for reviews ``X`` against labels ``y``.

        Args:
            X: Iterable of review texts.
            y: True labels, in the same order as ``X``.

        Returns:
            The value of ``accuracy_score(y, y_pred)``.
        """
        # Pass this instance's coefficients explicitly; without them every
        # instance would be scored with the default weights and all
        # coefficient settings would yield the same accuracy.
        y_pred = [sentiwordnet_predict(review, coef=self._coef) for review in X]

        return accuracy_score(y, y_pred)