# **SENTIMENT ANALYSIS - UNSUPERVISED**

In [None]:
import os
import json
import numpy as np
import spacy
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import sentiwordnet as swn
from nltk.wsd import lesk
from textserver import TextServer
from dotenv import load_dotenv
from sklearn.metrics import accuracy_score

In [None]:
# Load environment variables from `login.env`, then read the user/password
# pair that is passed to TextServer further down.
load_dotenv('login.env')
ts_password = os.getenv("PASSWORD_CAI")  # None when the variable is not set
ts_user = os.getenv("USER_CAI")  # None when the variable is not set

In [None]:
# Load the evaluation split. X_test is indexed/iterated below as a sequence of
# opinion texts; y_test is compared against the predicted labels at the end.
# JSON is UTF-8 by specification, so the encoding is pinned rather than left
# to the platform default.
with open('./data/X_test.json', 'r', encoding='utf-8') as file:
    X_test = json.load(file)

with open('./data/y_test.json', 'r', encoding='utf-8') as file:
    y_test = json.load(file)

In [None]:
# TextServer client built from the credentials read above; it is the default
# client of get_synsets(). The third argument presumably selects the 'senses'
# service — TODO confirm against the TextServer wrapper.
ts = TextServer(ts_user, ts_password, 'senses') 

In [None]:
import nltk
from nltk.corpus import stopwords
 
# Fetch the NLTK stop-word corpus and keep the English list as a set, which
# the stop-word filters below use for membership tests.
nltk.download('stopwords')

stop_words = set(stopwords.words('english'))

example_sent = "i guess that if a very wild bachelor party had gone really bad , there would be broken furniture , traces of smack and cocaine on the floor , and a dead prostitute in the bathroom . i guess that if a movie had also gone really bad , there might be the same elements present . coincidence ? poor kyle ( a meek looking jon favreau ) . . . he is about to marry his radiant fiancee , laura ( cameron diaz ) . but before he exchanges his vows , he embarks to las vegas with his friends for one last blowout . but this bachelor party has gone about as bad as it could possibly get . the prostitute has met a horrible , though accidental death , and drugs are everywhere . the five friends agree that there is enough bad evidence here that will send them to jail for a very long time . a surprisingly calm robert boyd ( christian slater ) , who looks like he was groomed to make nefarious decisions , ponders their dilemma for a few minutes before deciding that the best thing to do is to bury the body in the desert where she ' ll never be found . although they stomach the gruesome deed of getting rid of the body ( which also disturbingly involves dismantling the body using power saws in order to stuff it into suitcases ) , when they return from their trip , guilt and paranoia begins to set in which slowly consumes some of the five friends . one is adam ( daniel stern ) he grows increasingly agitated . whenever people look at his van or whenever a cop glances his way , his blood pressure increases . or that just may be because of his dysfunctional family . another is michael , who was actually responsible for her death . he tries to bury his feelings , but the burden of guilt begins to affect his judgment as well . boyd is the ? doer ' of the group . seemingly suffering from a long psychosis , when he feels as if his secret is about to be exposed , he is apt to take extreme measures to cover up his tracks . 
kyle just hopes that his wedding will live up to laura ' s demanding expectations . then , there ' s moore ( leland orser ) who speaks 5 lines and walks around with a puzzled look on his face . the problem with this reprehensible movie is that it wants to be a cruel comedy , but it presents things in a manner that just aren ' t funny . drugs , mutilation , and killing your own friends isn ' t something to be laughed at . as a straight psychological drama , i could see how it might have worked , as each one tried to maneuver and overcome the weight of their own guilt in their own sometimes - sick ways . but this movie insults us by assuming that we could simply discard our values for 2 hours . if you do like this movie , i don ' t think that i want to know you . i did find slater a convincing leader who sways his friends to choose not the right thing but the ? smart play . ' and diaz adds some brightness to this film as a wedding - needing fiancee . but her talents are essentially wasted here . it ' s obvious that the film maker is trying to strike a certain tone . but the way that he chooses to do it is tasteless . do not make a very bad decision by seeing this film "

# Split the example review into sentences and drop the stop words from each.
sent_text = nltk.sent_tokenize(example_sent)
sentences = []  # never filled in the visible cells; kept in case other cells read it
no_stopwords_sentences = []
for sentence in sent_text:
    word_tokens = word_tokenize(sentence)
    # NLTK's English stop-word list is lower-case, so compare lower-cased
    # tokens. A single filter replaces the earlier pair, where the first
    # result was immediately overwritten by a case-sensitive second pass.
    filtered_sentence = [w for w in word_tokens if w.lower() not in stop_words]
    no_stopwords_sentences.append(filtered_sentence)

In [None]:
# WordNet lemmatizer used by get_lesk_synsets() when lemmatize=True.
lemmatizer = WordNetLemmatizer()

In [None]:
# spaCy English pipeline; get_lesk_synsets() reads each token's `pos_` from it.
nlp = spacy.load("en_core_web_sm")

In [None]:
def get_sentences(text:str, remove_stopwords:bool = False) -> list:
    """Split ``text`` into sentences, optionally removing stop words.

    Args:
        text: Raw document, split with ``nltk.sent_tokenize``.
        remove_stopwords: When true, every sentence is word-tokenized and
            tokens whose lower-cased form is in the module-level
            ``stop_words`` set are dropped.

    Returns:
        A list of sentence strings when ``remove_stopwords`` is false;
        otherwise a list of token lists, one per sentence.
    """
    sent_list = nltk.sent_tokenize(text)
    if not remove_stopwords:
        return sent_list

    no_stopwords_sentences = []
    # Iterate the sentences of the text passed in (``sent_list``), not a
    # module-level variable left over from another cell.
    for sentence in sent_list:
        word_tokens = word_tokenize(sentence)
        no_stopwords_sentences.append(
            [w for w in word_tokens if w.lower() not in stop_words]
        )
    return no_stopwords_sentences

def get_synsets(sentences:list, ts:'TextServer' = ts) -> list:
    """Run ``ts.senses`` on each sentence and collect the responses.

    Args:
        sentences: Items passed one at a time to ``ts.senses``.
        ts: Client exposing a ``senses`` method. The default is the
            module-level ``ts`` object as it existed when this function
            was defined.

    Returns:
        The ``ts.senses`` results, in the same order as ``sentences``.
    """
    return [ts.senses(sentence) for sentence in sentences]

def get_lesk_synsets(text:str, lemmatize:bool = False, remove_stopwords:bool = False):
    """Disambiguate the content words of ``text`` with NLTK's Lesk algorithm.

    Args:
        text: Sentence (or any text) to analyse.
        lemmatize: When true, the Lesk context tokens are lemmatized with the
            module-level ``lemmatizer``.
        remove_stopwords: When true, tokens whose lower-cased form is in the
            module-level ``stop_words`` set are removed from the Lesk context.

    Returns:
        A list of WordNet synsets, one per noun/adjective/adverb/verb token
        (as tagged by the module-level spaCy pipeline ``nlp``) for which Lesk
        found a sense with the matching part of speech.
    """
    # spaCy coarse POS tag -> WordNet POS letter understood by ``lesk``.
    wordnet_pos = {"NOUN": "n", "ADJ": "a", "ADV": "r", "VERB": "v"}

    # The context only influences which sense Lesk picks; the words that get
    # disambiguated always come from the spaCy tokenization of the raw text.
    tokens = word_tokenize(text)
    if lemmatize:
        tokens = [lemmatizer.lemmatize(token) for token in tokens]
    if remove_stopwords:
        tokens = [w for w in tokens if w.lower() not in stop_words]

    words = []
    for token in nlp(text):
        pos = wordnet_pos.get(token.pos_)
        if pos is None:
            # Not a content word (punctuation, determiner, ...): skip it.
            continue
        # Keep the sense restricted to the tagged part of speech, so that a
        # word tagged as a noun is never resolved to one of its verb senses.
        syn = lesk(tokens, token.text, pos=pos)
        if syn is not None:
            words.append(syn)
    return words

def get_lesk_all_synsets(sentences:list) -> list:
    """Apply ``get_lesk_synsets`` (default options) to every sentence.

    Args:
        sentences: Sentences to disambiguate, one call per element.

    Returns:
        A list with one ``get_lesk_synsets`` result per input sentence, in
        input order.
    """
    return [get_lesk_synsets(sentence) for sentence in sentences]
    
def all_synsets():
    """Collect the TextServer senses for every opinion in ``X_test``.

    Each opinion is split into sentences with ``get_sentences`` and the
    sentences are sent to ``get_synsets`` (using its default client).

    Returns:
        A list with one entry per opinion in ``X_test``; each entry is the
        list returned by ``get_synsets`` for that opinion's sentences.
    """
    # Return the per-opinion results so the work done here is usable.
    return [get_synsets(get_sentences(opinion)) for opinion in X_test]

def get_sentiment(synset:'Synset'):
    """Look up the SentiWordNet scores for ``synset``.

    Args:
        synset: Object whose ``name()`` is passed to ``swn.senti_synset``.

    Returns:
        A ``(positive_score, negative_score)`` tuple, or ``None`` when the
        SentiWordNet lookup yields a falsy result.
    """
    senti = swn.senti_synset(synset.name())
    if not senti:
        return None
    return (senti.pos_score(), senti.neg_score())


In [None]:
# Download NLTK's 'universal_tagset' resource.
# NOTE(review): the visible cells take POS tags from spaCy, not NLTK —
# confirm this download is still needed.
nltk.download('universal_tagset')

In [None]:
# Quick check on the first test opinion: print its sentences, then the Lesk
# synsets found for each of those sentences.
s = get_sentences(X_test[0])
print(s)
syns = get_lesk_all_synsets(s)
print(syns)

In [None]:
# NOTE(review): `full_sentence` is not assigned in any visible cell, so on a
# fresh run this would presumably raise NameError — confirm where it is meant
# to come from, or drop the cell.
print(full_sentence)

In [None]:
# Show the raw first element of the test set.
print(X_test[0])

In [None]:
# Label every opinion by comparing the summed SentiWordNet positive and
# negative scores of its Lesk-disambiguated synsets.
results = []
for opinion in X_test:
    s = get_sentences(opinion)
    syns = get_lesk_all_synsets(s)
    total_pos = 0
    total_neg = 0
    for sentence in syns:
        # Score each synset exactly once; get_sentiment() returns None for
        # synsets it has no scores for, and those are skipped.
        scores = [
            score
            for score in map(get_sentiment, sentence)
            if score is not None
        ]
        total_pos += sum(score[0] for score in scores)
        total_neg += sum(score[1] for score in scores)
    # 1 = positive. Negative totals and ties (neutral) are both labelled 0.
    results.append(1 if total_pos > total_neg else 0)


In [None]:
# Accuracy of the predicted labels in `results` against `y_test`.
print(accuracy_score(y_test, results))