In [62]:
from google.colab import drive
from sys import path
import pandas as pd   
import math

# Project folder on the Google Drive mount point; data paths are built from it.
root = "/content/drive/My Drive/nlp-lab"

# Dataset selector. Exactly one assignment is active; the others are kept as
# ready-to-use alternatives.
# NOTE(review): "german2" has no matching branch in the loading cell below.
# dataset = "amazon-english"
# dataset = "organic-train"
# dataset = "german2"
dataset = "organic-test"

In [63]:
# Mount Google Drive at /content/drive so the files under `root` are reachable.
# force_remount=True asks Colab to mount again even if a mount already exists.
drive.mount('/content/drive', force_remount=True)

# Put the project folder on sys.path (`path` is sys.path). The membership check
# keeps the cell idempotent: re-running it no longer appends duplicate entries.
if root not in path:
  path.append(root)

Mounted at /content/drive


In [64]:
# Load the dataset chosen by `dataset` into `data_df`. Later cells read the
# columns "sentence_text" and "comment_sentiment" from this frame.
if dataset == "amazon-english":
  # NOTE(review): assumes the processed JSON already exposes the
  # "sentence_text" / "comment_sentiment" columns -- no rename happens here.
  data_location = root + '/data/processed-data/amazon-english'
  data_df = pd.read_json(data_location + '/processed_data_without_embeddings_no_outliers.json')

elif dataset == "organic-train":
  data_location = root + "/data/original-datasets/annotated-organic-dataset/train"
  # The train file is read with '|' as the field separator.
  data_df = (
      pd.read_csv(data_location + '/dataframe.csv', sep='|')
      # Rows without a sentence or a label cannot be scored.
      .dropna(subset=['Sentence', 'Sentiment'])
      .rename(columns={"Sentence": "sentence_text", "Sentiment": "comment_sentiment"})
  )

elif dataset == "organic-test":
  data_location = root + "/data/original-datasets/annotated-organic-dataset/test"
  # NOTE(review): unlike the train split, this file is read with the default
  # separator (no sep='|') -- confirm the two files really differ in format.
  data_df = (
      pd.read_csv(data_location + '/dataframe.csv')
      # Rows without a sentence or a label cannot be scored.
      .dropna(subset=['Sentence', 'Sentiment'])
      .rename(columns={"Sentence": "sentence_text", "Sentiment": "comment_sentiment"})
  )

else:
  # Fail here with a clear message instead of a NameError on `data_df` in a
  # later cell when the selector has no loader.
  raise ValueError(
      "Unsupported dataset {!r}; expected one of 'amazon-english', "
      "'organic-train', 'organic-test'".format(dataset)
  )

In [67]:
##############
#Prepare data#
##############
# Full dataset (every sentiment label): inputs for the multi-class evaluation.
data_x = data_df["sentence_text"].values.tolist()
data_y = data_df["comment_sentiment"].values.tolist()

# Binary subset: keep only rows whose label is not the neutral marker "0".
# The vectorized comparison yields a boolean mask directly, without calling a
# Python lambda once per row.
is_not_neutral = data_df['comment_sentiment'] != "0"
data_no_neutral_sentiment_df = data_df[is_not_neutral]

data_no_neutral_sentiment_x = data_no_neutral_sentiment_df["sentence_text"].values.tolist()
data_no_neutral_sentiment_y = data_no_neutral_sentiment_df["comment_sentiment"].values.tolist()

### Binary (without neutral sentiment)

In [68]:
# Setup for the VADER baseline: imports, lexicon download, analyzer instance.
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# accuracy_score is imported again in the evaluation cells further down.
from sklearn.metrics import accuracy_score
# Download the VADER lexicon before the analyzer is constructed below.
nltk.download('vader_lexicon')
# NOTE(review): `operator` is not used in any cell visible here.
import operator

# Shared analyzer instance; vader_polarity() reads it as a global.
vader = SentimentIntensityAnalyzer()

def vader_polarity(text):
  """Label `text` as positive ("p") or negative ("n") using VADER.

  Compares the 'pos' and 'neg' entries returned by the global `vader`
  analyzer. Returns "p" only when 'pos' is strictly greater than 'neg';
  ties (including texts with no sentiment-bearing words) return "n".

  Note: a later cell redefines `vader_polarity` with an extra `alpha`
  argument, which replaces this one-argument version once that cell runs.
  """
  scores = vader.polarity_scores(text)
  if scores['pos'] > scores['neg']:
    return "p"
  return "n"

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [69]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import multilabel_confusion_matrix

# Binary evaluation: label every non-neutral sentence with VADER, then score
# the labels against the gold annotations.
predictions = list(map(vader_polarity, data_no_neutral_sentiment_x))

# (output prefix, metric function, extra keyword arguments), in print order.
# average=None makes precision/recall return one score per class.
binary_report = (
    ("accuracy:", accuracy_score, {}),
    ("f1_micro:", f1_score, {"average": 'micro'}),
    ("f1_macro:", f1_score, {"average": 'macro'}),
    ("precision:", precision_score, {"average": None}),
    ("recall:", recall_score, {"average": None}),
)
for prefix, metric_fn, metric_kwargs in binary_report:
  print (prefix + str(metric_fn(data_no_neutral_sentiment_y, predictions, **metric_kwargs)))

accuracy:0.6555555555555556
f1_micro:0.6555555555555556
f1_macro:0.6496686431810254
precision:[0.54615385 0.75714286]
recall:[0.67619048 0.64242424]


### Multiple classes (with neutral sentiment)



In [70]:
# Setup for the three-class (positive / negative / neutral) VADER baseline.
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from sklearn.metrics import accuracy_score
# `nltk` itself is imported in the earlier binary-setup cell, which must run first.
nltk.download('vader_lexicon')
# NOTE(review): `operator` is not used in any cell visible here.
import operator

# Fresh analyzer instance; rebinds the global `vader` created in the binary section.
vader = SentimentIntensityAnalyzer()
# Half-width of the neutral band on VADER's compound score: sentences with
# -alpha < compound < alpha are labelled "0" by vader_polarity() below.
# NOTE(review): 0.3 is hard-coded with no stated rationale -- confirm the choice.
alpha = 0.3

def vader_polarity(text, alpha):
  """Label `text` as "p", "n" or "0" from VADER's compound score.

  Args:
    text: sentence passed to the global `vader` analyzer.
    alpha: threshold applied to the compound score.

  Returns:
    "p" when compound >= alpha, "n" when compound <= -alpha (checked only
    if the first test fails), and "0" otherwise.

  Note: this definition replaces the one-argument `vader_polarity` from the
  binary section once this cell has run.
  """
  compound = vader.polarity_scores(text)['compound']

  if compound >= alpha:
    return "p"
  if compound <= -alpha:
    return "n"
  return "0"

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [71]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import multilabel_confusion_matrix

# Three-class evaluation: label every sentence (neutral ones included) with the
# thresholded VADER classifier, then score against the gold annotations.
predictions = []
for sentence in data_x:
  predictions.append(vader_polarity(sentence, alpha))

# (output prefix, metric function, extra keyword arguments), in print order.
# average=None makes precision/recall return one score per class.
multiclass_report = (
    ("accuracy:", accuracy_score, {}),
    ("f1_micro:", f1_score, {"average": 'micro'}),
    ("f1_macro:", f1_score, {"average": 'macro'}),
    ("precision:", precision_score, {"average": None}),
    ("recall:", recall_score, {"average": None}),
)
for prefix, metric_fn, metric_kwargs in multiclass_report:
  print (prefix + str(metric_fn(data_y, predictions, **metric_kwargs)))

accuracy:0.48043478260869565
f1_micro:0.48043478260869565
f1_macro:0.4427915744611098
precision:[0.45418327 0.52380952 0.50898204]
recall:[0.6        0.20952381 0.51515152]
