## Load dataset and take sample reviews

In [2]:
import pandas as pd

# Read the review dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('../dataIngestion/dataset.csv')
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,app_id,app_name,review_text,review_score,review_votes
0,10,Counter-Strike,Ruined my life.,1,0
1,10,Counter-Strike,This will be more of a ''my experience with th...,1,1
2,10,Counter-Strike,This game saved my virginity.,1,0
3,10,Counter-Strike,• Do you like original games? • Do you like ga...,1,0
4,10,Counter-Strike,"Easy to learn, hard to master.",1,1


In [60]:
## Sample reviews with equal number of positive and negative reviews

N = 1000  # number of reviews from each class

# Assign the filled column back instead of calling fillna(inplace=True) on the
# df['review_text'] selection: the in-place form works on an intermediate
# object, which pandas warns about and which does not update df under
# copy-on-write.
df['review_text'] = df['review_text'].fillna('')

# Build the "not an Early Access review" filter once and reuse it for both
# classes. regex=False because the marker is a literal phrase, not a pattern.
not_early_access = ~df['review_text'].str.contains("Early Access Review", regex=False)

# Fixed random_state keeps the sample reproducible across runs.
positive_reviews = df[(df['review_score'] == 1) & not_early_access].sample(N, random_state=1)
negative_reviews = df[(df['review_score'] == -1) & not_early_access].sample(N, random_state=1)

# Positives first, then negatives, with a fresh 0..2N-1 index.
df_sample = pd.concat([positive_reviews, negative_reviews]).reset_index(drop=True)
df_sample.head()

Unnamed: 0,app_id,app_name,review_text,review_score,review_votes
0,292030,The Witcher 3: Wild Hunt,It's so coooool!!,1,0
1,231160,The Swapper,"Buy it, maybe 14€ are too much for a game that...",1,0
2,270550,Yet Another Zombie Defense,Excellent for the price,1,0
3,219640,Chivalry: Medieval Warfare,"Very, very fun and immersive. This game can ho...",1,0
4,15500,The Wonderful End of the World,this is fun,1,0


## Flair NLP functions

In [79]:
from flair.nn import Classifier
from flair.data import Sentence
from segtok.segmenter import split_single

# Load the Flair 'sentiment' classifier once; predict() below reuses this
# module-level instance for every review.
classifier = Classifier.load('sentiment')

def predict(sentence):
    """Predict the sentiment of a sentence with the module-level Flair classifier.

    Args:
        sentence: Text to classify.

    Returns:
        The label confidence rounded to 3 decimals: positive when the label
        value is 'POSITIVE', negated otherwise. Returns 0 for an empty or
        whitespace-only string, or when the classifier attaches no label.
    """
    # Nothing to classify: skip the model call entirely.
    if isinstance(sentence, str) and not sentence.strip():
        return 0

    text = Sentence(sentence)
    classifier.predict(text)  # labels are read back from `text` afterwards

    # No label attached -> neutral score (replaces the former IndexError catch).
    if not text.labels:
        return 0

    # Read value and confidence from the same label so they cannot disagree.
    label = text.labels[0].to_dict()
    confidence = label['confidence']
    signed = confidence if label['value'] == 'POSITIVE' else -confidence
    return round(signed, 3)


def normalize_predictions(score):
    """Collapse a signed score to a class label: 1 if strictly positive, else -1."""
    return 1 if score > 0 else -1

## Predictions on raw texts

In [62]:
# Score the raw (uncleaned) review texts, then derive a hard -1/1 label from
# each signed score so it can be compared with review_score.
df_flair_raw = df_sample.copy().assign(
    flair_score=lambda frame: frame['review_text'].apply(predict),
    flair_scores_normalized=lambda frame: frame['flair_score'].apply(normalize_predictions),
)
df_flair_raw.head()


Unnamed: 0,app_id,app_name,review_text,review_score,review_votes,flair_score,flair_scores_normalized
0,292030,The Witcher 3: Wild Hunt,It's so coooool!!,1,0,1.0,1
1,231160,The Swapper,"Buy it, maybe 14€ are too much for a game that...",1,0,-0.997,-1
2,270550,Yet Another Zombie Defense,Excellent for the price,1,0,0.993,1
3,219640,Chivalry: Medieval Warfare,"Very, very fun and immersive. This game can ho...",1,0,0.984,1
4,15500,The Wonderful End of the World,this is fun,1,0,0.977,1


In [63]:
from sklearn.metrics import confusion_matrix

def print_confusion_matrix(df):
    """Build a labelled 2x2 confusion matrix of review_score vs. Flair prediction.

    Despite the name, nothing is printed here; the frame is returned and the
    caller decides how to show it.

    Args:
        df: DataFrame with a 'review_score' column (actual class) and a
            'flair_scores_normalized' column (predicted class).

    Returns:
        DataFrame with rows "Actual -1" / "Actual 1" and columns
        "Predicted -1" / "Predicted 1".
    """
    # Pin the class order explicitly: without `labels`, the matrix shape and
    # ordering depend on which values happen to occur in the data, which would
    # break (or mislabel) the fixed 2x2 row/column names below.
    confusion = confusion_matrix(
        df['review_score'],
        df['flair_scores_normalized'],
        labels=[-1, 1],
    )
    confusion_df = pd.DataFrame(
        confusion,
        columns=["Predicted -1", "Predicted 1"],
        index=["Actual -1", "Actual 1"],
    )

    return confusion_df


def calculate_metrics(confusion_df):
    """Compute accuracy, precision, recall and F1 from a 2x2 confusion matrix.

    The positive class is 1.

    Args:
        confusion_df: DataFrame with rows "Actual -1" / "Actual 1" and columns
            "Predicted -1" / "Predicted 1".

    Returns:
        Tuple (accuracy, precision, recall, F1). A metric whose denominator is
        zero (e.g. precision when nothing was predicted positive) is reported
        as 0.0 instead of NaN or a division error.
    """
    TP = confusion_df.at['Actual 1', 'Predicted 1']
    TN = confusion_df.at['Actual -1', 'Predicted -1']
    FP = confusion_df.at['Actual -1', 'Predicted 1']
    FN = confusion_df.at['Actual 1', 'Predicted -1']

    total = TP + TN + FP + FN
    predicted_positive = TP + FP
    actual_positive = TP + FN

    # Guard every denominator: an empty matrix or a missing class must not
    # produce NaN or raise.
    accuracy = (TP + TN) / total if total else 0.0
    precision = TP / predicted_positive if predicted_positive else 0.0
    recall = TP / actual_positive if actual_positive else 0.0
    F1 = (2 * precision * recall) / (precision + recall) if (precision + recall) else 0.0

    return (accuracy, precision, recall, F1)

In [64]:
# Confusion matrix for the predictions made on the raw review texts.
confusion_flair_raw = print_confusion_matrix(df_flair_raw)
# print_confusion_matrix only builds the frame, so print it explicitly here.
print(confusion_flair_raw)

           Predicted -1  Predicted 1
Actual -1           943           57
Actual 1            232          768


In [65]:
# Summary metrics for the raw-text predictions, one per line, 3 decimals each.
accuracy, precision, recall, F1 = calculate_metrics(confusion_flair_raw)

print(
    f"Accuracy: {accuracy:.3f}",
    f"Precision: {precision:.3f}",
    f"Recall: {recall:.3f}",
    f"F1: {F1:.3f}",
    sep="\n",
)

Accuracy: 0.856
Precision: 0.931
Recall: 0.768
F1: 0.842


## Predictions with cleaned data

In [66]:
import nltk
# Fetch the NLTK 'stopwords' and 'punkt' resources.
# NOTE(review): presumably required by the cleaning helpers imported from
# dataIngestion.dataset_clean below — confirm against that module.
nltk.download('stopwords')
nltk.download('punkt')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\mlisc\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\mlisc\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [67]:
from dataIngestion.dataset_clean import cleaning

# Work on a copy so df_sample keeps the raw text for the other experiments.
df_flair_cleaned = df_sample.copy()
# cleaning() is called for its side effect only (its return value is discarded);
# the preview below shows review_text already rewritten on df_flair_cleaned.
cleaning(df_flair_cleaned, "review_text")
df_flair_cleaned.head()

Unnamed: 0,app_id,app_name,review_text,review_score,review_votes
0,292030,The Witcher 3: Wild Hunt,coooool,1,0
1,231160,The Swapper,buy mayb much game last hour without troubl ga...,1,0
2,270550,Yet Another Zombie Defense,excel price,1,0
3,219640,Chivalry: Medieval Warfare,fun immers game hold player fight consist hold...,1,0
4,15500,The Wonderful End of the World,fun,1,0


In [68]:
# Score the cleaned review texts, then derive a hard -1/1 label from each
# signed score so it can be compared with review_score.
df_flair_cleaned = df_flair_cleaned.assign(
    flair_score=lambda frame: frame['review_text'].apply(predict),
    flair_scores_normalized=lambda frame: frame['flair_score'].apply(normalize_predictions),
)
df_flair_cleaned.head()

Unnamed: 0,app_id,app_name,review_text,review_score,review_votes,flair_score,flair_scores_normalized
0,292030,The Witcher 3: Wild Hunt,coooool,1,0,0.977,1
1,231160,The Swapper,buy mayb much game last hour without troubl ga...,1,0,0.844,1
2,270550,Yet Another Zombie Defense,excel price,1,0,0.999,1
3,219640,Chivalry: Medieval Warfare,fun immers game hold player fight consist hold...,1,0,0.648,1
4,15500,The Wonderful End of the World,fun,1,0,0.987,1


In [69]:
# Confusion matrix for the predictions made on the cleaned review texts.
confusion_flair_cleaned = print_confusion_matrix(df_flair_cleaned)
# print_confusion_matrix only builds the frame, so print it explicitly here.
print(confusion_flair_cleaned)

           Predicted -1  Predicted 1
Actual -1           817          183
Actual 1            341          659


In [70]:
# Summary metrics for the cleaned-text predictions, one per line, 3 decimals each.
accuracy, precision, recall, F1 = calculate_metrics(confusion_flair_cleaned)

print(
    f"Accuracy: {accuracy:.3f}",
    f"Precision: {precision:.3f}",
    f"Recall: {recall:.3f}",
    f"F1: {F1:.3f}",
    sep="\n",
)

Accuracy: 0.738
Precision: 0.783
Recall: 0.659
F1: 0.716


## Evaluate performance after each individual cleaning step (each applied in isolation to the raw text)

In [80]:
from dataIngestion.dataset_clean import *
# Cleaning functions to evaluate, in the order they are reported as steps 1..8.
cleaning_steps = [clean_hyperlinks_and_markup, deEmojify, remove_num, remove_symbols, remove_punctuation,
                  remove_stopword, unify_whitespaces, stemming]

# Every step is applied on its own to a fresh copy of the raw sample (steps are
# not chained), so each report isolates the effect of a single cleaning function.
for i, step in enumerate(cleaning_steps, start=1):
    df_step = df_sample.copy()
    df_step['review_text'] = df_step['review_text'].apply(step)
    df_step['flair_score'] = df_step['review_text'].apply(predict)
    df_step['flair_scores_normalized'] = df_step['flair_score'].apply(normalize_predictions)

    confusion_df = print_confusion_matrix(df_step)
    (accuracy, precision, recall, F1) = calculate_metrics(confusion_df)

    print("*** Step {}:".format(i))
    print(f"Accuracy: {accuracy:.3f}")
    print(f"Precision: {precision:.3f}")
    print(f"Recall: {recall:.3f}")
    print(f"F1: {F1:.3f}")

*** Step 1:
Accuracy: 0.856
Precision: 0.931
Recall: 0.769
F1: 0.842
*** Step 2:
Accuracy: 0.856
Precision: 0.931
Recall: 0.768
F1: 0.842
*** Step 3:
Accuracy: 0.851
Precision: 0.930
Recall: 0.759
F1: 0.836
*** Step 4:
Accuracy: 0.859
Precision: 0.942
Recall: 0.765
F1: 0.844
*** Step 5:
Accuracy: 0.853
Precision: 0.940
Recall: 0.754
F1: 0.837
*** Step 6:
Accuracy: 0.795
Precision: 0.849
Recall: 0.718
F1: 0.778
*** Step 7:
Accuracy: 0.856
Precision: 0.931
Recall: 0.768
F1: 0.842
*** Step 8:
Accuracy: 0.818
Precision: 0.893
Recall: 0.723
F1: 0.799


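Accuracy and F1 drop noticeably only after stop-word removal (step 6: 0.795 / 0.778) and stemming (step 8: 0.818 / 0.799), compared with 0.856 / 0.842 on the raw text; every other step changes these metrics by less than 0.01.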