In [1]:
import re 
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import pickle
from google.colab import drive
import warnings

warnings.filterwarnings('ignore')

## Helper functions
def load_from_pickle(directory):
    """Load and return the object stored in a pickle file.

    Args:
        directory: Path of the pickle file to read (despite the name, this
            is passed straight to ``open`` and so must be a file path).

    Returns:
        Whatever object ``pickle.load`` reconstructs from the file.

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError: If the content is not a valid pickle.
    """
    # NOTE(security): unpickling can execute arbitrary code, so only use
    # this on files from a trusted source.
    # The context manager closes the handle even if unpickling fails.
    with open(directory, "rb") as pickle_file:
        return pickle.load(pickle_file)

def read_data(file):
    """Read a text file into a list of ``[label, text]`` pairs.

    Each line is stripped, then split at its first ``]``: the label is the
    span from index 1 up to that ``]`` with runs of whitespace collapsed to
    single spaces, and the text is everything after the ``]``, stripped.

    Args:
        file: Path of the text file to read.

    Returns:
        A list with one ``[label, text]`` entry per line, in file order.
    """
    rows = []
    with open(file, 'r') as handle:
        for raw in handle:
            stripped = raw.strip()
            # Position of the first "]"; -1 when the line has none, in
            # which case the slices below simply follow Python's rules.
            close = stripped.find("]")
            label = ' '.join(stripped[1:close].split())
            text = stripped[close + 1:].strip()
            rows.append([label, text])
    return rows

# NOTE(review): `file` is not referenced anywhere else in the visible
# notebook (read_data is never called) - kept in case another cell uses it.
file = 'dataset.txt'

# Make Google Drive available under /content/gdrive (Colab only).
drive.mount('/content/gdrive')

# Read from the absolute mount point so the load does not depend on the
# kernel's current working directory happening to be /content.
data = load_from_pickle(directory="/content/gdrive/My Drive/merged_training.pkl")

# Keep only rows whose label is one of the six emotions handled below.
emotions = [ "sadness", "joy", "love", "anger", "fear", "surprise"]
data = data[data["emotions"].isin(emotions)]

print("Number of instances in dataset: {}".format(len(data)))


def ngram(token, n):
    """Build the space-joined n-grams of a token sequence.

    Args:
        token: Sequence of string tokens.
        n: Number of consecutive tokens per n-gram.

    Returns:
        A list of strings, one per window of ``n`` consecutive tokens, in
        order of appearance; empty when ``token`` has fewer than ``n`` items.
    """
    # `last` is the index of the final token in each window.
    return [
        ' '.join(token[last - n + 1:last + 1])
        for last in range(n - 1, len(token))
    ]

def create_feature(text, nrange=(1, 1)):
    """Turn a sentence into a bag of word n-grams plus punctuation tokens.

    Args:
        text: The raw sentence.
        nrange: ``(min_n, max_n)`` inclusive range of word n-gram sizes.

    Returns:
        A ``Counter`` mapping each feature string to its occurrence count.
    """
    lowered = text.lower()
    # Word tokens: anything other than a-z, 0-9 or '#' acts as a separator.
    words = re.sub('[^a-z0-9#]', ' ', lowered).split()
    # Symbol tokens: blank out letters and digits and keep what is left.
    symbols = re.sub('[a-z0-9]', ' ', lowered).split()

    counts = Counter()
    for size in range(nrange[0], nrange[1] + 1):
        counts.update(ngram(words, size))
    # Symbols are only ever counted as unigrams.
    counts.update(ngram(symbols, 1))
    return counts

def convert_label(item, name):
    """Join the names whose matching entry in ``item`` equals 1.

    Args:
        item: Indexable sequence of flags; positions holding ``1`` are kept.
        name: Indexable sequence of label names, addressed by the same index.

    Returns:
        The selected names separated by single spaces, with surrounding
        whitespace stripped; an empty string when nothing is selected.
    """
    selected = (name[pos] for pos in range(len(item)) if item[pos] == 1)
    return " ".join(selected).strip()


X_all = []
y_all = []
# Walk the two columns in lockstep. The underlying arrays are fetched once
# here instead of once per row, and no manual index counter is needed.
for label, text in zip(data["emotions"].values, data["text"].values):
    y_all.append(str(label).strip())
    # One Counter of word 1- to 4-grams (plus punctuation tokens) per row.
    X_all.append(create_feature(str(text), nrange=(1, 4)))


# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X_all, y_all, test_size = 0.2, random_state = 123)

def train_test(clf, X_train, X_test, y_train, y_test):
    """Fit a classifier and score it on the training and test splits.

    Args:
        clf: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.

    Returns:
        A ``(train_acc, test_acc)`` tuple of ``accuracy_score`` results.
    """
    clf.fit(X_train, y_train)

    # Score the training split first, then the held-out split.
    splits = ((X_train, y_train), (X_test, y_test))
    return tuple(
        accuracy_score(expected, clf.predict(features))
        for features, expected in splits
    )

# Turn the per-sentence Counters into a sparse feature matrix. The
# vectorizer is fitted on the training split only and then reused as-is
# for the test split.
from sklearn.feature_extraction import DictVectorizer
vectorizer = DictVectorizer(sparse = True)
X_train = vectorizer.fit_transform(X_train)
X_test = vectorizer.transform(X_test)


# Original author's note: LinearSVC was tested to have the highest accuracy.
# The commented-out lines are the other classifiers that were tried.
lsvc = LinearSVC(random_state=123)
#svc = SVC()
#rforest = RandomForestClassifier(random_state=123)
#dtree = DecisionTreeClassifier()
clifs = [lsvc] 
 
# Print a markdown-style table with one row per classifier.
print("| {:25} | {} | {} |".format("Classifier", "Training Accuracy", "Test Accuracy"))
print("| {} | {} | {} |".format("-"*25, "-"*17, "-"*13))
# NOTE: `clf` stays bound to the last classifier after this loop finishes;
# the interactive prediction cell relies on that leftover binding.
for clf in clifs: 
    clf_name = clf.__class__.__name__
    train_acc, test_acc = train_test(clf, X_train, X_test, y_train, y_test)
    print("| {:25} | {:17.7f} | {:13.7f} |".format(clf_name, train_acc, test_acc))



Mounted at /content/gdrive
Number of instances in dataset: 416809
| Classifier                | Training Accuracy | Test Accuracy |
| ------------------------- | ----------------- | ------------- |
| LinearSVC                 |         0.9573216 |     0.8780739 |


In [5]:
# Emoji shown next to each predicted emotion label.
emoji_dict = {"sadness":"😢", "joy":"😂", "love":"😍", "anger":"😠", "fear":"😱", "surprise":"😳"}

# One sentinel and one prompt, so the instruction shown to the user always
# matches the word that actually ends the loop.
STOP_WORD = "BYE"
PROMPT = "Enter a sentence or enter BYE to stop... \n"

# Interactive demo: classify each typed sentence until the stop word is entered.
while True:
    check = input(PROMPT)
    if check == STOP_WORD:
        break
    text = str(check)
    features = create_feature(text, nrange=(1, 4))
    # DictVectorizer.transform is documented to take an iterable of
    # mappings, so pass the single Counter as a one-element list.
    features = vectorizer.transform([features])
    # `clf` is the loop variable left bound by the training cell.
    prediction = clf.predict(features)[0]
    print("(Emotion: "+ prediction + " " + emoji_dict[prediction] + ")")



Enter a sentence or enter BYE to stop... 
My dog died yesterday
(Emotion: sadness 😢)
Enter a sentence or enter END to stop... 
I feel romantic especially around valentines day
(Emotion: love 😍)
Enter a sentence or enter END to stop... 
Today was such a fun day
(Emotion: joy 😂)
Enter a sentence or enter END to stop... 
i don't like to feel uncomfortable with being alone
(Emotion: fear 😱)
Enter a sentence or enter END to stop... 
oh no you didnt
(Emotion: anger 😠)
Enter a sentence or enter END to stop... 
My friend broke my phone
(Emotion: sadness 😢)
Enter a sentence or enter END to stop... 
BYE
