In [1]:
import keras
import nltk
import pandas as pd
import numpy as np
import re
import codecs

Using TensorFlow backend.


In [2]:
# Load the emotion dataset from the working directory.
clean_questions = pd.read_csv("emotion.csv")
# Rename the columns to the names the later cells index by ("label", "sentences").
# This assignment requires the CSV to have exactly two columns.
clean_questions.columns=['label', 'sentences']
# Preview the first rows as a sanity check.
clean_questions.head()

Unnamed: 0,label,sentences
0,0,i feel bitter that it s not me
1,0,i feel the need to like her status even though...
2,0,i stayed home i'm feeling somewhat bitter
3,0,i sat there in frustration realizing that i wa...
4,0,i feel a little bitter towards the whole disease


In [3]:
from nltk.tokenize import RegexpTokenizer

# Tokens are maximal runs of word characters, so punctuation (including
# apostrophes, e.g. "i'm" -> "i", "m") acts as a separator.
tokenizer = RegexpTokenizer(r'\w+')

# Tokenize every sentence and store the token lists in a new "tokens" column.
clean_questions["tokens"] = clean_questions["sentences"].map(tokenizer.tokenize)
clean_questions.head()

Unnamed: 0,label,sentences,tokens
0,0,i feel bitter that it s not me,"[i, feel, bitter, that, it, s, not, me]"
1,0,i feel the need to like her status even though...,"[i, feel, the, need, to, like, her, status, ev..."
2,0,i stayed home i'm feeling somewhat bitter,"[i, stayed, home, i, m, feeling, somewhat, bit..."
3,0,i sat there in frustration realizing that i wa...,"[i, sat, there, in, frustration, realizing, th..."
4,0,i feel a little bitter towards the whole disease,"[i, feel, a, little, bitter, towards, the, who..."


# Label 0 == angry
# Label 1 == fear

In [6]:
from sklearn.model_selection import train_test_split
# Extract the labels and the raw sentences from the frame as plain Python lists.
list_labels = clean_questions["label"].tolist()
list_corpus = clean_questions["sentences"].tolist()

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    list_corpus,
    list_labels,
    test_size=0.2,
    random_state=40,
)

In [8]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
def tfidf(data):
    """Fit a default ``TfidfVectorizer`` on ``data`` and vectorize it.

    Parameters
    ----------
    data
        Collection of raw text documents to learn the vocabulary from
        (the notebook passes a list of sentences).

    Returns
    -------
    tuple
        ``(matrix, vectorizer)``: the result of ``fit_transform(data)``
        and the fitted vectorizer, so the caller can reuse it to
        transform other documents with the same vocabulary.
    """
    vectorizer = TfidfVectorizer()
    return vectorizer.fit_transform(data), vectorizer

# Fit the vectorizer on the training sentences only and vectorize them.
X_train_tfidf, tfidf_vectorizer = tfidf(X_train)
# Reuse the already-fitted vectorizer for the test sentences (transform only,
# no re-fitting), so the test split does not influence the learned vocabulary.
X_test_tfidf = tfidf_vectorizer.transform(X_test)

In [10]:
from sklearn.linear_model import LogisticRegression
# Build the classifier and train it on the TF-IDF training matrix in a single
# chained expression; ``fit`` returns the estimator itself.
clf_tfidf = LogisticRegression(
    C=30.0,
    class_weight='balanced',
    solver='newton-cg',
    multi_class='multinomial',
    n_jobs=-1,
    random_state=40,
).fit(X_train_tfidf, y_train)

# Predicted labels for the held-out test matrix.
y_predicted_tfidf = clf_tfidf.predict(X_test_tfidf)

In [11]:
# Display the labels predicted for the test split.
y_predicted_tfidf

array([1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1,
       0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0,
       0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0,
       1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0,
       0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [12]:
# Vectorize a single ad-hoc sentence with the already-fitted vectorizer;
# transform expects a collection of documents, hence the one-element list.
sent1 = tfidf_vectorizer.transform(["i feel anger"])
# Classify the sentence with the trained model.
clf_tfidf.predict(sent1)

array([0])