# Assignment Two:  Sentiment Classification

For this exercise you will be using the "SemEval 2017 task 4" corpus provided on the module website, available through the following link: https://warwick.ac.uk/fac/sci/dcs/teaching/material/cs918/semeval-tweets.tar.bz2 You will focus particularly on Subtask A, i.e. classifying the overall sentiment of a tweet as positive, negative or neutral.

You are requested to produce a Jupyter notebook for the coursework submission. The input to your program is the SemEval data downloaded. Note that TAs need to run your program on their own machine by using the original SemEval data. As such, don’t submit a Python program that takes as input some preprocessed files.

#### Import necessary packages
You may import more packages here.

In [2]:
# Import necessary packages
import re
from os.path import join
import numpy as np

In [3]:
# Define test sets
testsets = ['twitter-test1.txt', 'twitter-test2.txt', 'twitter-test3.txt']


In [4]:
# Skeleton: Evaluation code for the test sets
def read_test(testset):
    '''
    readin the testset and return a dictionary
    :param testset: str, the file name of the testset to compare
    '''
    id_gts = {}
    with open(testset, 'r', encoding='utf8') as fh:
        for line in fh:
            fields = line.split('\t')
            tweetid = fields[0]
            gt = fields[1]

            id_gts[tweetid] = gt

    return id_gts


def confusion(id_preds, testset, classifier):
    '''
    print the confusion matrix of {'positive', 'netative'} between preds and testset
    :param id_preds: a dictionary of predictions formated as {<tweetid>:<sentiment>, ... }
    :param testset: str, the file name of the testset to compare
    :classifier: str, the name of the classifier
    '''
    id_gts = read_test(testset)

    gts = []
    for m, c1 in id_gts.items():
        if c1 not in gts:
            gts.append(c1)

    gts = ['positive', 'negative', 'neutral']

    conf = {}
    for c1 in gts:
        conf[c1] = {}
        for c2 in gts:
            conf[c1][c2] = 0

    for tweetid, gt in id_gts.items():
        if tweetid in id_preds:
            pred = id_preds[tweetid]
        else:
            pred = 'neutral'
        conf[pred][gt] += 1

    print(''.ljust(12) + '  '.join(gts))

    for c1 in gts:
        print(c1.ljust(12), end='')
        for c2 in gts:
            if sum(conf[c1].values()) > 0:
                print('%.3f     ' % (conf[c1][c2] / float(sum(conf[c1].values()))), end='')
            else:
                print('0.000     ', end='')
        print('')

    print('')


def evaluate(id_preds, testset, classifier):
    '''
    print the macro-F1 score of {'positive', 'netative'} between preds and testset
    :param id_preds: a dictionary of predictions formated as {<tweetid>:<sentiment>, ... }
    :param testset: str, the file name of the testset to compare
    :classifier: str, the name of the classifier
    '''
    id_gts = read_test(testset)

    acc_by_class = {}
    for gt in ['positive', 'negative', 'neutral']:
        acc_by_class[gt] = {'tp': 0, 'fp': 0, 'tn': 0, 'fn': 0}

    catf1s = {}

    ok = 0
    for tweetid, gt in id_gts.items():
        if tweetid in id_preds:
            pred = id_preds[tweetid]
        else:
            pred = 'neutral'

        if gt == pred:
            ok += 1
            acc_by_class[gt]['tp'] += 1
        else:
            acc_by_class[gt]['fn'] += 1
            acc_by_class[pred]['fp'] += 1

    catcount = 0
    itemcount = 0
    macro = {'p': 0, 'r': 0, 'f1': 0}
    micro = {'p': 0, 'r': 0, 'f1': 0}
    semevalmacro = {'p': 0, 'r': 0, 'f1': 0}

    microtp = 0
    microfp = 0
    microtn = 0
    microfn = 0
    for cat, acc in acc_by_class.items():
        catcount += 1

        microtp += acc['tp']
        microfp += acc['fp']
        microtn += acc['tn']
        microfn += acc['fn']

        p = 0
        if (acc['tp'] + acc['fp']) > 0:
            p = float(acc['tp']) / (acc['tp'] + acc['fp'])

        r = 0
        if (acc['tp'] + acc['fn']) > 0:
            r = float(acc['tp']) / (acc['tp'] + acc['fn'])

        f1 = 0
        if (p + r) > 0:
            f1 = 2 * p * r / (p + r)

        catf1s[cat] = f1

        n = acc['tp'] + acc['fn']

        macro['p'] += p
        macro['r'] += r
        macro['f1'] += f1

        if cat in ['positive', 'negative']:
            semevalmacro['p'] += p
            semevalmacro['r'] += r
            semevalmacro['f1'] += f1

        itemcount += n

    micro['p'] = float(microtp) / float(microtp + microfp)
    micro['r'] = float(microtp) / float(microtp + microfn)
    micro['f1'] = 2 * float(micro['p']) * micro['r'] / float(micro['p'] + micro['r'])

    semevalmacrof1 = semevalmacro['f1'] / 2

    print(testset + ' (' + classifier + '): %.3f' % semevalmacrof1)

#### Load training set, dev set and testing set
Here, you need to load the training set, the development set and the test set. For better classification results, you may need to preprocess tweets before sending them to the classifiers.

In [5]:
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')
nltk.download('universal_tagset')
nltk.download('wordnet')
#!pip install contractions
import contractions
lemmatizer = WordNetLemmatizer()
all_stopwords=stopwords.words('english')
all_stopwords.remove('not')

# Load training set, dev set and testing set
data = {}
tweetids = {}
tweetgts = {}
tweets = {}


for dataset in ['twitter-training-data.txt'] + testsets + ['twitter-dev-data.txt']:
    data[dataset] = []
    tweets[dataset] = []
    tweetids[dataset] = []
    tweetgts[dataset] = []

    # write code to read in the datasets here
    with open(dataset,'r', encoding='utf8') as f:
        for line in f:
            fields = line.split('\t')
            #lowercase
            fields[2]=fields[2].lower()
            #remove newlines
            fields[2]=re.sub(r"\n","", fields[2])
            #remove url and www
            fields[2]=re.sub(r"(http|ftp|www)\S+","",fields[2])
            #remove the user mentions (@) and hashtags (#)
            fields[2]=re.sub(r"([@#] ?)\S+","",fields[2])
            words_lem=[]
            #split line into words
            words_=fields[2].split()
            #use contractions (convert "shouldn't" to "should not")
            expanded_words=[]
            for word_ in words_:
                expanded_words.append(contractions.fix(word_)) 
            #remove stop words
            words_no_stop = [word for word in expanded_words if not word in all_stopwords]
            #use pos tagging to lemmatize the words
            words_pos_tag=nltk.pos_tag(words_no_stop, tagset = "universal")
            for i1 in range(len(words_no_stop)):
                type1=words_pos_tag[i1][1]
                if type1=="VERB":
                    type2=nltk.corpus.wordnet.VERB
                elif type1=="ADJ":
                    type2=nltk.corpus.wordnet.ADJ
                elif type1=="ADV":
                    type2=nltk.corpus.wordnet.ADV
                else:
                    type2=nltk.corpus.wordnet.NOUN
                words_lem.append(lemmatizer.lemmatize(words_pos_tag[i1][0],type2))
            new_fields2 =' '.join(words_lem)
            fields[2]=new_fields2
            #remove non alpha-numeric characters
            #fields[2]=re.sub(r"[^A-Za-z0-9 ]", "",fields[2])
            #remove all words starting with a number/digit
            fields[2]=re.sub(r"\b[0-9]\S*", "",fields[2])
            #remove all one letter characters 
            fields[2]=re.sub(r"\b[a-z]\b", "",fields[2])
            #assign fields accordingly  
            tweetid = fields[0]
            gt = fields[1]
            tweet = fields[2]
            data[dataset].append(fields)
            tweets[dataset].append(tweet)
            tweetids[dataset].append(tweetid)
            tweetgts[dataset].append(gt)
            


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Piotrek\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Piotrek\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package universal_tagset to
[nltk_data]     C:\Users\Piotrek\AppData\Roaming\nltk_data...
[nltk_data]   Package universal_tagset is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Piotrek\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


#### Build sentiment classifiers
You need to create your own classifiers (at least 3 classifiers). For each classifier, you can choose between the bag-of-word features and the word-embedding-based features. Each classifier has to be evaluated over 3 test sets. Make sure your classifier produce consistent performance across the test sets. Marking will be based on the performance over all 5 test sets (2 of them are not provided to you).

In [5]:
# Buid traditional sentiment classifiers. An example classifier name 'svm' is given
# in the code below. You should replace the other two classifier names
# with your own choices. For features used for classifier training, 
# the 'bow' feature is given in the code. But you could also explore the 
# use of other features.
for classifier in ['svc', 'NB', 'NB_tfidf']:
    for features in ['bow']:
        # Skeleton: Creation and training of the classifiers
        if classifier == 'svm':
            # write the svm classifier here
            print('Training ' + classifier)
        elif classifier == '<classifier-2-name>':
            # write the classifier 2 here
            print('Training ' + classifier)
        elif classifier == '<classifier-3-name>':
            # write the classifier 3 here
            print('Training ' + classifier)
        elif classifier == 'LSTM':
            # write the LSTM classifier here
            if features == 'bow':
                continue
            print('Training ' + classifier)
        else:
            print('Unknown classifier name' + classifier)
            continue

        # Predition performance of the classifiers
        for testset in testsets:
            id_preds = {}
            # write the prediction and evaluation code here

            testset_name = testset
            testset_path = join('semeval-tweets', testset_name)
            evaluate(id_preds, testset_path, features + '-' + classifier)

Training svm


FileNotFoundError: [Errno 2] No such file or directory: 'semeval-tweets\\twitter-test1.txt'

In [7]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(tweets['twitter-training-data.txt'])
# TfidfTransformer transoforms count matrix to tf-idf representation.
tfidf_transformer = TfidfTransformer()
# fit_transform transforms count matrix to tf-idf representation(vector).
X_train_tfidf = tfidf_transformer.fit_transform(X_train)
X_test1=vectorizer.transform(tweets['twitter-test1.txt'])
X_test2=vectorizer.transform(tweets['twitter-test2.txt'])
X_test3=vectorizer.transform(tweets['twitter-test3.txt'])
X_test_tfidf1=tfidf_transformer.transform(X_test1)
X_test_tfidf2=tfidf_transformer.transform(X_test2)
X_test_tfidf3=tfidf_transformer.transform(X_test3)
Y_train=tweetgts['twitter-training-data.txt']
Y_test1=tweetgts['twitter-test1.txt']
Y_test2=tweetgts['twitter-test2.txt']
Y_test3=tweetgts['twitter-test3.txt']
X_valid=vectorizer.transform(tweets['twitter-dev-data.txt'])
Y_valid=tweetgts['twitter-dev-data.txt']

# Naive Bayes model

In [11]:
#training the NB model
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import model_selection, naive_bayes, svm
from sklearn.metrics import accuracy_score
NB = naive_bayes.MultinomialNB()
NB.fit(X_train,Y_train)
#generate predictions for NB model
predictions_NB1 = NB.predict(X_test1)
predictions_NB2 = NB.predict(X_test2)
predictions_NB3 = NB.predict(X_test3)
#train NB model with tf-idf as model inputs 
NB.fit(X_train_tfidf,Y_train)
#generate predictions for NB model with tf-idf as model inputs
predictions_NB1_tfidf = NB.predict(X_test1)
predictions_NB2_tfidf = NB.predict(X_test2)
predictions_NB3_tfidf = NB.predict(X_test3)

#create dictionaries with the predictions
predictions_NB1_dict=dict(zip(tweetids['twitter-test1.txt'],predictions_NB1))
predictions_NB2_dict=dict(zip(tweetids['twitter-test2.txt'],predictions_NB2))
predictions_NB3_dict=dict(zip(tweetids['twitter-test3.txt'],predictions_NB3))
predictions_NB1_tfidf_dict=dict(zip(tweetids['twitter-test1.txt'],predictions_NB1_tfidf))
predictions_NB2_tfidf_dict=dict(zip(tweetids['twitter-test2.txt'],predictions_NB2_tfidf))
predictions_NB3_tfidf_dict=dict(zip(tweetids['twitter-test3.txt'],predictions_NB3_tfidf))
#evaluate the performance of the classifier
eval1=evaluate(predictions_NB1_dict,'twitter-test1.txt','NB')
eval2=evaluate(predictions_NB2_dict,'twitter-test2.txt','NB')
eval3=evaluate(predictions_NB3_dict,'twitter-test3.txt','NB')

#generate confusion matrices
conf1=confusion(predictions_NB1_dict,'twitter-test1.txt','NB')
conf2=confusion(predictions_NB2_dict,'twitter-test2.txt','NB')
conf3=confusion(predictions_NB3_dict,'twitter-test3.txt','NB')

#do the same for tf-idf

eval1=evaluate(predictions_NB1_tfidf_dict,'twitter-test1.txt','NB_tfidf')
eval2=evaluate(predictions_NB2_tfidf_dict,'twitter-test2.txt','NB_tfidf')
eval3=evaluate(predictions_NB3_tfidf_dict,'twitter-test3.txt','NB_tfidf')

conf1=confusion(predictions_NB1_tfidf_dict,'twitter-test1.txt','NB_tfidf')
conf2=confusion(predictions_NB2_tfidf_dict,'twitter-test2.txt','NB_tfidf')
conf3=confusion(predictions_NB3_tfidf_dict,'twitter-test3.txt','NB_tfidf')

twitter-test1.txt (NB): 0.512
twitter-test2.txt (NB): 0.475
twitter-test3.txt (NB): 0.476
            positive  negative  neutral
positive    0.610     0.081     0.309     
negative    0.114     0.692     0.194     
neutral     0.264     0.166     0.570     

            positive  negative  neutral
positive    0.675     0.074     0.251     
negative    0.217     0.478     0.304     
neutral     0.338     0.127     0.536     

            positive  negative  neutral
positive    0.631     0.088     0.281     
negative    0.223     0.466     0.311     
neutral     0.296     0.157     0.547     

twitter-test1.txt (NB_tfidf): 0.380
twitter-test2.txt (NB_tfidf): 0.402
twitter-test3.txt (NB_tfidf): 0.369
            positive  negative  neutral
positive    0.615     0.088     0.297     
negative    0.025     0.825     0.150     
neutral     0.249     0.205     0.546     

            positive  negative  neutral
positive    0.681     0.076     0.243     
negative    0.125     0.625     0.250  

# SVM model

In [31]:
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
#train the model
svc = OneVsRestClassifier(SVC()).fit(X_train, Y_train)
#generate predictions 
predictions_svc1=svc.predict(X_test1)
predictions_svc2=svc.predict(X_test2)
predictions_svc3=svc.predict(X_test3)
#create the dictionary with the predictions
predictions_svc1_dict=dict(zip(tweetids['twitter-test1.txt'],predictions_svc1))
predictions_svc2_dict=dict(zip(tweetids['twitter-test2.txt'],predictions_svc2))
predictions_svc3_dict=dict(zip(tweetids['twitter-test3.txt'],predictions_svc3))
#evaluate the perfomance
eval_svc1=evaluate(predictions_svc1_dict,'twitter-test1.txt','svc')
eval_svc2=evaluate(predictions_svc2_dict,'twitter-test2.txt','svc')
eval_svc3=evaluate(predictions_svc3_dict,'twitter-test3.txt','svc')
#generate confusion matrices
conf_svc1=confusion(predictions_svc1_dict,'twitter-test1.txt','svc')
conf_svc2=confusion(predictions_svc2_dict,'twitter-test2.txt','svc')
conf_svc3=confusion(predictions_svc3_dict,'twitter-test3.txt','svc')

twitter-test1.txt (svc): 0.543
twitter-test2.txt (svc): 0.555
twitter-test3.txt (svc): 0.476
            positive  negative  neutral
positive    0.785     0.054     0.161     
negative    0.112     0.807     0.081     
neutral     0.267     0.151     0.583     

            positive  negative  neutral
positive    0.810     0.056     0.134     
negative    0.104     0.806     0.090     
neutral     0.349     0.103     0.548     

            positive  negative  neutral
positive    0.763     0.079     0.157     
negative    0.211     0.648     0.141     
neutral     0.306     0.144     0.549     



# Logistic Regression


In [12]:
from sklearn.linear_model import LogisticRegression
#train the model
log_reg=LogisticRegression(solver='lbfgs', max_iter=1000)
log_reg.fit(X_train,Y_train)
#generate predictions
predictions_log_reg1 = log_reg.predict(X_test1)
predictions_log_reg2 = log_reg.predict(X_test2)
predictions_log_reg3 = log_reg.predict(X_test3)
#create dictionaries with the predictions
predictions_log_reg1_dict=dict(zip(tweetids['twitter-test1.txt'],predictions_log_reg1))
predictions_log_reg2_dict=dict(zip(tweetids['twitter-test2.txt'],predictions_log_reg2))
predictions_log_reg3_dict=dict(zip(tweetids['twitter-test3.txt'],predictions_log_reg3))
#evaluate the performance
eval_log_reg1=evaluate(predictions_log_reg1_dict,'twitter-test1.txt','log_reg')
eval_log_reg2=evaluate(predictions_log_reg2_dict,'twitter-test2.txt','log_reg')
eval_log_reg3=evaluate(predictions_log_reg3_dict,'twitter-test3.txt','log_reg')
#generate the confusion matrices
conf_log_reg1=confusion(predictions_log_reg1_dict,'twitter-test1.txt','log_reg')
conf_log_reg2=confusion(predictions_log_reg2_dict,'twitter-test2.txt','log_reg')
conf_log_reg3=confusion(predictions_log_reg3_dict,'twitter-test3.txt','log_reg')

twitter-test1.txt (log_reg): 0.559
twitter-test2.txt (log_reg): 0.571
twitter-test3.txt (log_reg): 0.536
            positive  negative  neutral
positive    0.691     0.060     0.249     
negative    0.158     0.653     0.189     
neutral     0.276     0.142     0.583     

            positive  negative  neutral
positive    0.736     0.049     0.215     
negative    0.178     0.645     0.178     
neutral     0.364     0.104     0.532     

            positive  negative  neutral
positive    0.711     0.066     0.223     
negative    0.205     0.545     0.250     
neutral     0.301     0.134     0.565     



# LSTM


In [8]:
#tokenize the words
from nltk.tokenize import word_tokenize
from collections import defaultdict,Counter
word_counter=Counter()
for sentences in tweets['twitter-training-data.txt']:
    words = list(word_tokenize(sentences))
    word_counter.update(words)

In [9]:
max_words=5000
words_5000=word_counter.most_common(max_words)
word2id=dict()
word2id={'unk': 0}
for i,word in enumerate(word_counter.keys()):
    word2id[word]=i+1
id2word = {v: k for k, v in word2id.items()}

In [10]:
#building the embedding dictionary
embeddings_dictionary = dict()
glove_name='glove.6B.100d.txt'
with open(glove_name,'r', encoding='utf8') as f2:
    for line in f2:
        word_ = line.split()
        word_vector = np.asarray(word_[1:],dtype='float16')
        embeddings_dictionary[word_[0]] = word_vector

In [11]:
#building the embedding matrix
import torch
embedding_dim=100;
embedding_matrix=torch.zeros(len(word_counter)+1, embedding_dim)
for word, index in word2id.items():
    embedding_vector = embeddings_dictionary.get(word)
    if embedding_vector is not None:
        embedding_matrix[index] = torch.from_numpy(embedding_vector)

In [12]:
#change the labels to numeric
train_labels_num=[]
for labels in tweetgts['twitter-training-data.txt']:
    if labels=='positive':
        train_labels_num.append(2)
    elif labels=='negative':
        train_labels_num.append(0)
    else:
        train_labels_num.append(1)
test1_labels_num=[]
test2_labels_num=[]
test3_labels_num=[]
valid_labels_num=[]
for labels in tweetgts['twitter-test1.txt']:
    if labels=='positive':
        test1_labels_num.append(2)
    elif labels=='negative':
        test1_labels_num.append(0)
    else:
        test1_labels_num.append(1)
for labels in tweetgts['twitter-test2.txt']:
    if labels=='positive':
        test2_labels_num.append(2)
    elif labels=='negative':
        test2_labels_num.append(0)
    else:
        test2_labels_num.append(1)
for labels in tweetgts['twitter-test3.txt']:
    if labels=='positive':
        test3_labels_num.append(2)
    elif labels=='negative':
        test3_labels_num.append(0)
    else:
        test3_labels_num.append(1)
for labels in tweetgts['twitter-dev-data.txt']:
    if labels=='positive':
        valid_labels_num.append(2)
    elif labels=='negative':
        valid_labels_num.append(0)
    else:
        valid_labels_num.append(1)

In [13]:
#prepare the lstm model
from torch.utils.data import Dataset,DataLoader
class TextDataSet(Dataset):
    def __init__(self, texts, labels=None):
        self.texts = texts
        self.labels = labels

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, index):
        text = self.texts[index]
        if self.labels is not None:
            label = self.labels[index]
            return text, label
        else:
            return text
        
def texts2tensor(texts,word2id,pad_token = 0,max_len = 100):
    indexes_list = [[word2id.get(word,0) for word in word_tokenize(text)] for text in texts]
    max_len = min(max_len,max([len(indexes) for indexes in indexes_list]))
    if max_len > 100:
        raise Exception("max > 100")
    truncated_indexes = [indexes[:max_len] for indexes in indexes_list]
    padded_indexes = [indexes+[0]*(max_len - len(indexes)) for indexes in truncated_indexes]
    return torch.LongTensor(padded_indexes)

def train_collate(batch_inputs):
    texts,labels = zip(*batch_inputs)
    input_tensor = texts2tensor(texts,word2id)
    return input_tensor,torch.LongTensor(labels)

train_dataset = TextDataSet(tweets['twitter-training-data.txt'],train_labels_num)
test1_dataset = TextDataSet(tweets['twitter-test1.txt'],test1_labels_num)
test2_dataset = TextDataSet(tweets['twitter-test2.txt'],test2_labels_num)
test3_dataset = TextDataSet(tweets['twitter-test3.txt'],test3_labels_num)
valid_dataset = TextDataSet(tweets['twitter-dev-data.txt'],valid_labels_num)
    
train_loader = DataLoader(train_dataset,batch_size= 20, shuffle = True,collate_fn=train_collate)
test1_loader = DataLoader(test1_dataset,batch_size=20,shuffle=False,collate_fn=train_collate)
test2_loader = DataLoader(test2_dataset,batch_size=20,shuffle=False,collate_fn=train_collate)
test3_loader = DataLoader(test3_dataset,batch_size=20,shuffle=False,collate_fn=train_collate)
valid_loader = DataLoader(valid_dataset,batch_size=20,shuffle=False,collate_fn=train_collate)


In [14]:
#define the lstm model for classification
from torch import nn
class LstmClassification(nn.Module):
    def __init__(self,embedding_matrix):
        super(LstmClassification,self).__init__()
        self.embedding_matrix=embedding_matrix
        #extract the dimensions of the embedding matrix 
        word_number=self.embedding_matrix.shape[0]
        embedding_dim=self.embedding_matrix.shape[1]
        #define the size of the hidden layer
        self.hidden_size=128;
        #define the number of layers
        self.num_layers=1;
        #define the linear activation function
        self.linear  = nn.Linear(in_features=self.hidden_size*1,out_features=3)
        #define the lstm model
        self.lstm = nn.LSTM(input_size=embedding_dim,hidden_size=self.hidden_size,num_layers=1,batch_first=True)
        #define embedding matrix
        self.embedding = nn.Embedding(word_number, embedding_dim)
        self.embedding.weight.data.copy_(embedding_matrix)
        #do not change the weights in the embedding matrix 
        self.embedding.weight.requires_grad=False
    def forward(self,inputs):
        #get outputs
        embedded = self.embedding(inputs)
        outputs,(hs,cs) = self.lstm(embedded)
        return  self.linear(outputs[:,-1,:])

In [15]:
#define the loss function
loss_func = nn.CrossEntropyLoss()

In [16]:
#train the model
device='cpu'
#initialize minimum validation loss
min_valid_loss=np.inf
#initialize the model
model = LstmClassification(embedding_matrix = embedding_matrix).to(device)
#define the number of epochs
epochs  = 8
#define the Adam optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
#start the training
for e in range(1,epochs+1):
    i1 = 0
    total_batch = len(train_loader)
    epoch_loss = 0
    for tweets_,labels in train_loader:
        i1=i1+1
        tweets_=tweets_.to(device)
        labels=labels.to(device)
        #calculate the result
        res = model(tweets_)
        optimizer.zero_grad()
        #calculate the loss
        loss = loss_func(res,labels)
        loss.backward()
        optimizer.step()
        #eupdate the epoch loss
        epoch_loss += loss.item()
    #the model with the lowest validation loss will be used for testing
    valid_loss = 0.0
    model.eval()
    for tweets_,labels in valid_loader:
        res = model(tweets_)
        loss_v =  loss_func(res,labels)
        valid_loss += loss_v.item()
    #update the mininimum validation loss if current loss is lower 
    if valid_loss<min_valid_loss:
        min_valid_loss=valid_loss
        print('Updated validation loss for epoch',e)
        torch.save(model.state_dict(),'lstm.pth')
    print('Current validation loss:', valid_loss)
    print(" mean loss:",epoch_loss/total_batch)

Updated validation loss for epoch 1
Current validation loss: 79.2295429110527
 mean loss: 0.8865138337491675
Updated validation loss for epoch 2
Current validation loss: 79.02526041865349
 mean loss: 0.7843568304434736
Updated validation loss for epoch 3
Current validation loss: 74.70885288715363
 mean loss: 0.7497183578022828
Updated validation loss for epoch 4
Current validation loss: 74.06291690468788
 mean loss: 0.7184780675997126
Updated validation loss for epoch 5
Current validation loss: 73.95404157042503
 mean loss: 0.6824796456737933
Current validation loss: 74.59383723139763
 mean loss: 0.6462708931178489
Current validation loss: 75.96393448114395
 mean loss: 0.6068626248627795
Current validation loss: 77.87227150797844
 mean loss: 0.5635580478354971


In [18]:
#load the best model
model.load_state_dict(torch.load('lstm.pth'))

<All keys matched successfully>

In [19]:
# predict the labels in the tests set
model.eval()
pred_loaders=[]
with torch.no_grad():
    for loader in [test1_loader,test2_loader,test3_loader]:
        pred_labels = []
        for tweets_,labels in loader:
            tweets_=tweets_.to('cpu')
            labels=labels.to('cpu')
            outputs=model(tweets_)
            val,pred_label=torch.max(outputs.data,1)
            pred_label_list = pred_label.cpu().detach().numpy().tolist()
            pred_labels = pred_labels+pred_label_list
        pred_loaders.append(pred_labels)

In [20]:
#convert labels back to negative, neutral and positive
pred_loaders_labels=[]
for loaders in pred_loaders:
    pred_loader_labels=[]
    for pred in loaders:
        if pred==0:
            pred_loader_labels.append('negative')
        elif pred==1:
            pred_loader_labels.append('neutral')
        elif pred==2:
            pred_loader_labels.append('positive')
    pred_loaders_labels.append(pred_loader_labels)
    
predictions_lstm1_dict=dict(zip(tweetids['twitter-test1.txt'],pred_loaders_labels[0]))
predictions_lstm2_dict=dict(zip(tweetids['twitter-test2.txt'],pred_loaders_labels[1]))
predictions_lstm3_dict=dict(zip(tweetids['twitter-test3.txt'],pred_loaders_labels[2]))

#evaluate the lstm model
eval_lstm1=evaluate(predictions_lstm1_dict,'twitter-test1.txt','lstm')
eval_lstm2=evaluate(predictions_lstm2_dict,'twitter-test2.txt','lstm')
eval_lstm3=evaluate(predictions_lstm3_dict,'twitter-test3.txt','lstm')

twitter-test1.txt (lstm): 0.623
twitter-test2.txt (lstm): 0.621
twitter-test3.txt (lstm): 0.587


In [21]:
confusion_lstm1=confusion(predictions_lstm1_dict,'twitter-test1.txt','lstm')
confusion_lstm2=confusion(predictions_lstm2_dict,'twitter-test2.txt','lstm')
confusion_lstm3=confusion(predictions_lstm3_dict,'twitter-test3.txt','lstm')

            positive  negative  neutral
positive    0.719     0.040     0.241     
negative    0.152     0.714     0.134     
neutral     0.215     0.151     0.634     

            positive  negative  neutral
positive    0.751     0.040     0.209     
negative    0.108     0.735     0.157     
neutral     0.306     0.114     0.580     

            positive  negative  neutral
positive    0.759     0.053     0.189     
negative    0.148     0.579     0.273     
neutral     0.282     0.127     0.591     

