In [1]:
import pandas as pd
import re
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
import pickle
from time import sleep
import os
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import f1_score
import numpy as np
from sklearn.utils import class_weight

In [2]:
os.chdir('D:/MSC/NLP/NLP_ASS2/semeval-tweets/')
import twokenize

In [3]:
BASE_DIR = 'D:/MSC/NLP/NLP_ASS2/semeval-tweets/'

In [4]:
## Contractions
file = open(BASE_DIR + 'CONTRACTIONS.pkl', "rb")
CONTRACTION_MAP = pickle.load(file)
sleep(1)
del file
contraction = sorted(CONTRACTION_MAP, key=len, reverse=True)
# c_re = re.compile('(%s)' % '|'.join(contraction))
c_re = re.compile('|'.join(r'\b'+re.escape(u)+r'\b' for u in contraction))


def expandContractions(text, c_re=c_re):
    def replace(match):
        return CONTRACTION_MAP[match.group(0)]
    return c_re.sub(replace, text.lower())

## Emojis Regex
file = open(BASE_DIR + 'EMOJIS.pkl', "rb")
EMOJIS = pickle.load(file)
sleep(1)
del file
emojis = sorted(EMOJIS, key=len, reverse=True)
pattern = '|'.join(re.escape(u) for u in emojis)
regex_emoji = re.compile(pattern, re.U)

## Regex expression for Booster Increase and Decrease words
B_INC = ['exceptionally', 'substantially', 'considerable', 'considerably', 'particularly', 'tremendously', 'unbelievably',\
         'exceptional', 'absolutely', 'completely', 'enormously', 'especially', 'fabulously', 'incredible', 'incredibly',\
         'remarkably', 'thoroughly', 'tremendous', 'amazingly', 'decidedly', 'extremely', 'intensely', 'unusually',\
         'enormous', 'entirely', 'flipping', 'fracking', 'fricking', 'frigging', 'awfully', 'extreme', 'flippin',\
         'frackin', 'frickin', 'friggin', 'fucking', 'fugging', 'greatly', 'majorly', 'totally', 'utterly', 'deeply',\
         'effing', 'fuckin', 'fuggin', 'highly', 'hugely', 'purely', 'really', 'fully', 'hella', 'major', 'quite',\
         'total', 'utter', 'more', 'most', 'uber', 'very', 'so']
B_DEC = ['almost', 'barely', 'hardly', 'just enough', 'kind of', 'kinda', 'kindof', 'kind-of', 'less', 'little',\
         'marginal', 'marginally', 'occasional', 'occasionally', 'partly', 'scarce', 'scarcely', 'slight', 'slightly',\
         'somewhat', 'sort of', 'sorta', 'sortof', 'sort-of']
# pattern_binc = '|'.join(re.escape(u) for u in B_INC)
pattern_binc = re.compile('|'.join(r'\b'+re.escape(u)+r'\b' for u in B_INC))
# pattern_bdec = '|'.join(re.escape(u) for u in B_DEC)
pattern_bdec = re.compile('|'.join(r'\b'+re.escape(u)+r'\b' for u in B_DEC))

## Reading EXICON DICT 
file = open(BASE_DIR + 'VADER_LEXICONS_SCORE.pkl', "rb")
LEXICON_DICT = pickle.load(file)
sleep(1)
del file


file = open(BASE_DIR + 'POSITIVE.pkl', "rb")
POS_LIST = pickle.load(file)
sleep(1)
del file
POS_LIST = sorted(POS_LIST, key=len, reverse=True)
# pattern_pos = '|'.join(re.escape(u) for u in POS_LIST)
pattern_pos = re.compile('|'.join(r'\b'+re.escape(u)+r'\b' for u in POS_LIST))


file = open(BASE_DIR + 'NEGATIVE.pkl', "rb")
NEG_LIST = pickle.load(file)
sleep(1)
del file
NEG_LIST = sorted(NEG_LIST, key=len, reverse=True)
# pattern_neg = '|'.join(re.escape(u) for u in NEG_LIST)
pattern_neg = re.compile('|'.join(r'\b'+re.escape(u)+r'\b' for u in NEG_LIST))


file = open(BASE_DIR + 'BAD.pkl', "rb")
BAD_LIST = pickle.load(file)
sleep(1)
del file
BAD_LIST = sorted(BAD_LIST, key=len, reverse=True)
# pattern_bad = '|'.join(re.escape(u) for u in BAD_LIST)
pattern_bad = re.compile('|'.join(r'\b'+re.escape(u)+r'\b' for u in BAD_LIST))

In [5]:
def return_tok_val(sentence):
    pos_tok = []
    neg_tok = []
    tokens = twokenize.tokenizeRawTweetText(sentence)
    ln_all_tokens = len(tokens)
    for toke in tokens:
        val = LEXICON_DICT.get(toke)
        if val:
            if val >0:
                pos_tok.append(val)
            else:
                neg_tok.append(val)
    
    return tokens,ln_all_tokens,len(pos_tok),len(neg_tok),sum(pos_tok),sum(neg_tok)

def clean_data(data,col,re_emoji,re_inc_boostr,re_dec_boostr,re_pos,re_neg,re_bad):
    dataframe = data.copy()
    link_regex = re.compile(r'(?:ftp|https?|www|file)\.?:?[//|\\\\]?[\w\d:#@%/;$()~_?\+-=\\\&]+\.[\w\d:#@%/;$~_?\+-=\\\&]+')
    dataframe = dataframe.assign(**dict(zip([col,'num_link'], zip(*dataframe['text'].apply(lambda x: re.subn(link_regex,'LINK',x) ) ))))
    dataframe = dataframe.assign(**dict(zip([col,'num_usermention'], zip(*dataframe[col].apply(lambda x: re.subn(r'@[\w]*','USERMENTION',x)) ))))
    dataframe = dataframe.assign(**dict(zip([col,'num_hashtag'], zip(*dataframe[col].apply(lambda x: re.subn(r'#[\w]*','HASHTAG',x) ) ))))
    dataframe = dataframe.assign(**dict(zip([col,'num_emoji'], zip(*dataframe[col].apply(lambda x: re.subn(re_emoji, lambda m: EMOJIS.get(m.group(), 'EMOJI') , x) )))))
    dataframe[col] = dataframe[col].apply(lambda x : re.sub(r'(.)\1{2,}', r'\1',x)) # make looong as long
    dataframe[col] = dataframe[col].apply(lambda x : expandContractions(x) ) # expand contracts
    dataframe[col] = dataframe[col].str.lower()
    dataframe['NUM_INC_BOOSTR'] = dataframe[col].apply(lambda x : len(re.findall(re_inc_boostr,x)) ) # Booster Increasing Words
    dataframe['NUM_DEC_BOOSTR'] = dataframe[col].apply(lambda x : len(re.findall(re_dec_boostr,x)) ) # Booster Increasing Words
    dataframe['NUM_POS_WORDS'] = dataframe[col].apply(lambda x : len(re.findall(re_pos,x)) ) # Number of Positive words
    dataframe['NUM_NEG_WORDS'] = dataframe[col].apply(lambda x : len(re.findall(re_neg,x)) ) # Number of Positive words
    dataframe['NUM_BAD_WORDS'] = dataframe[col].apply(lambda x : len(re.findall(re_bad,x)) ) # Number of Positive words
    dataframe = dataframe.assign(**dict(zip([col,'num_exclaim'], zip(*dataframe[col].apply(lambda x: re.subn(r"[!]",'',x)  ) ))))  
    dataframe = dataframe.assign(**dict(zip([col,'num_punct'], zip(*dataframe[col].apply(lambda x: re.subn(r"['\"“”‘’.?!…,:;]",'',x)  ) ))))  
    
    dataframe = dataframe.assign(**dict(zip([col,'nm_all_tok','nm_pos_tok','nm_neg_tok','sm_pos_tok','sum_nrg_tok'], zip(*dataframe[col].apply(lambda x: return_tok_val(x) ) ))))
    dataframe['lex_tok'] = dataframe['nm_pos_tok'] + dataframe['nm_neg_tok']
    dataframe['total_lex_score'] = dataframe['sm_pos_tok'] + dataframe['sum_nrg_tok']
    dataframe['r_sco/tok'] = dataframe['total_lex_score']/dataframe['lex_tok']
    dataframe['sum_nrg_tok'] = dataframe['sum_nrg_tok'].abs()
    
    for col in dataframe.columns.difference(['clean_text','text','sentiment','nm_all_tok']):
        dataframe[col] = dataframe[col]/dataframe['nm_all_tok']
    for col in dataframe.columns.difference(['clean_text','text','sentiment']):
        dataframe[col] += abs(dataframe[col].min())
        dataframe[col] = dataframe[col]/dataframe[col].max()   
    dataframe = dataframe.fillna(0)
    
    return dataframe

In [6]:
df_train = pd.read_csv(BASE_DIR + 'twitter-training-data.txt',sep='\t',names=['ID','sentiment','text']).drop('ID',axis=1)
df_dev = pd.read_csv(BASE_DIR + 'twitter-dev-data.txt',sep='\t',names=['ID','sentiment','text']).drop('ID',axis=1)
df_test1 = pd.read_csv(BASE_DIR + 'twitter-test1.txt',sep='\t',names=['ID','sentiment','text']).drop('ID',axis=1)
df_test2 = pd.read_csv(BASE_DIR + 'twitter-test2.txt',sep='\t',names=['ID','sentiment','text']).drop('ID',axis=1)
df_test3 = pd.read_csv(BASE_DIR + 'twitter-test3.txt',sep='\t',names=['ID','sentiment','text']).drop('ID',axis=1)

In [7]:
df_train['clean_text'] = df_train['text']
df_train = clean_data(df_train,'clean_text',regex_emoji,pattern_binc,pattern_bdec,pattern_pos,pattern_neg,pattern_bad)
print('Train data processed')

df_dev['clean_text'] = df_dev['text']
df_dev = clean_data(df_dev,'clean_text',regex_emoji,pattern_binc,pattern_bdec,pattern_pos,pattern_neg,pattern_bad)
print('Developemnt data processed')

df_test1['clean_text'] = df_test1['text']
df_test1 = clean_data(df_test1,'clean_text',regex_emoji,pattern_binc,pattern_bdec,pattern_pos,pattern_neg,pattern_bad)
print('Test1 data processed')

df_test2['clean_text'] = df_test2['text']
df_test2 = clean_data(df_test2,'clean_text',regex_emoji,pattern_binc,pattern_bdec,pattern_pos,pattern_neg,pattern_bad)
print('Test2 data processed')

df_test3['clean_text'] = df_test3['text']
df_test3 = clean_data(df_test3,'clean_text',regex_emoji,pattern_binc,pattern_bdec,pattern_pos,pattern_neg,pattern_bad)
print('Test3 data processed')

Train data processed
Developemnt data processed
Test1 data processed
Test2 data processed
Test3 data processed


In [8]:
replace = dict(zip(df_train['sentiment'].unique(),range(len(df_train['sentiment'].unique()))))
df_train['sentiment'] = df_train['sentiment'].replace(replace)
df_dev['sentiment'] = df_dev['sentiment'].replace(replace)
df_test1['sentiment'] = df_test1['sentiment'].replace(replace)
df_test2['sentiment'] = df_test2['sentiment'].replace(replace)
df_test3['sentiment'] = df_test3['sentiment'].replace(replace)

In [9]:
tfidf = TfidfVectorizer(tokenizer=lambda x: x,preprocessor=lambda x: x,ngram_range=(1,3),min_df=10)
tfidf_vectorizer_vectors_train = tfidf.fit_transform(df_train.loc[:,'clean_text'].values)
tfidf_vectorizer_vectors_dev = tfidf.transform(df_dev.loc[:,'clean_text'].values)
tfidf_vectorizer_vectors_test1 = tfidf.transform(df_test1.loc[:,'clean_text'].values)
tfidf_vectorizer_vectors_test2 = tfidf.transform(df_test2.loc[:,'clean_text'].values)
tfidf_vectorizer_vectors_test3 = tfidf.transform(df_test3.loc[:,'clean_text'].values)

In [15]:
train_x = tfidf_vectorizer_vectors_train.toarray()
dev_x = tfidf_vectorizer_vectors_dev.toarray()
test1_x = tfidf_vectorizer_vectors_test1.toarray()
test2_x = tfidf_vectorizer_vectors_test2.toarray()
test3_x = tfidf_vectorizer_vectors_test3.toarray()


train_y = df_train['sentiment'].values
dev_y = df_dev['sentiment'].values
test1_y = df_test1['sentiment'].values
test2_y = df_test2['sentiment'].values
test3_y = df_test3['sentiment'].values

train_x = np.concatenate((train_x,df_train[df_train.columns.difference(['sentiment', 'text', 'clean_text'])].values),axis=1)
dev_x = np.concatenate((dev_x,df_dev[df_dev.columns.difference(['sentiment', 'text', 'clean_text'])].values),axis=1)
test1_x = np.concatenate((test1_x,df_test1[df_test1.columns.difference(['sentiment', 'text', 'clean_text'])].values),axis=1)
test2_x = np.concatenate((test2_x,df_test2[df_test2.columns.difference(['sentiment', 'text', 'clean_text'])].values),axis=1)
test3_x = np.concatenate((test3_x,df_test3[df_test3.columns.difference(['sentiment', 'text', 'clean_text'])].values),axis=1)

In [13]:
from sklearn.linear_model import LogisticRegression

In [16]:
model = LogisticRegression(max_iter=300).fit(train_x,train_y) #,sample
print('F1 Score on Train data',f1_score(train_y,model.predict(train_x),average='weighted'))
print('F1 Score on Dev data',f1_score(dev_y,model.predict(dev_x),average='weighted'))
print('F1 Score on Test1 data',f1_score(test1_y,model.predict(test1_x),average='weighted'))
print('F1 Score on Test2 data',f1_score(test2_y,model.predict(test2_x),average='weighted'))
print('F1 Score on Test3 data',f1_score(test3_y,model.predict(test3_x),average='weighted'))

F1 Score on Train data 0.7797991365405622
F1 Score on Train data 0.6376087403662589
F1 Score on Test1 data 0.701197191127753
F1 Score on Test2 data 0.6975931538023925
F1 Score on Test3 data 0.6690442962168911


In [66]:
model = MultinomialNB().fit(train_x,train_y)
print('F1 Score on Train data',f1_score(train_y,model.predict(train_x),average='weighted'))
print('F1 Score on Dev data',f1_score(dev_y,model.predict(dev_x),average='weighted'))
print('F1 Score on Test1 data',f1_score(test1_y,model.predict(test1_x),average='weighted'))
print('F1 Score on Test2 data',f1_score(test2_y,model.predict(test2_x),average='weighted'))
print('F1 Score on Test3 data',f1_score(test3_y,model.predict(test3_x),average='weighted'))

F1 Score on Train data 0.7049961251375948
F1 Score on Dev data 0.646387800145492
F1 Score on Test1 data 0.6334959724093925
F1 Score on Test2 data 0.6851088436160981
F1 Score on Test3 data 0.6225931218142564


In [68]:
from sklearn.svm import LinearSVC

In [69]:
model = LinearSVC(max_iter=1000).fit(train_x,train_y)
print('F1 Score on Train data',f1_score(train_y,model.predict(train_x),average='weighted'))
print('F1 Score on Dev data',f1_score(dev_y,model.predict(dev_x),average='weighted'))
print('F1 Score on Test1 data',f1_score(test1_y,model.predict(test1_x),average='weighted'))
print('F1 Score on Test2 data',f1_score(test2_y,model.predict(test2_x),average='weighted'))
print('F1 Score on Test3 data',f1_score(test3_y,model.predict(test3_x),average='weighted'))

F1 Score on Train data 0.857551049026708
F1 Score on Dev data 0.6541147310239087
F1 Score on Test1 data 0.6816422461948877
F1 Score on Test2 data 0.7009861923840657
F1 Score on Test3 data 0.6433326337341967


In [77]:
features = tfidf_vectorizer_vectors_train.toarray()
vocab = dict(zip(tfidf.get_feature_names(),features[:,:len(tfidf.get_feature_names())].sum(axis=0)))

embeddings_dict = {}
with open("glove.6B.100d.txt", 'r', encoding='utf-8') as f:
    for line in f:
        values = line.split()
        token = values[0]
        vector = np.asarray(values[1:], "float32")
        embeddings_dict[token] = vector


new_vocab = {}
while len(new_vocab) != 4998:
    word = max(vocab,key=vocab.get)
    vector = embeddings_dict.get(word)
    if vector is None :
        vocab.pop(word)
    else:
        new_vocab[word] = vector
        vocab.pop(word)
        if len(new_vocab) == 4998:
            break


In [80]:
EMBEDD_LEN = 100
vocab = {'PAD':np.random.normal(scale=0.6, size=(EMBEDD_LEN, ))}
vocab.update(new_vocab)

Shorter Vocab is Read


In [83]:
import itertools

In [84]:
df_train['clean_text'] = df_train['clean_text'].apply(lambda x: [key for key, grp in itertools.groupby([ i if i in vocab else 'UNK' for i in x ])] )
df_train['nm_all_tok']= df_train['clean_text'].apply(lambda x: len(x))

df_dev['clean_text'] = df_dev['clean_text'].apply(lambda x: [key for key, grp in itertools.groupby([ i if i in vocab else 'UNK' for i in x ])] )
df_test1['clean_text'] = df_test1['clean_text'].apply(lambda x: [key for key, grp in itertools.groupby([ i if i in vocab else 'UNK' for i in x ])] )
df_test2['clean_text'] = df_test2['clean_text'].apply(lambda x: [key for key, grp in itertools.groupby([ i if i in vocab else 'UNK' for i in x ])] )
df_test3['clean_text'] = df_test3['clean_text'].apply(lambda x: [key for key, grp in itertools.groupby([ i if i in vocab else 'UNK' for i in x ])] )


In [85]:
MIN_LENGTH = 1 #data_train['nm_all_tok'].quantile(0.01)
MAX_LENGTH = 33 #data_train['nm_all_tok'].quantile(0.99)
df_train = df_train[~(df_train['nm_all_tok'] < MIN_LENGTH) | (df_train['nm_all_tok'] > MAX_LENGTH)]

In [88]:
import torch

In [89]:
vocab['UNK'] = np.random.normal(scale=0.6, size=(EMBEDD_LEN, ))
word_to_idx = dict(zip(vocab.keys(),range(len(vocab.keys()))))
weights_matrix = torch.from_numpy(np.vstack(list(vocab.values())))

df_train['clean_text'] = df_train['clean_text'].apply(lambda x: [ word_to_idx[item] for item in x ])
df_dev['clean_text'] = df_dev['clean_text'].apply(lambda x: [ word_to_idx[item] for item in x ])
df_test1['clean_text'] = df_test1['clean_text'].apply(lambda x: [ word_to_idx[item] for item in x ])
df_test2['clean_text'] = df_test2['clean_text'].apply(lambda x: [ word_to_idx[item] for item in x ])
df_test3['clean_text'] = df_test3['clean_text'].apply(lambda x: [ word_to_idx[item] for item in x ])

In [91]:
from sklearn.preprocessing import LabelBinarizer

In [92]:
binariser = LabelBinarizer()
train_y = binariser.fit_transform(df_train['sentiment'].values)
dev_y = binariser.transform(df_dev['sentiment'].values)
test_y1 = binariser.transform(df_test1['sentiment'].values)
test_y2 = binariser.transform(df_test2['sentiment'].values)
test_y3 = binariser.transform(df_test3['sentiment'].values)

In [93]:
def custom_collate_fn(batch,y_none=False):
    if y_none:
        x = batch
    else:
        x, y = zip(*batch)
        y = torch.from_numpy(np.vstack(list(map(lambda x: torch.tensor(x), y))))
    x = list(map(lambda x: torch.tensor(x), x))
    lens = list(map(len, x))
    padded = pad_sequence(x, batch_first=True)
    if y_none:
        return padded,lens
    else:
        return padded, y,lens

def create_emb_layer(weights_matrix, non_trainable=False):
    num_embeddings, embedding_dim = weights_matrix.size()
    emb_layer = torch.nn.Embedding(num_embeddings, embedding_dim)
    emb_layer.load_state_dict({'weight': weights_matrix})
    if non_trainable:
        emb_layer.weight.requires_grad = False

    return emb_layer, num_embeddings, embedding_dim

class ToyNN1(torch.nn.Module):
    def __init__(self, weights_matrix, hidden_size, num_layers,output_size):
        super().__init__()
        self.embedding, num_embeddings, embedding_dim = create_emb_layer(weights_matrix, True)
        self.lstm = torch.nn.LSTM(embedding_dim, hidden_size, num_layers, batch_first=True,dropout=0.3,bidirectional=True)
        self.dropout = torch.nn.Dropout(0.5)
        self.hidden_layer1 = torch.nn.Sequential(
            torch.nn.Linear(256, output_size),)
        self.hidden_layer = torch.nn.Sequential(
            torch.nn.Linear(256, 64),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.3),
            torch.nn.Linear(64, output_size))
        
        
    def forward(self, inp,lens):
        if next(self.parameters()).is_cuda:
            pad_embed = self.embedding(inp).to(device)
        else:
            pad_embed = self.embedding(inp)
        pad_embed_pack = pack_padded_sequence(pad_embed, lens, batch_first=True, enforce_sorted=False)
        seq, (ht, ct) = self.lstm(pad_embed_pack)
        out = self.dropout(torch.cat((ht[-1],ht[-2]), 1))
        out = self.hidden_layer1(out)
        return torch.nn.functional.softmax(out,dim=-1)
    

In [95]:
DATA_TRAIN = list(zip(list(df_train['clean_text'].values),train_y))
DATA_DEV = list(zip(list(df_dev['clean_text'].values),dev_y))
DATA_TEST1 = list(zip(list(df_test1['clean_text'].values),test_y1))
DATA_TEST2 = list(zip(list(df_test2['clean_text'].values),test_y2))
DATA_TEST3 = list(zip(list(df_test3['clean_text'].values),test_y3))
print(len(DATA_TRAIN))
print(len(DATA_DEV))
print(len(DATA_TEST1))
print(len(DATA_TEST2))
print(len(DATA_TEST3))

45026
2000
3531
1853
2379


In [97]:
from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence

In [98]:
BATCH_SIZE = 500
loader_train = DataLoader(DATA_TRAIN, batch_size=BATCH_SIZE, shuffle=True, collate_fn=custom_collate_fn,drop_last=True)
dev_x,dev_y,dev_length = custom_collate_fn(DATA_DEV)
test_x1,test_y1,test_lengths1 = custom_collate_fn(DATA_TEST1)
test_x2,test_y2,test_lengths2 = custom_collate_fn(DATA_TEST2)
test_x3,test_y3,test_lengths3 = custom_collate_fn(DATA_TEST3)

In [99]:
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.enabled = True

is_cuda = torch.cuda.is_available()

# If we have a GPU available, we'll set our device to GPU. We'll use this device variable later in our code.
if is_cuda:
    device = torch.device("cuda")
    print("GPU is available")
else:
    device = torch.device("cpu")
    print("GPU not available, CPU used")


GPU is available


In [100]:
model = ToyNN1(weights_matrix,128,2,3)
model.to(device)
print(model)

lr=0.001
criterion = torch.nn.CrossEntropyLoss(reduction='none')
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
clip = 5
epochs = 100

ToyNN1(
  (embedding): Embedding(5000, 100)
  (lstm): LSTM(100, 128, num_layers=2, batch_first=True, dropout=0.3, bidirectional=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (hidden_layer1): Sequential(
    (0): Linear(in_features=256, out_features=3, bias=True)
  )
  (hidden_layer): Sequential(
    (0): Linear(in_features=256, out_features=64, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=64, out_features=3, bias=True)
  )
)


In [102]:
from sklearn.utils.class_weight import compute_sample_weight

In [103]:
torch.cuda.empty_cache()
train_acces,train_losses = [],[]
MAX_AVG_TEST_ACC = 0

for epoch in range(epochs):
    train_outputs,train_labels = [],[]
    train_acc = 0.0
    model.train()
    for inputs,labels,lengths in loader_train:
        sample_weight = torch.from_numpy(compute_sample_weight(class_weight='balanced',y=labels)).to(device)
        inputs, labels = inputs.to(device), labels.to(device)   
        model.zero_grad()
        output = model(inputs,lengths)
        loss = criterion(output.squeeze(), labels.float())
        loss = (loss * sample_weight).mean()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        
        train_outputs.append(output.cpu().detach().numpy())
        train_labels.append(labels.cpu().detach().numpy())
        del inputs, labels, output, loss
        torch.cuda.empty_cache()
        
    with torch.no_grad():
        train_outputs = np.vstack(train_outputs)
        train_y = np.vstack(train_labels)
        train_loss = criterion(torch.from_numpy(train_outputs.squeeze()),torch.from_numpy(train_y.astype(float)) ).mean()
        train_losses.append(train_loss)
        train_acc = f1_score( np.argmax(train_y,axis=-1) , np.argmax(train_outputs,axis=-1) ,average='macro')
        train_acces.append(train_acc)

        test_outputs = model(test_x1.to(device),test_lengths1).cpu()
        test_acc1 = f1_score( np.argmax(test_y1,axis=-1) , torch.argmax(test_outputs,dim=-1) ,average='macro')
        
        test_outputs = model(test_x2.to(device),test_lengths2).cpu()
        test_acc2 = f1_score( np.argmax(test_y2,axis=-1) , torch.argmax(test_outputs,dim=-1) ,average='macro')
        
        test_outputs = model(test_x3.to(device),test_lengths3).cpu()
        test_acc3 = f1_score( np.argmax(test_y3,axis=-1) , torch.argmax(test_outputs,dim=-1) ,average='macro')
        
        avg_test_acc = np.mean([test_acc1,test_acc2,test_acc3])
        if avg_test_acc > MAX_AVG_TEST_ACC:
            print("AVERAGE MAXIMUM TEST ACC IS",avg_test_acc)
            torch.save(model.state_dict(),'Pytorch_lstm_model.pt' )
            MAX_AVG_TEST_ACC = avg_test_acc

        print('''Epoch {epoch} Train Acc: {train_acc}, Test1 Acc: {test1_acc} Test2 Acc {test2_acc} Test 3 Acc {test3_acc} AVG Test ACC {avg_acc}'''\
              .format(epoch=epoch,train_acc=train_acc,test1_acc=test_acc1,test2_acc=test_acc2,test3_acc=test_acc3,avg_acc=avg_test_acc))
        del test_outputs,train_outputs, train_labels,train_loss
        torch.cuda.empty_cache()
        print(25*'==')

AVERAGE MAXIMUM TEST ACC IS 0.4886213783083792
Epoch 0 Train Acc: 0.38676231116285437, Test1 Acc: 0.5010962542074545 Test2 Acc 0.5167452896003581 Test 3 Acc 0.4480225911173248 AVG Test ACC 0.4886213783083792
AVERAGE MAXIMUM TEST ACC IS 0.5561683807991162
Epoch 1 Train Acc: 0.5098300260248229, Test1 Acc: 0.5765023759682059 Test2 Acc 0.5555542273775739 Test 3 Acc 0.5364485390515689 AVG Test ACC 0.5561683807991162
AVERAGE MAXIMUM TEST ACC IS 0.5597255855492057
Epoch 2 Train Acc: 0.5369832538983298, Test1 Acc: 0.588279328481722 Test2 Acc 0.5689936499442806 Test 3 Acc 0.5219037782216144 AVG Test ACC 0.5597255855492057
Epoch 3 Train Acc: 0.5499728557272713, Test1 Acc: 0.5867642798897008 Test2 Acc 0.5544837509550912 Test 3 Acc 0.52867246150474 AVG Test ACC 0.5566401641165107
AVERAGE MAXIMUM TEST ACC IS 0.588495724258117
Epoch 4 Train Acc: 0.5619357129738459, Test1 Acc: 0.6134902017584949 Test2 Acc 0.5907768104941149 Test 3 Acc 0.5612201605217412 AVG Test ACC 0.588495724258117
Epoch 5 Train Ac

RuntimeError: CUDA out of memory. Tried to allocate 774.00 MiB (GPU 0; 4.00 GiB total capacity; 669.61 MiB already allocated; 1.97 GiB free; 728.00 MiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF