In [None]:
# Standard library
import os

# Third-party / project imports. The original order is kept on purpose: the
# star imports below can shadow one another, so reordering them could change
# which definition a name resolves to.
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pandas as pd
import numpy as np
# NOTE(review): this binds `plt` to the `pyplot.plot` function, not to the pyplot module.
from matplotlib.pyplot import plot as plt

torch.manual_seed(1)
from data import *
import cleaningtool as ct
from helpers import *

from model import *
import sys
import nltk
from nltk.corpus import stopwords

In [None]:
# Switch for the optional statement-cleaning pass that runs in a later cell.
cleaning = True

## Define paths
DATA_FOLDER = './data_1/'
TRAIN_PATH, TEST_PATH, VALID_PATH = (
    DATA_FOLDER + file_name for file_name in ('train.tsv', 'test.tsv', 'valid.tsv')
)

# Read the three splits in train / test / valid order.
train_data, test_data, valid_data = (
    load_data(split_path) for split_path in (TRAIN_PATH, TEST_PATH, VALID_PATH)
)

# Keep just the "statement" and "label" columns of each split.
train_data, test_data, valid_data = (
    split[["statement", "label"]] for split in (train_data, test_data, valid_data)
)

# Stack all splits into one frame. reset_index() renumbers the rows and keeps
# the previous per-split index as an extra "index" column.
df_raw = (
    pd.concat([train_data, test_data, valid_data], axis=0, sort=False)
    .reset_index()
)

In [None]:
# Optional cleaning pass over the "statement" column of every frame.
# The flag is tested for truthiness instead of being compared with `== True`.
if cleaning:
    # Print the first statement before and after cleaning as a quick visual check.
    print("before :-",df_raw["statement"][0])
    train_data = clean_data(train_data, "statement")
    test_data = clean_data(test_data, "statement")
    valid_data = clean_data(valid_data, "statement")
    df_raw = clean_data(df_raw, 'statement')
    print()
    print("after :-", df_raw["statement"][0])


# To make BOW vector and target

In [None]:
def make_bow_vector(sentence, word_to_ix):
    """Build a bag-of-words count vector for one tokenised sentence.

    Args:
        sentence: iterable of tokens.
        word_to_ix: dict mapping each vocabulary token to its column index.

    Returns:
        A float tensor of shape (1, len(word_to_ix)) whose entry at
        word_to_ix[token] is the number of times `token` occurs in `sentence`.

    Tokens that are not in `word_to_ix` are skipped instead of raising
    KeyError, so text containing unseen words can still be vectorised.
    """
    vec = torch.zeros(len(word_to_ix))
    for word in sentence:
        ix = word_to_ix.get(word)
        # Compare with None explicitly: index 0 is a valid column.
        if ix is None:
            continue
        vec[ix] += 1
    # Add a leading batch dimension of size 1.
    return vec.view(1, -1)

def make_target(label, label_to_ix):
    """Wrap the class index of `label` in a one-element LongTensor.

    Args:
        label: key to look up in `label_to_ix`.
        label_to_ix: mapping from label to integer class index.

    Returns:
        torch.LongTensor of shape (1,) holding label_to_ix[label].
    """
    class_index = label_to_ix[label]
    target = torch.LongTensor([class_index])
    return target

# Sent to Words

In [None]:
# Run sent_words over every frame: once with the default arguments for the
# inputs, once with label=True for the targets.
# Full corpus (all splits stacked).
df_raw_x = sent_words(df_raw)
df_raw_y = sent_words(df_raw, label=True)

# Training split.
x_train = sent_words(train_data)
y_train = sent_words(train_data, label=True)

# Validation split.
x_val = sent_words(valid_data)
y_val = sent_words(valid_data, label=True)

# Test split.
x_test = sent_words(test_data)
y_test = sent_words(test_data, label=True)

# Merging X and Y

In [None]:
def _as_object_column(items):
    """Return `items` as an (n, 1) object array with one item per row.

    The column is filled element by element rather than through
    np.array(items).reshape(...): when the items are token lists of different
    lengths, current NumPy refuses to build the ragged array, and when they
    all have the same length np.array() produces an (n, k) array that cannot
    be reshaped to (n, 1).
    """
    # Already a column (e.g. this cell was re-run): leave it untouched.
    if isinstance(items, np.ndarray) and items.ndim == 2 and items.shape[1] == 1:
        return items
    column = np.empty((len(items), 1), dtype=object)
    for row, item in enumerate(items):
        column[row, 0] = item
    return column


# Each split becomes an (n, 2) array whose rows are (input, label) pairs.
# Every column is sized from its own data, not from its partner's length;
# np.concatenate raises if the two lengths disagree.
x_train = _as_object_column(x_train)
y_train = _as_object_column(y_train)
x_test = _as_object_column(x_test)
y_test = _as_object_column(y_test)
x_val = _as_object_column(x_val)
y_val = _as_object_column(y_val)

train = np.concatenate((x_train, y_train), axis=1)
val = np.concatenate((x_val, y_val), axis=1)
test = np.concatenate((x_test, y_test), axis=1)

# Set `out` below to match the classification task (2 = binary; any other value = six-way)

In [None]:
# `out` is the number of output classes: 2 selects the binary mapping, any
# other value selects the six-way mapping.
out = 2
word_to_ix = word_to_ix_(df_raw_x)

# Labels in the order they are inserted into label_to_ix. The order matters
# when the dict is inverted later, because several labels can share an index.
label_order = ['true', 'mostly-true', 'half-true', 'barely-true', 'false', 'pants-fire']
label_to_ix = (
    # Binary: the first three labels map to 1, the last three to 0.
    {label: int(position < 3) for position, label in enumerate(label_order)}
    if out == 2
    # Six-way: 'true' -> 5 down to 'pants-fire' -> 0.
    else {label: 5 - position for position, label in enumerate(label_order)}
)

# Creating the inverse dicts (ix_to_word, ix_to_label) of word_to_ix and label_to_ix, plus the vocabulary and label counts

In [None]:
# Reverse lookups: index -> word and index -> label.
ix_to_word = {index: word for word, index in word_to_ix.items()}
# Several labels can share one index in label_to_ix; when they do, the label
# that comes last in label_to_ix is the one kept here.
ix_to_label = {index: label for label, index in label_to_ix.items()}
VOCAB_SIZE = len(word_to_ix)
# Count distinct class indices rather than label names: counting the keys
# always gives the number of label strings, even when several of them are
# mapped onto the same class.
NUM_LABELS = len(set(label_to_ix.values()))

# Creating Model


In [None]:
# Classifier built from the class count `out` and the vocabulary size.
model = BoWClassifier(out, VOCAB_SIZE)

# Cross-entropy between the model output and the target class index.
loss_function = nn.CrossEntropyLoss()

# Adam over all model parameters with a learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook does not fail on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Resuming from 'BOW.pth' is best effort: a missing or unusable checkpoint must
# not stop the notebook, but the reason is reported instead of being hidden.
# map_location lets a checkpoint saved on another device load here.
try:
    model.load_state_dict(torch.load('BOW.pth', map_location=device), strict = True)
except FileNotFoundError:
    print("No 'BOW.pth' checkpoint found; continuing without it.")
except Exception as err:
    print("Could not load 'BOW.pth' (" + repr(err) + "); continuing without it.")
model

In [None]:
#training
# Gradients are accumulated over `update_weights` samples before each optimizer step.
update_weights = 512
# Running loss of the current accumulation window, recorded after every sample.
loss_t = []
acc = 0
epoch_ = 15
# torch.save does not create missing directories, so make sure the target exists.
os.makedirs("weights", exist_ok=True)
for epoch in range(epoch_):
    running_loss = 0
    pending = 0  # samples whose gradients have not been applied yet
    # The validation pass below switches to eval mode, so re-enable training
    # mode at the start of every epoch.
    model.train()
    optimizer.zero_grad()
    print("epoch number :",epoch+1)
    for i,(x,y) in enumerate(train):
        x,y = make_bow_vector(x,word_to_ix).to(device),make_target(y,label_to_ix).to(device)

        # Named `logits`, not `out`, so the notebook-level class count `out`
        # is not overwritten by a tensor.
        logits = model(x)

        loss = loss_function(logits,y)
        loss.backward()
        running_loss += loss.item()
        # Record the running loss including the current sample.
        loss_t.append(running_loss)
        pending += 1

        if i % update_weights == update_weights - 1:    # a full accumulation window is done
            print('[%d, %5d] loss: %.3f' %(epoch + 1, i + 1, running_loss/update_weights ))
            running_loss = 0
            optimizer.step()
            optimizer.zero_grad()
            pending = 0

    # Apply the gradients of the last, partial window; otherwise they would be
    # thrown away by the zero_grad() at the start of the next epoch.
    if pending:
        optimizer.step()
        optimizer.zero_grad()

    # One checkpoint per epoch; the test cell loads these files by epoch index.
    torch.save(model.state_dict(),"weights/EXP_"+str(epoch)+".pth")

    # Validation accuracy after this epoch.
    with torch.no_grad():
        model.eval()
        num = 0
        length = 0
        for x,y in val:
            x,y = make_bow_vector(x,word_to_ix).to(device),make_target(y,label_to_ix).to(device)
            logits = model(x)
            max_score,pred = torch.max(logits,1)
            if pred.item() == y.item():
                num = num+1
            length = length + 1
        # Guard against an empty validation set.
        accuracy = (num/length)*100 if length else 0.0

        print("accuray while evaluating is :",accuracy)

In [None]:
#testing
# Evaluate every per-epoch checkpoint on the test split.
for j in range(epoch_):
    with torch.no_grad():
        model.eval()
        num = 0
        length = 0
        # map_location lets a checkpoint saved on another device load here.
        model.load_state_dict(torch.load("weights/EXP_"+str(j)+".pth", map_location=device), strict = True)
        for x,y in test:
            x,y = make_bow_vector(x,word_to_ix).to(device),make_target(y,label_to_ix).to(device)
            # Named `logits`, not `out`, so the notebook-level class count
            # `out` is not overwritten by a tensor.
            logits = model(x)
            max_score,pred = torch.max(logits,1)
            if pred.item() == y.item():
                num = num+1
            length = length + 1
        # Guard against an empty test set.
        accuracy = (num/length)*100 if length else 0.0
        print("accuray while evaluating at"+str(j)+" is :",accuracy,"%.")