In [1]:
from tqdm import tqdm_notebook as tqdm
import preprocessor as p
import numpy as np

In [2]:
# %run ../twitter15/twitter15.ipynb
%run ../twitter16/twitter16_text_processing.ipynb

In [3]:
def preprocess(text):
    p.set_options(p.OPT.URL,p.OPT.MENTION,p.OPT.EMOJI,p.OPT.HASHTAG)
    return p.tokenize(text).split()

In [4]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import torch.optim as optim
import torch.nn.functional as F

In [5]:
import pickle as pkl
from collections import defaultdict
import pandas as pd
import os
import numpy as np
import json
from tqdm import tqdm, tqdm_notebook
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, f1_score, accuracy_score

In [6]:
from collections import Counter
import spacy
from tqdm import tqdm, tqdm_notebook, tnrange
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix

In [7]:
import matplotlib.pyplot as plt
import random

In [8]:
def preprocess(text):
    p.set_options(p.OPT.URL, p.OPT.MENTION, p.OPT.EMOJI ,p.OPT.HASHTAG)
    return p.tokenize(text).split()

In [9]:
def indexer(split_text):
    sent2idx = []
    for w in split_text:
        if w.lower() in word2idx:
            sent2idx.append(word2idx[w.lower()])
        else:
            sent2idx.append(word2idx['_UNK'])
            
    return sent2idx

In [10]:

'''
STEP 1: LOADING DATASET
'''

labels = {'true':0,'false':1,'unverified':2,'non-rumor':3}

data = list(twitter15_text.keys())
random.shuffle(data)
train_ids = data[:700]
valid_ids = data[700:]
train_text = [twitter15_text[x] for x in train_ids]
valid_text = [twitter15_text[x] for x in valid_ids]

train = pd.DataFrame({'text_id':train_ids ,'raw_text':train_text})
train['clean_text'] = train.raw_text.apply(lambda x: preprocess(x.strip()))

words = Counter()
for sent in tqdm(train.clean_text.values):
    words.update(w.lower() for w in sent)
   
# sort with most frequently occuring words first
words = sorted(words, key=words.get, reverse=True)
# add <pad> and <unk> token to vocab which will be used later
words = ['_PAD','_UNK'] + words

word2idx = {o:i for i,o in enumerate(words)}
idx2word = {i:o for i,o in enumerate(words)}

train['sentence2idx'] = train.clean_text.apply(lambda x: indexer(x))
train['length'] = train.clean_text.apply(lambda x: len(x))
train['label'] = train.text_id.apply(lambda x: labels[twitter15_labels[x]])

valid = pd.DataFrame({'text_id':valid_ids ,'raw_text':valid_text})
valid['clean_text'] = valid.raw_text.apply(lambda x: preprocess(x.strip()))

valid['sentence2idx'] = valid.clean_text.apply(lambda x: indexer(x))
valid['length'] = valid.clean_text.apply(lambda x: len(x))
valid['label'] = valid.text_id.apply(lambda x: labels[twitter15_labels[x]])

100%|██████████| 700/700 [00:00<00:00, 100280.51it/s]


In [11]:
fulldata = pd.concat((train,valid))
keys = list(fulldata['text_id'])
vals = list(fulldata['sentence2idx'])
sent2idx = dict(zip(keys, vals))

In [12]:
class VectorizeData(Dataset):
    def __init__(self, df, maxlen=30):
        self.maxlen = maxlen
        self.df = df
#         print('Padding')
        self.df['padded_text'] = self.df.sentence2idx.apply(lambda x: self.pad_data(x))
        
    def __len__(self):
        return self.df.shape[0]
    
    def __getitem__(self, idx):
#         lens = self.df.length[idx]
        X = self.df.padded_text[idx]
        y = self.df.label[idx]
        lens = self.df.length[idx]
        return X,y,lens
    
    def pad_data(self, s):
        padded = np.zeros((self.maxlen,), dtype=np.int64)
        if len(s) > self.maxlen: padded[:] = s[:self.maxlen]
        else: padded[:len(s)] = s
        return padded

In [13]:
train_loader = VectorizeData(train)
valid_loader = VectorizeData(valid)
tl = DataLoader(dataset=train_loader, batch_size=100, shuffle=True)
vl = DataLoader(dataset=valid_loader, batch_size=100, shuffle=False)


In [14]:
class RecArch(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, num_layers, rnnType,device):
        super(RecArch, self).__init__()
        
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.num_layers = num_layers
        self.device = device
        self.rnnType = rnnType
        
        self.emb = nn.Embedding(self.vocab_size, embedding_dim)
        
        if self.rnnType == 'lstm':
            self.recNN = nn.LSTM(embedding_dim,hidden_dim,num_layers,batch_first=True)
            
        if self.rnnType == 'gru':
            self.recNN = nn.GRU(embedding_dim, hidden_dim, num_layers, batch_first=True)
        
        if self.rnnType == 'rnn':
            self.recNN = nn.RNN(embedding_dim, hidden_dim, num_layers, batch_first=True, nonlinearity='tanh')
        
        self.fc = nn.Linear(hidden_dim,output_dim)
    
    def forward(self,x):
        embs = self.emb(x)
        embs = embs.view(x.size(0),-1,self.embedding_dim).to(self.device)
        
        h0 = Variable(torch.zeros(self.num_layers,x.size(0),self.hidden_dim),requires_grad=True).to(self.device)
        
        if self.rnnType == 'lstm':        
            c0 = Variable(torch.zeros(self.num_layers,x.size(0),self.hidden_dim),requires_grad=True).to(self.device)
            
            out,(hn,cn) = self.recNN(embs,(h0,c0))
        
        else:
            out, hn = self.recNN(embs, h0)
        
#         print(out[:,-1,:].shape)
        out = self.fc(out[:, -1, :])
        return out

In [15]:
vocab_size = len(words)
embedding_dim = 256
n_hidden = 50
n_out = 4
num_layers = 1
rnnType = 'gru'

if torch.cuda.is_available():
    device = 'cuda:2'
else:
    device = 'cpu'
    
model = RecArch(vocab_size,embedding_dim,n_hidden,n_out,num_layers,rnnType,device)
model.to(device)
model.float()

RecArch(
  (emb): Embedding(3351, 256)
  (recNN): GRU(256, 50, batch_first=True)
  (fc): Linear(in_features=50, out_features=4, bias=True)
)

In [17]:
optimizer = torch.optim.Adagrad(model.parameters(),lr=0.01)
# criterion = torch.nn.BCEWithLogitsLoss()
criterion = torch.nn.CrossEntropyLoss()

count = 0
seq_dim = 30
num_epochs = 200

train_losses_iterwise = []
recall_iterwise = []
precision_iterwise = []
accuracy_iterwise = []
f1score_iterwise = []
val_losses_iterwise = []

for epoch in tqdm(range(num_epochs)):
    train_losses = []
    val_losses = []
    for i, (text,label,lengths) in enumerate(tl):

        text = Variable(text.view(-1, seq_dim, 1)).to(device)
        label = Variable(label).to(device)
        
#         print(sexism_label)
        
        optimizer.zero_grad()
        outputs = model(text)
        
#         print(outputs)
        
        loss = criterion(outputs, label)
        train_losses.append(loss.data.cpu())
        
        loss.backward()
        optimizer.step()
        count += 1
        
        if count % 50 == 0:    
            correct = 0
            total = 0

            allLabels = []
            allPreds = []
            
            for i, (text,label,lengths) in enumerate(vl):
                labels=[]
                text = Variable(text.view(-1, seq_dim, 1)).to(device)
                label = Variable(label).to(device)
                
                predicted = model(text)
                predicted =  torch.softmax(predicted,1)
                predicted = torch.max(predicted, 1)[1].cpu().numpy().tolist()
#                 print(predicted)
#                 print(sexism_label)
                allLabels += (label.cpu().numpy().tolist())
                allPreds += (predicted)

            valacc = accuracy_score(allLabels, allPreds)
            recscore = recall_score(allLabels, allPreds,average='macro')
            precscore = precision_score(allLabels, allPreds,average='macro')
            f1score = f1_score(allLabels, allPreds,average='macro')
            cr = classification_report(allLabels, allPreds)
            print(f'acc: {valacc} recall {recscore} prec: {precscore} f1: {f1score}')
            print(cr)
            
            train_losses_iterwise.append(np.mean(train_losses))

  4%|▍         | 8/200 [00:00<00:16, 11.96it/s]

acc: 0.6779661016949152 recall 0.6903735632183907 prec: 0.7262227928588897 f1: 0.6622897900978219
              precision    recall  f1-score   support

           0       0.51      1.00      0.68        24
           1       0.73      0.67      0.70        33
           2       0.83      0.75      0.79        32
           3       0.83      0.34      0.49        29

   micro avg       0.68      0.68      0.68       118
   macro avg       0.73      0.69      0.66       118
weighted avg       0.74      0.68      0.67       118



  8%|▊         | 16/200 [00:01<00:14, 13.04it/s]

acc: 0.6779661016949152 recall 0.6903735632183907 prec: 0.7262227928588897 f1: 0.6622897900978219
              precision    recall  f1-score   support

           0       0.51      1.00      0.68        24
           1       0.73      0.67      0.70        33
           2       0.83      0.75      0.79        32
           3       0.83      0.34      0.49        29

   micro avg       0.68      0.68      0.68       118
   macro avg       0.73      0.69      0.66       118
weighted avg       0.74      0.68      0.67       118



 12%|█▏        | 24/200 [00:01<00:13, 13.51it/s]

acc: 0.6779661016949152 recall 0.6903735632183907 prec: 0.7204382183908046 f1: 0.6618163436966351
              precision    recall  f1-score   support

           0       0.53      1.00      0.70        24
           1       0.69      0.67      0.68        33
           2       0.83      0.75      0.79        32
           3       0.83      0.34      0.49        29

   micro avg       0.68      0.68      0.68       118
   macro avg       0.72      0.69      0.66       118
weighted avg       0.73      0.68      0.66       118



 15%|█▌        | 30/200 [00:02<00:12, 13.42it/s]

acc: 0.6779661016949152 recall 0.6903735632183907 prec: 0.7204382183908046 f1: 0.6618163436966351
              precision    recall  f1-score   support

           0       0.53      1.00      0.70        24
           1       0.69      0.67      0.68        33
           2       0.83      0.75      0.79        32
           3       0.83      0.34      0.49        29

   micro avg       0.68      0.68      0.68       118
   macro avg       0.72      0.69      0.66       118
weighted avg       0.73      0.68      0.66       118



 19%|█▉        | 38/200 [00:02<00:11, 13.58it/s]

acc: 0.6779661016949152 recall 0.6903735632183907 prec: 0.7165719696969698 f1: 0.6612009640750327
              precision    recall  f1-score   support

           0       0.55      1.00      0.71        24
           1       0.69      0.67      0.68        33
           2       0.80      0.75      0.77        32
           3       0.83      0.34      0.49        29

   micro avg       0.68      0.68      0.68       118
   macro avg       0.72      0.69      0.66       118
weighted avg       0.72      0.68      0.66       118



 22%|██▏       | 44/200 [00:03<00:11, 13.46it/s]

acc: 0.6779661016949152 recall 0.6903735632183907 prec: 0.7165719696969698 f1: 0.6612009640750327
              precision    recall  f1-score   support

           0       0.55      1.00      0.71        24
           1       0.69      0.67      0.68        33
           2       0.80      0.75      0.77        32
           3       0.83      0.34      0.49        29

   micro avg       0.68      0.68      0.68       118
   macro avg       0.72      0.69      0.66       118
weighted avg       0.72      0.68      0.66       118



 26%|██▌       | 52/200 [00:03<00:10, 13.61it/s]

acc: 0.6779661016949152 recall 0.6903735632183907 prec: 0.7165719696969698 f1: 0.6612009640750327
              precision    recall  f1-score   support

           0       0.55      1.00      0.71        24
           1       0.69      0.67      0.68        33
           2       0.80      0.75      0.77        32
           3       0.83      0.34      0.49        29

   micro avg       0.68      0.68      0.68       118
   macro avg       0.72      0.69      0.66       118
weighted avg       0.72      0.68      0.66       118



 29%|██▉       | 58/200 [00:04<00:10, 13.47it/s]

acc: 0.6864406779661016 recall 0.6989942528735632 prec: 0.7253214527408076 f1: 0.6728463562844492
              precision    recall  f1-score   support

           0       0.55      1.00      0.71        24
           1       0.71      0.67      0.69        33
           2       0.80      0.75      0.77        32
           3       0.85      0.38      0.52        29

   micro avg       0.69      0.69      0.69       118
   macro avg       0.73      0.70      0.67       118
weighted avg       0.73      0.69      0.67       118



 33%|███▎      | 66/200 [00:04<00:09, 13.47it/s]

acc: 0.6864406779661016 recall 0.6989942528735632 prec: 0.7253214527408076 f1: 0.6728463562844492
              precision    recall  f1-score   support

           0       0.55      1.00      0.71        24
           1       0.71      0.67      0.69        33
           2       0.80      0.75      0.77        32
           3       0.85      0.38      0.52        29

   micro avg       0.69      0.69      0.69       118
   macro avg       0.73      0.70      0.67       118
weighted avg       0.73      0.69      0.67       118



 37%|███▋      | 74/200 [00:05<00:09, 13.59it/s]

acc: 0.6864406779661016 recall 0.6989942528735632 prec: 0.7253214527408076 f1: 0.6728463562844492
              precision    recall  f1-score   support

           0       0.55      1.00      0.71        24
           1       0.71      0.67      0.69        33
           2       0.80      0.75      0.77        32
           3       0.85      0.38      0.52        29

   micro avg       0.69      0.69      0.69       118
   macro avg       0.73      0.70      0.67       118
weighted avg       0.73      0.69      0.67       118



 40%|████      | 80/200 [00:05<00:08, 13.41it/s]

acc: 0.6864406779661016 recall 0.6989942528735632 prec: 0.7253214527408076 f1: 0.6728463562844492
              precision    recall  f1-score   support

           0       0.55      1.00      0.71        24
           1       0.71      0.67      0.69        33
           2       0.80      0.75      0.77        32
           3       0.85      0.38      0.52        29

   micro avg       0.69      0.69      0.69       118
   macro avg       0.73      0.70      0.67       118
weighted avg       0.73      0.69      0.67       118



 44%|████▍     | 88/200 [00:06<00:08, 13.55it/s]

acc: 0.6864406779661016 recall 0.6989942528735632 prec: 0.7253214527408076 f1: 0.6728463562844492
              precision    recall  f1-score   support

           0       0.55      1.00      0.71        24
           1       0.71      0.67      0.69        33
           2       0.80      0.75      0.77        32
           3       0.85      0.38      0.52        29

   micro avg       0.69      0.69      0.69       118
   macro avg       0.73      0.70      0.67       118
weighted avg       0.73      0.69      0.67       118



 47%|████▋     | 94/200 [00:06<00:07, 13.44it/s]

acc: 0.6864406779661016 recall 0.6989942528735632 prec: 0.7253214527408076 f1: 0.6728463562844492
              precision    recall  f1-score   support

           0       0.55      1.00      0.71        24
           1       0.71      0.67      0.69        33
           2       0.80      0.75      0.77        32
           3       0.85      0.38      0.52        29

   micro avg       0.69      0.69      0.69       118
   macro avg       0.73      0.70      0.67       118
weighted avg       0.73      0.69      0.67       118



 51%|█████     | 102/200 [00:07<00:07, 13.58it/s]

acc: 0.6864406779661016 recall 0.6989942528735632 prec: 0.7282051282051282 f1: 0.6730169861305907
              precision    recall  f1-score   support

           0       0.53      1.00      0.70        24
           1       0.73      0.67      0.70        33
           2       0.80      0.75      0.77        32
           3       0.85      0.38      0.52        29

   micro avg       0.69      0.69      0.69       118
   macro avg       0.73      0.70      0.67       118
weighted avg       0.74      0.69      0.68       118



 55%|█████▌    | 110/200 [00:08<00:06, 13.62it/s]

acc: 0.6864406779661016 recall 0.6989942528735632 prec: 0.7282051282051282 f1: 0.6730169861305907
              precision    recall  f1-score   support

           0       0.53      1.00      0.70        24
           1       0.73      0.67      0.70        33
           2       0.80      0.75      0.77        32
           3       0.85      0.38      0.52        29

   micro avg       0.69      0.69      0.69       118
   macro avg       0.73      0.70      0.67       118
weighted avg       0.74      0.69      0.68       118



 58%|█████▊    | 116/200 [00:08<00:06, 13.46it/s]

acc: 0.6864406779661016 recall 0.6989942528735632 prec: 0.7282051282051282 f1: 0.6730169861305907
              precision    recall  f1-score   support

           0       0.53      1.00      0.70        24
           1       0.73      0.67      0.70        33
           2       0.80      0.75      0.77        32
           3       0.85      0.38      0.52        29

   micro avg       0.69      0.69      0.69       118
   macro avg       0.73      0.70      0.67       118
weighted avg       0.74      0.69      0.68       118



 62%|██████▏   | 124/200 [00:09<00:05, 13.56it/s]

acc: 0.6864406779661016 recall 0.6989942528735632 prec: 0.7282051282051282 f1: 0.6730169861305907
              precision    recall  f1-score   support

           0       0.53      1.00      0.70        24
           1       0.73      0.67      0.70        33
           2       0.80      0.75      0.77        32
           3       0.85      0.38      0.52        29

   micro avg       0.69      0.69      0.69       118
   macro avg       0.73      0.70      0.67       118
weighted avg       0.74      0.69      0.68       118



 65%|██████▌   | 130/200 [00:09<00:05, 13.41it/s]

acc: 0.6864406779661016 recall 0.6989942528735632 prec: 0.7282051282051282 f1: 0.6730169861305907
              precision    recall  f1-score   support

           0       0.53      1.00      0.70        24
           1       0.73      0.67      0.70        33
           2       0.80      0.75      0.77        32
           3       0.85      0.38      0.52        29

   micro avg       0.69      0.69      0.69       118
   macro avg       0.73      0.70      0.67       118
weighted avg       0.74      0.69      0.68       118



 69%|██████▉   | 138/200 [00:10<00:04, 13.53it/s]

acc: 0.6864406779661016 recall 0.6989942528735632 prec: 0.7282051282051282 f1: 0.6730169861305907
              precision    recall  f1-score   support

           0       0.53      1.00      0.70        24
           1       0.73      0.67      0.70        33
           2       0.80      0.75      0.77        32
           3       0.85      0.38      0.52        29

   micro avg       0.69      0.69      0.69       118
   macro avg       0.73      0.70      0.67       118
weighted avg       0.74      0.69      0.68       118



 72%|███████▏  | 144/200 [00:10<00:04, 13.42it/s]

acc: 0.6864406779661016 recall 0.6989942528735632 prec: 0.7282051282051282 f1: 0.6730169861305907
              precision    recall  f1-score   support

           0       0.53      1.00      0.70        24
           1       0.73      0.67      0.70        33
           2       0.80      0.75      0.77        32
           3       0.85      0.38      0.52        29

   micro avg       0.69      0.69      0.69       118
   macro avg       0.73      0.70      0.67       118
weighted avg       0.74      0.69      0.68       118



 76%|███████▌  | 152/200 [00:11<00:03, 13.58it/s]

acc: 0.6949152542372882 recall 0.7068067528735632 prec: 0.7361398705113945 f1: 0.6806974776820499
              precision    recall  f1-score   support

           0       0.53      1.00      0.70        24
           1       0.76      0.67      0.71        33
           2       0.81      0.78      0.79        32
           3       0.85      0.38      0.52        29

   micro avg       0.69      0.69      0.69       118
   macro avg       0.74      0.71      0.68       118
weighted avg       0.75      0.69      0.68       118



 79%|███████▉  | 158/200 [00:11<00:03, 13.33it/s]

acc: 0.6949152542372882 recall 0.7068067528735632 prec: 0.7361398705113945 f1: 0.6806974776820499
              precision    recall  f1-score   support

           0       0.53      1.00      0.70        24
           1       0.76      0.67      0.71        33
           2       0.81      0.78      0.79        32
           3       0.85      0.38      0.52        29

   micro avg       0.69      0.69      0.69       118
   macro avg       0.74      0.71      0.68       118
weighted avg       0.75      0.69      0.68       118



 83%|████████▎ | 166/200 [00:12<00:02, 13.38it/s]

acc: 0.6949152542372882 recall 0.7068067528735632 prec: 0.7361398705113945 f1: 0.6806974776820499
              precision    recall  f1-score   support

           0       0.53      1.00      0.70        24
           1       0.76      0.67      0.71        33
           2       0.81      0.78      0.79        32
           3       0.85      0.38      0.52        29

   micro avg       0.69      0.69      0.69       118
   macro avg       0.74      0.71      0.68       118
weighted avg       0.75      0.69      0.68       118



 87%|████████▋ | 174/200 [00:12<00:01, 13.56it/s]

acc: 0.6949152542372882 recall 0.7068067528735632 prec: 0.7361398705113945 f1: 0.6806974776820499
              precision    recall  f1-score   support

           0       0.53      1.00      0.70        24
           1       0.76      0.67      0.71        33
           2       0.81      0.78      0.79        32
           3       0.85      0.38      0.52        29

   micro avg       0.69      0.69      0.69       118
   macro avg       0.74      0.71      0.68       118
weighted avg       0.75      0.69      0.68       118



 90%|█████████ | 180/200 [00:13<00:01, 13.27it/s]

acc: 0.6949152542372882 recall 0.7068067528735632 prec: 0.7361398705113945 f1: 0.6806974776820499
              precision    recall  f1-score   support

           0       0.53      1.00      0.70        24
           1       0.76      0.67      0.71        33
           2       0.81      0.78      0.79        32
           3       0.85      0.38      0.52        29

   micro avg       0.69      0.69      0.69       118
   macro avg       0.74      0.71      0.68       118
weighted avg       0.75      0.69      0.68       118



 94%|█████████▍| 188/200 [00:13<00:00, 13.44it/s]

acc: 0.6949152542372882 recall 0.7068067528735632 prec: 0.7361398705113945 f1: 0.6806974776820499
              precision    recall  f1-score   support

           0       0.53      1.00      0.70        24
           1       0.76      0.67      0.71        33
           2       0.81      0.78      0.79        32
           3       0.85      0.38      0.52        29

   micro avg       0.69      0.69      0.69       118
   macro avg       0.74      0.71      0.68       118
weighted avg       0.75      0.69      0.68       118



 97%|█████████▋| 194/200 [00:14<00:00, 13.28it/s]

acc: 0.6949152542372882 recall 0.7068067528735632 prec: 0.7361398705113945 f1: 0.6806974776820499
              precision    recall  f1-score   support

           0       0.53      1.00      0.70        24
           1       0.76      0.67      0.71        33
           2       0.81      0.78      0.79        32
           3       0.85      0.38      0.52        29

   micro avg       0.69      0.69      0.69       118
   macro avg       0.74      0.71      0.68       118
weighted avg       0.75      0.69      0.68       118



100%|██████████| 200/200 [00:14<00:00, 13.51it/s]

acc: 0.6949152542372882 recall 0.7068067528735632 prec: 0.7361398705113945 f1: 0.6806974776820499
              precision    recall  f1-score   support

           0       0.53      1.00      0.70        24
           1       0.76      0.67      0.71        33
           2       0.81      0.78      0.79        32
           3       0.85      0.38      0.52        29

   micro avg       0.69      0.69      0.69       118
   macro avg       0.74      0.71      0.68       118
weighted avg       0.75      0.69      0.68       118






In [18]:
vocab_size

3351

In [19]:
textEncState = model.state_dict()

### Till this point the Text Model stats have been decided

In [20]:
twitter15_label_file = '../twitter16/label.txt'
twitter15_text_file = '../twitter16/source_tweets.txt'

In [21]:
def load_labels(file):
    f = open(file,'r')
    labels = {}
    
    raw_data = f.readlines()
    
    for line in raw_data:
        line = line.strip()
        line = line.split(':')
        labels[int(line[1])] = line[0]
    
    return labels

In [22]:
twitter15_labels = load_labels(twitter15_label_file)
twitter15_labels

{656955120626880512: 'false',
 615689290706595840: 'true',
 613404935003217920: 'false',
 731166399389962242: 'unverified',
 714598641827246081: 'unverified',
 614467824313106432: 'true',
 715515982584881152: 'unverified',
 693315824132685824: 'non-rumor',
 693843042546106369: 'non-rumor',
 622891631293935616: 'false',
 692630756548591616: 'non-rumor',
 693265096278163456: 'non-rumor',
 553589051044151296: 'true',
 553590835850514433: 'true',
 622858454949040128: 'false',
 656870311057575936: 'false',
 616311563071434753: 'true',
 641666167992647681: 'non-rumor',
 525060425184858112: 'true',
 672513234419638273: 'false',
 544382892378714113: 'true',
 681824512120324096: 'non-rumor',
 620835698514464768: 'false',
 626898253900943360: 'false',
 618804516578680832: 'false',
 672632899921833984: 'false',
 553588178687655936: 'true',
 594687353937100801: 'false',
 613016993692798977: 'false',
 663385747177775105: 'false',
 766715993385267201: 'non-rumor',
 693141729529184256: 'non-rumor',
 

In [23]:
class Node:
    def __init__(self,uid,tid,time_stamp,label):
        self.children = {}
        self.childrenList = []
        self.num_children = 0
        self.tid = tid
        self.uid = uid
        self.label = label
        self.time_stamp = time_stamp
    
    def add_child(self,node):
        if node.uid not in self.children:
            self.children[node.uid] = node
            self.num_children += 1
        else:
            self.children[node.uid] = node
        self.childrenList = list(self.children.values())

In [24]:
class Tree:
    def __init__(self,root):
        self.root = root
        self.tweet_id = root.tid
        self.uid = root.uid
        self.height = 0
        self.nodes = 0
    
    def show(self):
        queue = [self.root,0]
        
        while len(queue) != 0:
            toprint = queue.pop(0)
            if toprint == 0:
                print('\n')
            else:
                print(toprint.uid,end=' ')
                queue += toprint.children.values()
                queue.append(0)
                
    def insertnode(self,curnode,parent,child):
        if curnode.uid == parent.uid:
            curnode.add_child(child)
            return 1

        elif parent.uid in curnode.children:
            s = self.insertnode(curnode.children[parent.uid],parent,child)
            return 2
        else:
            for node in curnode.children:
                s = self.insertnode(curnode.children[node],parent,child)
                if s == 2:
                    break

In [25]:
def loadPklFileNum(datapath,incSize,fileNum):
    
    with open(datapath+str(incSize)+'inc_'+str(fileNum)+'.pickle', 'rb') as handle:
        twitTrees = pkl.load(handle)
    return twitTrees

In [26]:
def loadTreeFilesOfIncrement(datapath,incSize):
    twittertrees = {}
    
    files = [x for x in os.listdir(t15Datapath) if str(incSize)+'inc' in x]
    
    for file in tqdm(files):
        with open(datapath+file,'rb') as handle:
            partialTrees = pkl.load(handle)
        twittertrees.update(partialTrees)
        
    return twittertrees

In [27]:
t15Datapath = '/home/nikhil.pinnaparaju/Research/Temporal Tree Encoding/twitter16/pickledTrees/'
# twitter15_trees = loadPklFileNum(t15Datapath,20,1)

In [28]:
twitter15_trees = loadTreeFilesOfIncrement(t15Datapath,20)

100%|██████████| 9/9 [01:22<00:00,  9.12s/it]


In [29]:
if torch.cuda.is_available():
    device = 'cuda:2'
    device = 'cpu'
else:
    device = 'cpu'

In [30]:
%run ../twitter16/userdata_parser.ipynb

100%|██████████| 1000/1000 [00:00<00:00, 9479.60it/s]
  0%|          | 0/33 [00:00<?, ?it/s]

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

100%|██████████| 33/33 [02:54<00:00,  5.29s/it]
100%|██████████| 253378/253378 [02:11<00:00, 1927.06it/s]


In [31]:
for key in tqdm(userVects):
    userVects[key] = userVects[key].float()

userVects = defaultdict(lambda:torch.tensor([1.1100e+02, 1.5000e+01, 0.0000e+00, 7.9700e+02, 4.7300e+02, 0.0000e+00,
        8.3326e+04, 1.0000e+00]),userVects)

100%|██████████| 253378/253378 [00:01<00:00, 196893.00it/s]


In [32]:
%run ./temporal_tree_model.ipynb 

In [33]:
labelMap = {}
labelCount = 0
for label in list(twitter15_labels.values()):
    if label not in labelMap:
        labelMap[label] = labelCount
        labelCount += 1
labelMap

{'false': 0, 'true': 1, 'unverified': 2, 'non-rumor': 3}

In [34]:
epochs = 10
X = []
y = []
for tid in twitter15_trees:
        if tid in twitter15_trees and tid in twitter15_labels:
            X.append(twitter15_trees[tid])
            y.append(twitter15_labels[tid])
            
x_train = X[:700]
x_test = X[700:]

In [35]:
criterion = torch.nn.CrossEntropyLoss()

model = treeText(torch.cuda.is_available(),8,100,userVects,twitter15_labels,labelMap,criterion,device,vocab_size,textEncState)
model = model.to(device)

In [36]:
optimizer = torch.optim.Adagrad(model.parameters(),0.01)

maxAcc = 0
count = 0
netloss = 0

for i in range(10):
    print(i)
    train_losses = []
    val_losses = []
    
    for treeSet in tqdm_notebook(x_train):
            tnum = 0
            tree = treeSet[-1]
#         for tree in treeSet:
#             print(count)
            count += 1
#             tnum += 1
            optimizer.zero_grad()
            
            text = torch.tensor(sent2idx[tree.tweet_id])
            text = Variable(text.view(-1, len(text), 1)).to(device)
            
            pred = model(tree.root,text)
            
            label = Variable(torch.tensor(labelMap[treeSet[0].root.label]).reshape(-1).to(device))
            loss = criterion(pred.reshape(1,4),label)
#             print(loss)
            netloss += loss
    
            if count % 20 == 0:
#                 print('opt')
                loss.backward()
                optimizer.step()
            
    preds = []
    labels = []

    allLabels = []
    allPreds = []

    for valSet in tqdm_notebook(x_test):
        finalTree = valSet[-1]
        
        text = torch.tensor(sent2idx[finalTree.tweet_id])
        text = Variable(text.view(-1, len(text), 1)).to(device)
        
        predicted = model(finalTree.root,text)
        preds.append(predicted)
#         print(predicted)
        predicted =  torch.softmax(predicted,0)
        predicted = torch.max(predicted, 0)[1].cpu().numpy().tolist()

        labels.append(labelMap[finalTree.root.label])

        allLabels.append(labelMap[finalTree.root.label])
        allPreds.append(predicted)

    predTensor = torch.stack(preds)
    labelTensor = torch.tensor(labels).to(device)

    print(allLabels,allPreds)

    loss = criterion(predTensor.reshape(-1,4), labelTensor.reshape(-1))

    cr = classification_report(allLabels,allPreds,output_dict=True)
    cr['loss'] = loss.item()
    cr['Acc'] = accuracy_score(allLabels,allPreds,)
    
    if cr['Acc'] > maxAcc:
        maxAcc = cr['Acc']
        torch.save({'state_dict': model.state_dict()}, './earlydetect_twit16.pth')
    
    print('loss: ',cr['loss'])
    print(cr['Acc'])
    
    with open('earlydetect_twit16.json', 'a') as fp:
        json.dump(cr, fp)
        fp.write('\n')

0


HBox(children=(IntProgress(value=0, max=700), HTML(value='')))






HBox(children=(IntProgress(value=0, max=118), HTML(value='')))


[0, 0, 0, 2, 1, 0, 1, 3, 2, 1, 0, 0, 2, 1, 3, 2, 2, 0, 1, 0, 0, 3, 0, 0, 1, 2, 3, 0, 3, 3, 1, 1, 2, 2, 1, 0, 2, 3, 2, 2, 0, 0, 2, 2, 1, 0, 3, 0, 3, 1, 1, 1, 1, 0, 1, 3, 3, 0, 1, 1, 3, 3, 1, 2, 2, 2, 3, 3, 3, 2, 3, 2, 1, 2, 3, 3, 0, 3, 1, 1, 2, 0, 3, 3, 0, 3, 3, 3, 0, 0, 0, 1, 2, 1, 2, 1, 2, 0, 2, 1, 3, 1, 0, 0, 2, 3, 2, 3, 0, 3, 1, 1, 1, 0, 1, 2, 0, 3] [0, 0, 0, 2, 1, 0, 1, 0, 0, 1, 0, 0, 2, 1, 0, 0, 2, 0, 1, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 1, 1, 2, 2, 1, 0, 2, 0, 2, 2, 0, 0, 2, 2, 2, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 2, 2, 2, 0, 3, 0, 2, 0, 2, 1, 0, 0, 0, 0, 0, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 0, 2, 1, 2, 0, 2, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 2, 1, 0, 1, 2, 0, 0]
loss:  0.6892242431640625
0.6694915254237288
1


HBox(children=(IntProgress(value=0, max=700), HTML(value='')))




HBox(children=(IntProgress(value=0, max=118), HTML(value='')))


[0, 0, 0, 2, 1, 0, 1, 3, 2, 1, 0, 0, 2, 1, 3, 2, 2, 0, 1, 0, 0, 3, 0, 0, 1, 2, 3, 0, 3, 3, 1, 1, 2, 2, 1, 0, 2, 3, 2, 2, 0, 0, 2, 2, 1, 0, 3, 0, 3, 1, 1, 1, 1, 0, 1, 3, 3, 0, 1, 1, 3, 3, 1, 2, 2, 2, 3, 3, 3, 2, 3, 2, 1, 2, 3, 3, 0, 3, 1, 1, 2, 0, 3, 3, 0, 3, 3, 3, 0, 0, 0, 1, 2, 1, 2, 1, 2, 0, 2, 1, 3, 1, 0, 0, 2, 3, 2, 3, 0, 3, 1, 1, 1, 0, 1, 2, 0, 3] [0, 0, 0, 2, 1, 0, 1, 0, 0, 1, 0, 0, 2, 1, 3, 2, 2, 0, 1, 0, 0, 3, 0, 0, 1, 2, 3, 0, 3, 3, 1, 1, 2, 2, 1, 0, 2, 3, 2, 2, 0, 0, 2, 2, 2, 0, 3, 0, 3, 1, 1, 1, 1, 0, 1, 3, 3, 0, 1, 1, 3, 3, 1, 2, 2, 2, 0, 3, 3, 2, 0, 2, 1, 3, 0, 0, 0, 3, 1, 1, 2, 0, 0, 3, 0, 3, 3, 3, 0, 0, 0, 1, 2, 0, 2, 1, 2, 0, 2, 1, 3, 1, 0, 0, 3, 0, 2, 0, 0, 3, 1, 2, 1, 0, 1, 2, 0, 0]
loss:  0.4654288589954376
0.8728813559322034
2


HBox(children=(IntProgress(value=0, max=700), HTML(value='')))




HBox(children=(IntProgress(value=0, max=118), HTML(value='')))


[0, 0, 0, 2, 1, 0, 1, 3, 2, 1, 0, 0, 2, 1, 3, 2, 2, 0, 1, 0, 0, 3, 0, 0, 1, 2, 3, 0, 3, 3, 1, 1, 2, 2, 1, 0, 2, 3, 2, 2, 0, 0, 2, 2, 1, 0, 3, 0, 3, 1, 1, 1, 1, 0, 1, 3, 3, 0, 1, 1, 3, 3, 1, 2, 2, 2, 3, 3, 3, 2, 3, 2, 1, 2, 3, 3, 0, 3, 1, 1, 2, 0, 3, 3, 0, 3, 3, 3, 0, 0, 0, 1, 2, 1, 2, 1, 2, 0, 2, 1, 3, 1, 0, 0, 2, 3, 2, 3, 0, 3, 1, 1, 1, 0, 1, 2, 0, 3] [0, 0, 0, 2, 1, 0, 1, 3, 3, 1, 3, 0, 2, 1, 3, 2, 2, 0, 1, 0, 0, 3, 0, 0, 1, 2, 3, 2, 3, 3, 1, 1, 2, 2, 1, 0, 2, 3, 2, 2, 0, 0, 2, 2, 2, 0, 3, 0, 3, 1, 1, 1, 1, 0, 1, 3, 3, 0, 1, 1, 3, 3, 1, 2, 2, 2, 3, 3, 3, 2, 3, 2, 1, 3, 3, 3, 0, 3, 1, 1, 2, 0, 3, 3, 0, 3, 3, 3, 0, 0, 0, 1, 2, 0, 2, 1, 2, 0, 2, 1, 3, 0, 0, 0, 3, 3, 2, 3, 0, 3, 1, 2, 1, 0, 1, 2, 0, 3]
loss:  0.4367779493331909
0.923728813559322
3


HBox(children=(IntProgress(value=0, max=700), HTML(value='')))




HBox(children=(IntProgress(value=0, max=118), HTML(value='')))


[0, 0, 0, 2, 1, 0, 1, 3, 2, 1, 0, 0, 2, 1, 3, 2, 2, 0, 1, 0, 0, 3, 0, 0, 1, 2, 3, 0, 3, 3, 1, 1, 2, 2, 1, 0, 2, 3, 2, 2, 0, 0, 2, 2, 1, 0, 3, 0, 3, 1, 1, 1, 1, 0, 1, 3, 3, 0, 1, 1, 3, 3, 1, 2, 2, 2, 3, 3, 3, 2, 3, 2, 1, 2, 3, 3, 0, 3, 1, 1, 2, 0, 3, 3, 0, 3, 3, 3, 0, 0, 0, 1, 2, 1, 2, 1, 2, 0, 2, 1, 3, 1, 0, 0, 2, 3, 2, 3, 0, 3, 1, 1, 1, 0, 1, 2, 0, 3] [0, 0, 0, 2, 1, 0, 1, 3, 3, 1, 3, 0, 2, 1, 3, 2, 2, 0, 1, 0, 0, 3, 0, 0, 1, 2, 3, 2, 3, 3, 1, 1, 2, 2, 1, 0, 2, 3, 2, 2, 0, 0, 2, 2, 2, 0, 3, 0, 3, 1, 1, 1, 1, 0, 1, 3, 3, 0, 1, 1, 3, 3, 1, 2, 2, 2, 3, 3, 3, 2, 3, 2, 1, 3, 3, 3, 0, 3, 1, 1, 2, 0, 3, 3, 0, 3, 3, 3, 0, 0, 0, 1, 2, 0, 2, 1, 2, 0, 2, 1, 3, 0, 0, 0, 3, 3, 2, 3, 0, 3, 1, 2, 1, 0, 1, 2, 0, 3]
loss:  0.35245639085769653
0.923728813559322
4


HBox(children=(IntProgress(value=0, max=700), HTML(value='')))




HBox(children=(IntProgress(value=0, max=118), HTML(value='')))


[0, 0, 0, 2, 1, 0, 1, 3, 2, 1, 0, 0, 2, 1, 3, 2, 2, 0, 1, 0, 0, 3, 0, 0, 1, 2, 3, 0, 3, 3, 1, 1, 2, 2, 1, 0, 2, 3, 2, 2, 0, 0, 2, 2, 1, 0, 3, 0, 3, 1, 1, 1, 1, 0, 1, 3, 3, 0, 1, 1, 3, 3, 1, 2, 2, 2, 3, 3, 3, 2, 3, 2, 1, 2, 3, 3, 0, 3, 1, 1, 2, 0, 3, 3, 0, 3, 3, 3, 0, 0, 0, 1, 2, 1, 2, 1, 2, 0, 2, 1, 3, 1, 0, 0, 2, 3, 2, 3, 0, 3, 1, 1, 1, 0, 1, 2, 0, 3] [0, 0, 0, 2, 1, 0, 1, 3, 3, 1, 3, 0, 2, 1, 3, 2, 2, 0, 1, 0, 0, 3, 0, 0, 1, 2, 3, 2, 3, 3, 1, 1, 2, 2, 1, 0, 2, 3, 2, 2, 0, 0, 2, 2, 2, 0, 3, 0, 3, 1, 1, 1, 1, 0, 1, 3, 3, 0, 1, 1, 3, 3, 1, 2, 2, 2, 3, 3, 3, 2, 3, 2, 1, 3, 3, 3, 0, 3, 1, 1, 2, 0, 3, 3, 0, 3, 3, 3, 0, 0, 0, 1, 2, 0, 2, 1, 2, 0, 2, 1, 3, 1, 0, 0, 3, 3, 2, 3, 0, 3, 1, 2, 1, 0, 1, 2, 0, 3]
loss:  0.3224862515926361
0.9322033898305084
5


HBox(children=(IntProgress(value=0, max=700), HTML(value='')))




HBox(children=(IntProgress(value=0, max=118), HTML(value='')))


[0, 0, 0, 2, 1, 0, 1, 3, 2, 1, 0, 0, 2, 1, 3, 2, 2, 0, 1, 0, 0, 3, 0, 0, 1, 2, 3, 0, 3, 3, 1, 1, 2, 2, 1, 0, 2, 3, 2, 2, 0, 0, 2, 2, 1, 0, 3, 0, 3, 1, 1, 1, 1, 0, 1, 3, 3, 0, 1, 1, 3, 3, 1, 2, 2, 2, 3, 3, 3, 2, 3, 2, 1, 2, 3, 3, 0, 3, 1, 1, 2, 0, 3, 3, 0, 3, 3, 3, 0, 0, 0, 1, 2, 1, 2, 1, 2, 0, 2, 1, 3, 1, 0, 0, 2, 3, 2, 3, 0, 3, 1, 1, 1, 0, 1, 2, 0, 3] [0, 0, 0, 2, 1, 0, 1, 3, 3, 1, 3, 0, 2, 1, 3, 2, 2, 0, 1, 0, 0, 3, 0, 0, 1, 2, 3, 2, 3, 3, 1, 1, 2, 2, 1, 0, 2, 3, 2, 2, 0, 0, 2, 2, 2, 0, 3, 0, 3, 1, 1, 1, 1, 0, 1, 3, 3, 0, 1, 1, 3, 3, 1, 2, 2, 2, 3, 3, 3, 2, 3, 2, 1, 3, 3, 3, 0, 3, 1, 1, 2, 0, 3, 3, 0, 3, 3, 3, 0, 0, 0, 1, 2, 0, 2, 1, 2, 0, 2, 1, 3, 1, 0, 0, 3, 3, 2, 3, 0, 3, 1, 2, 1, 0, 1, 2, 0, 3]
loss:  0.30319342017173767
0.9322033898305084
6


HBox(children=(IntProgress(value=0, max=700), HTML(value='')))




HBox(children=(IntProgress(value=0, max=118), HTML(value='')))


[0, 0, 0, 2, 1, 0, 1, 3, 2, 1, 0, 0, 2, 1, 3, 2, 2, 0, 1, 0, 0, 3, 0, 0, 1, 2, 3, 0, 3, 3, 1, 1, 2, 2, 1, 0, 2, 3, 2, 2, 0, 0, 2, 2, 1, 0, 3, 0, 3, 1, 1, 1, 1, 0, 1, 3, 3, 0, 1, 1, 3, 3, 1, 2, 2, 2, 3, 3, 3, 2, 3, 2, 1, 2, 3, 3, 0, 3, 1, 1, 2, 0, 3, 3, 0, 3, 3, 3, 0, 0, 0, 1, 2, 1, 2, 1, 2, 0, 2, 1, 3, 1, 0, 0, 2, 3, 2, 3, 0, 3, 1, 1, 1, 0, 1, 2, 0, 3] [0, 0, 0, 2, 1, 0, 1, 3, 3, 1, 3, 0, 2, 1, 3, 2, 2, 0, 1, 0, 0, 3, 0, 0, 1, 2, 3, 2, 3, 3, 1, 1, 2, 2, 1, 0, 2, 3, 2, 2, 0, 0, 2, 2, 2, 0, 3, 0, 3, 1, 1, 1, 1, 0, 1, 3, 3, 0, 1, 1, 3, 3, 1, 2, 2, 2, 3, 3, 3, 2, 3, 2, 1, 3, 3, 3, 0, 3, 1, 1, 2, 0, 3, 3, 0, 3, 3, 3, 0, 0, 0, 1, 2, 0, 2, 1, 2, 0, 2, 1, 3, 1, 0, 0, 3, 3, 2, 3, 0, 3, 1, 2, 1, 0, 1, 2, 0, 3]
loss:  0.29023587703704834
0.9322033898305084
7


HBox(children=(IntProgress(value=0, max=700), HTML(value='')))




HBox(children=(IntProgress(value=0, max=118), HTML(value='')))


[0, 0, 0, 2, 1, 0, 1, 3, 2, 1, 0, 0, 2, 1, 3, 2, 2, 0, 1, 0, 0, 3, 0, 0, 1, 2, 3, 0, 3, 3, 1, 1, 2, 2, 1, 0, 2, 3, 2, 2, 0, 0, 2, 2, 1, 0, 3, 0, 3, 1, 1, 1, 1, 0, 1, 3, 3, 0, 1, 1, 3, 3, 1, 2, 2, 2, 3, 3, 3, 2, 3, 2, 1, 2, 3, 3, 0, 3, 1, 1, 2, 0, 3, 3, 0, 3, 3, 3, 0, 0, 0, 1, 2, 1, 2, 1, 2, 0, 2, 1, 3, 1, 0, 0, 2, 3, 2, 3, 0, 3, 1, 1, 1, 0, 1, 2, 0, 3] [0, 0, 0, 2, 1, 0, 1, 3, 3, 1, 3, 0, 2, 1, 3, 2, 2, 0, 1, 0, 0, 3, 0, 0, 1, 2, 3, 2, 3, 3, 1, 1, 2, 2, 1, 0, 2, 3, 2, 2, 0, 0, 2, 2, 2, 0, 3, 0, 3, 1, 1, 1, 1, 0, 1, 3, 3, 0, 1, 1, 3, 3, 1, 2, 2, 2, 3, 3, 3, 2, 3, 2, 1, 3, 3, 3, 0, 3, 1, 1, 2, 0, 3, 3, 0, 3, 3, 3, 0, 0, 0, 1, 2, 0, 2, 1, 2, 0, 2, 1, 3, 1, 0, 0, 3, 3, 2, 3, 0, 3, 1, 2, 1, 0, 1, 2, 0, 3]
loss:  0.2810232937335968
0.9322033898305084
8


HBox(children=(IntProgress(value=0, max=700), HTML(value='')))




HBox(children=(IntProgress(value=0, max=118), HTML(value='')))


[0, 0, 0, 2, 1, 0, 1, 3, 2, 1, 0, 0, 2, 1, 3, 2, 2, 0, 1, 0, 0, 3, 0, 0, 1, 2, 3, 0, 3, 3, 1, 1, 2, 2, 1, 0, 2, 3, 2, 2, 0, 0, 2, 2, 1, 0, 3, 0, 3, 1, 1, 1, 1, 0, 1, 3, 3, 0, 1, 1, 3, 3, 1, 2, 2, 2, 3, 3, 3, 2, 3, 2, 1, 2, 3, 3, 0, 3, 1, 1, 2, 0, 3, 3, 0, 3, 3, 3, 0, 0, 0, 1, 2, 1, 2, 1, 2, 0, 2, 1, 3, 1, 0, 0, 2, 3, 2, 3, 0, 3, 1, 1, 1, 0, 1, 2, 0, 3] [0, 0, 0, 2, 1, 0, 1, 3, 3, 1, 3, 0, 2, 1, 3, 2, 2, 0, 1, 0, 0, 3, 0, 0, 1, 2, 3, 2, 3, 3, 1, 1, 2, 2, 1, 0, 2, 3, 2, 2, 0, 0, 2, 2, 2, 0, 3, 0, 3, 1, 1, 1, 1, 0, 1, 3, 3, 0, 1, 1, 3, 3, 1, 2, 2, 2, 3, 3, 3, 2, 3, 2, 1, 3, 3, 3, 0, 3, 1, 1, 2, 0, 3, 3, 0, 3, 3, 3, 0, 0, 0, 1, 2, 0, 2, 1, 2, 0, 2, 1, 3, 1, 0, 0, 3, 3, 2, 3, 0, 3, 1, 2, 1, 0, 1, 2, 0, 3]
loss:  0.2725045084953308
0.9322033898305084
9


HBox(children=(IntProgress(value=0, max=700), HTML(value='')))




HBox(children=(IntProgress(value=0, max=118), HTML(value='')))


[0, 0, 0, 2, 1, 0, 1, 3, 2, 1, 0, 0, 2, 1, 3, 2, 2, 0, 1, 0, 0, 3, 0, 0, 1, 2, 3, 0, 3, 3, 1, 1, 2, 2, 1, 0, 2, 3, 2, 2, 0, 0, 2, 2, 1, 0, 3, 0, 3, 1, 1, 1, 1, 0, 1, 3, 3, 0, 1, 1, 3, 3, 1, 2, 2, 2, 3, 3, 3, 2, 3, 2, 1, 2, 3, 3, 0, 3, 1, 1, 2, 0, 3, 3, 0, 3, 3, 3, 0, 0, 0, 1, 2, 1, 2, 1, 2, 0, 2, 1, 3, 1, 0, 0, 2, 3, 2, 3, 0, 3, 1, 1, 1, 0, 1, 2, 0, 3] [0, 0, 0, 2, 1, 0, 1, 3, 3, 1, 3, 0, 2, 1, 3, 2, 2, 0, 1, 0, 0, 3, 0, 0, 1, 2, 3, 2, 3, 3, 1, 1, 2, 2, 1, 0, 2, 3, 2, 2, 0, 0, 2, 2, 2, 0, 3, 0, 3, 1, 1, 1, 1, 0, 1, 3, 3, 0, 1, 1, 3, 3, 1, 2, 2, 2, 3, 3, 3, 2, 3, 2, 1, 3, 3, 3, 0, 3, 1, 1, 2, 0, 3, 3, 0, 3, 3, 3, 0, 0, 0, 1, 2, 0, 2, 1, 2, 0, 2, 1, 3, 1, 0, 0, 3, 3, 2, 3, 0, 3, 1, 2, 1, 0, 1, 2, 0, 3]
loss:  0.26760333776474
0.9322033898305084


## Early Detection

In [37]:
checkpoint = torch.load('./earlydetect_twit16.pth')
model.load_state_dict(checkpoint['state_dict'])

<All keys matched successfully>

In [38]:
from os import listdir
from os.path import isfile, join
import re
import numpy as np
import copy

In [39]:
# twitter15_trees = loadTreeFilesOfIncrement(t15Datapath,1)
twitter15_trees = loadPklFileNum(t15Datapath,1,1)

In [40]:
x_test = []
for tid in twitter15_trees:
        if tid in twitter15_trees and tid in twitter15_labels:
            x_test.append(twitter15_trees[tid])

In [41]:
def maxTimeDiff(root):
    current = root 
    
#     print(current.childrenList)
    while(current.childrenList):
        current = current.childrenList[-1]
    return current.time_stamp

In [49]:
timeTaken = []

for valSet in tqdm_notebook(x_test):
    finalInd = len(valSet) - 1
    
    for i in tqdm(range(len(valSet))):
        tree = valSet[i]
        
        text = torch.tensor(sent2idx[tree.tweet_id])
        text = Variable(text.view(-1, len(text), 1)).to(device)
        
        predicted = model(tree.root,text)
        predicted =  torch.softmax(predicted,0)
        predicted = torch.max(predicted, 0)[1].cpu().numpy().tolist()
        
        if predicted == labelMap[tree.root.label]:
            time = maxTimeDiff(tree.root)
#             time = 0
            timeTaken.append(time)
            break
        
        else:
            if i == finalInd:
                time = maxTimeDiff(tree.root)
                timeTaken.append(time)
                break

HBox(children=(IntProgress(value=0, max=98), HTML(value='')))





  0%|          | 0/136 [00:00<?, ?it/s]




  0%|          | 0/185 [00:00<?, ?it/s][A[A[A[A




  0%|          | 0/824 [00:00<?, ?it/s][A[A[A[A




  0%|          | 0/231 [00:00<?, ?it/s][A[A[A[A




  0%|          | 0/160 [00:00<?, ?it/s][A[A[A[A




  0%|          | 0/117 [00:00<?, ?it/s][A[A[A[A




  0%|          | 0/277 [00:00<?, ?it/s][A[A[A[A




  0%|          | 0/279 [00:00<?, ?it/s][A[A[A[A




  0%|          | 0/695 [00:00<?, ?it/s][A[A[A[A



  1%|▏         | 10/695 [00:00<00:07, 96.43it/s][A[A[A[A



  3%|▎         | 18/695 [00:00<00:07, 89.57it/s][A[A[A[A



  3%|▎         | 24/695 [00:00<00:08, 75.48it/s][A[A[A[A



  4%|▍         | 29/695 [00:00<00:11, 56.64it/s][A[A[A[A



  5%|▍         | 34/695 [00:00<00:15, 43.75it/s][A[A[A[A



  5%|▌         | 38/695 [00:00<00:18, 35.60it/s][A[A[A[A



  6%|▌         | 42/695 [00:00<00:21, 29.89it/s][A[A[A[A



  7%|▋         | 46/695 [00:01<00:24, 26.74it/s][A[A[A

 45%|████▌     | 315/695 [00:43<01:39,  3.81it/s][A[A[A[A



 45%|████▌     | 316/695 [00:44<01:39,  3.81it/s][A[A[A[A



 46%|████▌     | 317/695 [00:44<01:43,  3.66it/s][A[A[A[A



 46%|████▌     | 318/695 [00:44<01:42,  3.69it/s][A[A[A[A



 46%|████▌     | 319/695 [00:44<01:41,  3.72it/s][A[A[A[A



 46%|████▌     | 320/695 [00:45<01:40,  3.73it/s][A[A[A[A



 46%|████▌     | 321/695 [00:45<01:40,  3.74it/s][A[A[A[A



 46%|████▋     | 322/695 [00:45<01:40,  3.72it/s][A[A[A[A



 46%|████▋     | 323/695 [00:46<01:43,  3.61it/s][A[A[A[A



 47%|████▋     | 324/695 [00:46<01:41,  3.64it/s][A[A[A[A



 47%|████▋     | 325/695 [00:46<01:42,  3.62it/s][A[A[A[A



 47%|████▋     | 326/695 [00:46<01:42,  3.61it/s][A[A[A[A



 47%|████▋     | 327/695 [00:47<01:42,  3.59it/s][A[A[A[A



 47%|████▋     | 328/695 [00:47<01:42,  3.60it/s][A[A[A[A



 47%|████▋     | 329/695 [00:47<01:41,  3.62it/s][A[A[A[A



 47%|████▋     | 330/695 

 82%|████████▏ | 567/695 [02:15<00:58,  2.20it/s][A[A[A[A



 82%|████████▏ | 568/695 [02:16<00:57,  2.20it/s][A[A[A[A



 82%|████████▏ | 569/695 [02:16<00:57,  2.19it/s][A[A[A[A



 82%|████████▏ | 570/695 [02:17<00:57,  2.18it/s][A[A[A[A



 82%|████████▏ | 571/695 [02:17<00:56,  2.18it/s][A[A[A[A



 82%|████████▏ | 572/695 [02:18<00:56,  2.18it/s][A[A[A[A



 82%|████████▏ | 573/695 [02:18<00:56,  2.18it/s][A[A[A[A



 83%|████████▎ | 574/695 [02:19<00:55,  2.17it/s][A[A[A[A



 83%|████████▎ | 575/695 [02:19<00:55,  2.17it/s][A[A[A[A



 83%|████████▎ | 576/695 [02:19<00:54,  2.17it/s][A[A[A[A



 83%|████████▎ | 577/695 [02:20<00:54,  2.16it/s][A[A[A[A



 83%|████████▎ | 578/695 [02:20<00:54,  2.16it/s][A[A[A[A



 83%|████████▎ | 579/695 [02:21<00:53,  2.15it/s][A[A[A[A



 83%|████████▎ | 580/695 [02:21<00:53,  2.15it/s][A[A[A[A



 84%|████████▎ | 581/695 [02:22<00:53,  2.15it/s][A[A[A[A



 84%|████████▎ | 582/695 

 39%|███▉      | 75/192 [00:03<00:09, 12.78it/s][A[A[A[A[A[A





 40%|████      | 77/192 [00:03<00:08, 13.11it/s][A[A[A[A[A[A





 41%|████      | 79/192 [00:03<00:08, 12.61it/s][A[A[A[A[A[A





 42%|████▏     | 81/192 [00:03<00:09, 12.13it/s][A[A[A[A[A[A





 43%|████▎     | 83/192 [00:04<00:09, 11.76it/s][A[A[A[A[A[A





 44%|████▍     | 85/192 [00:04<00:09, 11.38it/s][A[A[A[A[A[A





 45%|████▌     | 87/192 [00:04<00:09, 10.98it/s][A[A[A[A[A[A





 46%|████▋     | 89/192 [00:04<00:09, 10.67it/s][A[A[A[A[A[A





 47%|████▋     | 91/192 [00:04<00:09, 10.45it/s][A[A[A[A[A[A





 48%|████▊     | 93/192 [00:05<00:10,  9.82it/s][A[A[A[A[A[A





 49%|████▉     | 94/192 [00:05<00:10,  9.73it/s][A[A[A[A[A[A





 49%|████▉     | 95/192 [00:05<00:10,  9.58it/s][A[A[A[A[A[A





 50%|█████     | 96/192 [00:05<00:10,  9.12it/s][A[A[A[A[A[A





 51%|█████     | 97/192 [00:05<00:10,  9.12it/s][A[A[A[A[A

 61%|██████    | 163/269 [00:15<00:20,  5.19it/s][A[A[A[A[A[A[A






 61%|██████    | 164/269 [00:15<00:20,  5.22it/s][A[A[A[A[A[A[A






 61%|██████▏   | 165/269 [00:15<00:19,  5.26it/s][A[A[A[A[A[A[A






 62%|██████▏   | 166/269 [00:15<00:18,  5.56it/s][A[A[A[A[A[A[A






 62%|██████▏   | 167/269 [00:16<00:18,  5.54it/s][A[A[A[A[A[A[A






 62%|██████▏   | 168/269 [00:16<00:17,  5.66it/s][A[A[A[A[A[A[A






 63%|██████▎   | 169/269 [00:16<00:17,  5.69it/s][A[A[A[A[A[A[A






 63%|██████▎   | 170/269 [00:16<00:17,  5.77it/s][A[A[A[A[A[A[A






 64%|██████▎   | 171/269 [00:16<00:16,  5.94it/s][A[A[A[A[A[A[A






 64%|██████▍   | 172/269 [00:16<00:17,  5.68it/s][A[A[A[A[A[A[A






 64%|██████▍   | 173/269 [00:17<00:17,  5.37it/s][A[A[A[A[A[A[A






 65%|██████▍   | 174/269 [00:17<00:17,  5.56it/s][A[A[A[A[A[A[A






 65%|██████▌   | 175/269 [00:17<00:16,  5.68it/s][A[A[A[A[A[A[A







 34%|███▍      | 44/128 [00:01<00:04, 20.77it/s][A[A[A[A[A[A[A[A[A








 37%|███▋      | 47/128 [00:01<00:04, 20.10it/s][A[A[A[A[A[A[A[A[A








 39%|███▉      | 50/128 [00:01<00:03, 20.08it/s][A[A[A[A[A[A[A[A[A








 41%|████▏     | 53/128 [00:02<00:03, 18.98it/s][A[A[A[A[A[A[A[A[A








 43%|████▎     | 55/128 [00:02<00:04, 17.96it/s][A[A[A[A[A[A[A[A[A








 45%|████▍     | 57/128 [00:02<00:04, 17.00it/s][A[A[A[A[A[A[A[A[A








 46%|████▌     | 59/128 [00:02<00:04, 16.35it/s][A[A[A[A[A[A[A[A[A








 48%|████▊     | 61/128 [00:02<00:04, 15.59it/s][A[A[A[A[A[A[A[A[A








 49%|████▉     | 63/128 [00:02<00:04, 15.01it/s][A[A[A[A[A[A[A[A[A








 51%|█████     | 65/128 [00:02<00:04, 14.52it/s][A[A[A[A[A[A[A[A[A








 52%|█████▏    | 67/128 [00:03<00:04, 14.01it/s][A[A[A[A[A[A[A[A[A








 54%|█████▍    | 69/128 [00:03<00:04, 13.54it/s][A[A[A[A[A[A[A[A[A








In [45]:
timeTaken

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 7215.97,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0]

In [48]:
np.average(np.array(timeTaken))

360.7985

In [50]:
timeTaken

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 7215.97,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 6.02,
 0.0,
 0.0,
 29.22,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 24738.58,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 212136.73,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 7429.45,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 193.63,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0]

In [51]:
np.average(np.array(timeTaken))

2568.8734693877554