In [1]:
import pandas as pd
from sklearn.metrics import mean_squared_log_error
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import torch
import pandas as pd
from numpy import array
from tqdm import tqdm
import numpy as np

In [2]:
files=pd.read_json('../../val_files_801010.json')

In [3]:
len(files)

19968

In [4]:
is_cuda = torch.cuda.is_available()

# If we have a GPU available, we'll set our device to GPU. We'll use this device variable later in our code.
if is_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

def modelload(param,path_to_model):
    class MLP(nn.Module):
        def __init__(self, input_size, num_hidden, hidden_dim, dropout):
            super(MLP, self).__init__()
            self.hidden_layers = nn.ModuleList([])
            self.hidden_layers.append(nn.Linear(input_size, hidden_dim))
            for i in range(num_hidden - 1):
                self.hidden_layers.append(nn.Linear(hidden_dim, hidden_dim))
            self.dropout = nn.Dropout(dropout)
            self.output_projection = nn.Linear(hidden_dim, 1)
            self.nonlinearity = nn.ReLU()

        def forward(self, x):
            for hidden_layer in self.hidden_layers:
                x = hidden_layer(x)
                x = self.dropout(x)
                x = self.nonlinearity(x)
            out = self.output_projection(x)
            return out
        
    newmodel = MLP(param[0],param[1],param[2],param[3]).double()
    newmodel.to(device)
    newmodel.load_state_dict(torch.load(path_to_model))
    return newmodel
    

In [5]:
def split_sequences(sequences):
    X, y = list(), list()
    for i in range(len(sequences)):
        # find the end of this pattern
        end_ix = i + 1
        # check if we are beyond the dataset
        if end_ix > len(sequences):
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequences[i:end_ix, :-1], sequences[end_ix-1, -1]
        X.append(seq_x)
        y.append(seq_y)
    return array(X), array(y)

In [22]:
newmodel= modelload((856, 3, 256, 0.5),'./state_dict_1.pt')
newmodel.eval()
stepsize=40

n_timesteps=30
batch_size = 100
epoch_val=files[0]
epoch_size=len(files[0])
listmean=[]

In [25]:
for number in tqdm(range(int(epoch_size/stepsize))):
    val_x= np.empty((0,1,856), int)
    val_y= np.empty((0,), int)
    startno=number*stepsize
    for i in (epoch_val[startno:startno+stepsize]):
        joineddf=pd.read_feather('../../processed3-edited/'+i)
        joineddf=joineddf.fillna(0)
        tnp=joineddf[[c for c in joineddf if c not in ['Retweets']] 
               + ['Retweets']].to_numpy()
        valnpx,valnpy=split_sequences(tnp)

        val_x = np.append(val_x, valnpx, axis=0)
        val_y = np.append(val_y, valnpy, axis=0)
    
    val_x=torch.Tensor(val_x).double().to(device)
    predictions = newmodel(val_x)
    listmean.append(mean_squared_log_error(val_y, predictions.cpu().detach().numpy().clip(min=0).squeeze()))
    predictions=[]
    pd.DataFrame(listmean).to_csv('./val_linear_model_1/mean'+str(number)+'.csv')

100%|██████████| 499/499 [13:27<00:00,  1.62s/it]


In [26]:
listmean

[6.982698980408835,
 6.982698980408835,
 7.198375526697795,
 7.1052580297004795,
 7.178393085028102,
 7.112705315408259,
 7.122917243089219,
 6.982698980408835,
 7.198375526697795,
 7.1052580297004795,
 7.178393085028102,
 7.112705315408259,
 7.122917243089219,
 6.856890260136612,
 7.170122290037911,
 7.063671077153827,
 7.031605968421627,
 6.897032366370466,
 7.01819079605545,
 7.23386446951555,
 7.172245584714545,
 7.148953398278132,
 7.092335726973863,
 7.012371226295185,
 7.123428568381153,
 7.068943154787721,
 7.2587572711212625,
 7.085326602010295,
 7.11693177909296,
 7.150750413290707,
 7.065063760718403,
 7.152261484015491,
 7.25345002138625,
 7.276845375246288,
 7.137836110768322,
 7.153765533140776,
 7.043112528762442,
 7.010133230203936,
 7.1251685620964,
 7.156168952650525,
 7.301659094142779,
 7.006009774753519,
 7.129143744646501,
 7.241308875834528,
 7.095217239003707,
 7.057640970644402,
 7.108446581255127,
 6.8322952896948355,
 7.116354024931661,
 7.1831109008860485,
 

In [27]:
np.mean(listmean)

7.0977785028915354