In [1]:
import pandas as pd
from sklearn.metrics import mean_squared_log_error
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import torch
import pandas as pd
from numpy import array
from tqdm import tqdm
import numpy as np

In [2]:
files=pd.read_json('../../val_files_801010.json')

In [3]:
len(files)

19968

In [4]:
is_cuda = torch.cuda.is_available()

# If we have a GPU available, we'll set our device to GPU. We'll use this device variable later in our code.
if is_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

def modelload(param,path_to_model):
    class MLP(nn.Module):
        def __init__(self, input_size, num_hidden, hidden_dim, dropout):
            super(MLP, self).__init__()
            self.hidden_layers = nn.ModuleList([])
            self.hidden_layers.append(nn.Linear(input_size, hidden_dim))
            for i in range(num_hidden - 1):
                self.hidden_layers.append(nn.Linear(hidden_dim, hidden_dim))
            self.dropout = nn.Dropout(dropout)
            self.output_projection = nn.Linear(hidden_dim, 1)
            self.nonlinearity = nn.ReLU()

        def forward(self, x):
            for hidden_layer in self.hidden_layers:
                x = hidden_layer(x)
                x = self.dropout(x)
                x = self.nonlinearity(x)
            out = self.output_projection(x)
            return out
        
    newmodel = MLP(param[0],param[1],param[2],param[3]).double()
    newmodel.to(device)
    newmodel.load_state_dict(torch.load(path_to_model))
    return newmodel
    

In [6]:
def split_sequences(sequences):
    X, y = list(), list()
    for i in range(len(sequences)):
        # find the end of this pattern
        end_ix = i + 1
        # check if we are beyond the dataset
        if end_ix > len(sequences):
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequences[i:end_ix, :-1], sequences[end_ix-1, -1]
        X.append(seq_x)
        y.append(seq_y)
    return array(X), array(y)

In [7]:
newmodel= modelload((856, 3, 256, 0.5),'./state_dict_3.pt')
newmodel.eval()
stepsize=40
n_timesteps=30
batch_size = 100
epoch_val=files[0]
epoch_size=len(files[0])
listmean=[]

In [8]:
for number in tqdm(range(int(epoch_size/stepsize))):
    val_x= np.empty((0,1,856), int)
    val_y= np.empty((0,), int)
    startno=number*stepsize
    for i in (epoch_val[startno:startno+stepsize]):
        joineddf=pd.read_feather('../../processed3-edited/'+i)
        joineddf=joineddf.fillna(0)
        tnp=joineddf[[c for c in joineddf if c not in ['Retweets']] 
               + ['Retweets']].to_numpy()
        valnpx,valnpy=split_sequences(tnp)

        val_x = np.append(val_x, valnpx, axis=0)
        val_y = np.append(val_y, valnpy, axis=0)
    
    val_x=torch.Tensor(val_x).double().to(device)
    predictions = newmodel(val_x)
    listmean.append(mean_squared_log_error(val_y, predictions.cpu().detach().numpy().clip(min=0).squeeze()))
    predictions=[]
    pd.DataFrame(listmean).to_csv('./val_linear_model_4/mean'+str(number)+'.csv')

100%|██████████| 499/499 [12:36<00:00,  1.52s/it]


In [9]:
listmean

[1.9010971787990765,
 1.9245916992754961,
 1.8805947636167777,
 1.8859565417654371,
 1.9976310614540984,
 1.9339388030126965,
 1.8753901703438307,
 1.9525461323205346,
 1.9573555464314372,
 1.9265372711214546,
 1.892212977042809,
 1.8623702124204533,
 1.894709909033321,
 1.8824685403498287,
 1.9259151280056808,
 1.9063614637724842,
 1.8829093661653324,
 1.8952534016830578,
 1.884874940551301,
 1.9337831907208116,
 1.9351774314070909,
 1.9374552071131785,
 1.9194141713737711,
 1.9484977198304896,
 1.8694912329423918,
 1.9672358160095578,
 1.911822722049048,
 1.894767497398464,
 1.9149375076148114,
 1.9125850948422092,
 1.9502531929268023,
 1.9704510229625942,
 1.9478782972313298,
 1.957236526841826,
 1.9293987277240694,
 1.9203556876645664,
 1.8708910307095374,
 1.9386603024773226,
 1.9658636241935683,
 1.913223786999837,
 1.906954428625395,
 1.8664931137423506,
 1.920519939773496,
 1.9024834252797789,
 1.9467392197137783,
 1.8782843692495361,
 1.9228753488978552,
 1.9847313443478225,
 

In [10]:
np.mean(listmean)

1.9196497995619541