In [1]:
import pandas as pd
from sklearn.metrics import mean_squared_log_error
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import torch
import pandas as pd
from numpy import array
from tqdm import tqdm
import numpy as np

In [2]:
files=pd.read_json('../../val_files_801010.json')

In [3]:
len(files)

19968

In [4]:
is_cuda = torch.cuda.is_available()

# If we have a GPU available, we'll set our device to GPU. We'll use this device variable later in our code.
if is_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

def modelload(param,path_to_model):
    class MLP(nn.Module):
        def __init__(self, input_size, num_hidden, hidden_dim, dropout):
            super(MLP, self).__init__()
            self.hidden_layers = nn.ModuleList([])
            self.hidden_layers.append(nn.Linear(input_size, hidden_dim))
            for i in range(num_hidden - 1):
                self.hidden_layers.append(nn.Linear(hidden_dim, hidden_dim))
            self.dropout = nn.Dropout(dropout)
            self.output_projection = nn.Linear(hidden_dim, 1)
            self.nonlinearity = nn.ReLU()

        def forward(self, x):
            for hidden_layer in self.hidden_layers:
                x = hidden_layer(x)
                x = self.dropout(x)
                x = self.nonlinearity(x)
            out = self.output_projection(x)
            return out
        
    newmodel = MLP(param[0],param[1],param[2],param[3]).double()
    newmodel.to(device)
    newmodel.load_state_dict(torch.load(path_to_model))
    return newmodel
    

In [5]:
def split_sequences(sequences):
    X, y = list(), list()
    for i in range(len(sequences)):
        # find the end of this pattern
        end_ix = i + 1
        # check if we are beyond the dataset
        if end_ix > len(sequences):
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequences[i:end_ix, :-1], sequences[end_ix-1, -1]
        X.append(seq_x)
        y.append(seq_y)
    return array(X), array(y)

In [6]:
newmodel= modelload((856, 3, 256, 0.6),'./state_dict_2.pt')
newmodel.eval()
stepsize=40

n_timesteps=30
batch_size = 100
epoch_val=files[0]
epoch_size=len(files[0])
listmean=[]

In [7]:
for number in tqdm(range(int(epoch_size/stepsize))):
    val_x= np.empty((0,1,856), int)
    val_y= np.empty((0,), int)
    startno=number*stepsize
    for i in (epoch_val[startno:startno+stepsize]):
        joineddf=pd.read_feather('../../processed3-edited/'+i)
        joineddf=joineddf.fillna(0)
        tnp=joineddf[[c for c in joineddf if c not in ['Retweets']] 
               + ['Retweets']].to_numpy()
        valnpx,valnpy=split_sequences(tnp)

        val_x = np.append(val_x, valnpx, axis=0)
        val_y = np.append(val_y, valnpy, axis=0)
    
    val_x=torch.Tensor(val_x).double().to(device)
    predictions = newmodel(val_x)
#     print(len(predictions))
#     print(len(val_y))
#     print(predictions.cpu().detach().numpy().clip(min=0).squeeze())
#     print(mean_squared_log_error(val_y, predictions.cpu().detach().numpy().clip(min=0).squeeze()))
    listmean.append(mean_squared_log_error(val_y, predictions.cpu().detach().numpy().clip(min=0).squeeze()))
#     print(listmean)
    predictions=[]
    pd.DataFrame(listmean).to_csv('./val_linear_model_2/mean'+str(number)+'.csv')

100%|██████████| 499/499 [13:20<00:00,  1.60s/it]


In [8]:
listmean

[2.4382186322699737,
 2.414577531519349,
 2.360174362884378,
 2.3279874518613886,
 2.5012132111736007,
 2.3641162076625504,
 2.357883606809947,
 2.4414651163752725,
 2.449855286943026,
 2.424154103768424,
 2.350562024701847,
 2.3030096324648337,
 2.431400022042738,
 2.3940281591312913,
 2.4425178426826895,
 2.342332349974361,
 2.389895421479406,
 2.3686976566609266,
 2.33898158436351,
 2.385326655428712,
 2.4378185066089197,
 2.4383990976109584,
 2.354316373291408,
 2.3560176140033464,
 2.345199791618695,
 2.4156177883781593,
 2.448127046299189,
 2.367845110316299,
 2.338280766237465,
 2.4314721640822117,
 2.3543543120669295,
 2.3921888330028405,
 2.429712802198716,
 2.363048695631854,
 2.419717466275969,
 2.3641417971728615,
 2.3916720729898957,
 2.4516685147072153,
 2.4087236360674322,
 2.4455158517288056,
 2.426369759865033,
 2.3784474084672413,
 2.40516691315242,
 2.3938472530591133,
 2.361706596827081,
 2.4408216747846345,
 2.4342861225594143,
 2.515206431810464,
 2.37513327525274

In [9]:
np.mean(listmean)

2.3978115134565687