In [1]:
import pandas as pd
from sklearn.metrics import mean_squared_log_error
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import torch
import pandas as pd
from numpy import array
from tqdm import tqdm
import numpy as np
from matplotlib.pyplot import *

In [2]:
# Table of test-set entries; column 0 is read by later cells as the feather
# file names to evaluate.
files = pd.read_json('../../test_files_801010.json')

In [3]:
# Number of entries loaded from the test-file listing.
len(files)

19967

In [4]:
# Pick the compute device once: the GPU when CUDA is usable, the CPU otherwise.
# `device` is reused further down for both the model and the input tensors.
is_cuda = torch.cuda.is_available()
device = torch.device("cuda" if is_cuda else "cpu")

def modelload(param, path_to_model):
    """Rebuild the MLP regressor and restore its weights from a saved state dict.

    Args:
        param: indexable holding ``(input_size, num_hidden, hidden_dim, dropout)``;
            only the first four entries are read.
        path_to_model: path of a state-dict file readable by ``torch.load``.

    Returns:
        The double-precision ``MLP`` moved to the module-level ``device`` with
        the stored weights loaded. ``eval()`` is not called here, so callers
        that want dropout disabled must switch the model to eval mode.
    """
    class MLP(nn.Module):
        """``num_hidden`` blocks of Linear -> Dropout -> ReLU, then a 1-unit linear head."""

        def __init__(self, input_size, num_hidden, hidden_dim, dropout):
            super(MLP, self).__init__()
            # First layer maps the input width to hidden_dim; the remaining
            # num_hidden - 1 layers are hidden_dim -> hidden_dim.
            self.hidden_layers = nn.ModuleList([])
            self.hidden_layers.append(nn.Linear(input_size, hidden_dim))
            for i in range(num_hidden - 1):
                self.hidden_layers.append(nn.Linear(hidden_dim, hidden_dim))
            self.dropout = nn.Dropout(dropout)
            self.output_projection = nn.Linear(hidden_dim, 1)
            self.nonlinearity = nn.ReLU()

        def forward(self, x):
            # Dropout is applied before the non-linearity in every hidden block.
            for hidden_layer in self.hidden_layers:
                x = hidden_layer(x)
                x = self.dropout(x)
                x = self.nonlinearity(x)
            out = self.output_projection(x)
            return out

    newmodel = MLP(param[0], param[1], param[2], param[3]).double()
    newmodel.to(device)
    # map_location remaps the stored tensors onto the device chosen for this
    # session, so a checkpoint written on a GPU machine can still be opened
    # when only the CPU is available (and vice versa).
    newmodel.load_state_dict(torch.load(path_to_model, map_location=device))
    return newmodel
    

In [5]:
def split_sequences(sequences):
    """Split a sample matrix into per-row features and targets.

    Every row becomes one sample with a single time step: all columns except
    the last are the features, and the last column is the target.

    Args:
        sequences: NumPy array with at least two dimensions, indexed as
            ``[row, column, ...]``.

    Returns:
        Tuple ``(X, y)`` of newly allocated arrays. ``X`` has shape
        ``(n_rows, 1, n_columns - 1, ...)`` and ``y`` has shape
        ``(n_rows, ...)``; both keep the dtype of ``sequences``.
    """
    # Each window is exactly one row long, so the split is pure slicing. The
    # inserted axis is the length-1 time dimension. Unlike a list-building
    # loop, slicing keeps the 3-D shape for an input with zero rows, so the
    # result can still be concatenated with other batches.
    X = array(sequences[:, None, :-1])
    y = array(sequences[:, -1])
    return X, y

In [6]:
# --- Evaluation setup -------------------------------------------------------
# Number of files scored together in one evaluation step.
stepsize = 40

# Not referenced by the cells shown here; kept for any code that expects them.
n_timesteps = 30
batch_size = 100

# Column 0 of the listing holds the feather file names to evaluate.
epoch_test = files[0]
epoch_size = len(epoch_test)

# One mean-squared-log-error value is appended per evaluation step.
listmean = []

# Arguments are (input_size, num_hidden, hidden_dim, dropout) followed by the
# checkpoint path; eval mode turns dropout off for inference.
newmodel = modelload((856, 3, 256, 0.5), './model3/state_dict_3.pt')
newmodel.eval()

In [7]:
# Score the test files in groups of `stepsize` and record one MSLE per group.
# NOTE(review): the floor division means a trailing group with fewer than
# `stepsize` files is never evaluated - confirm that dropping it is intended.
for number in tqdm(range(epoch_size // stepsize)):
    startno = number * stepsize
    # The empty seed arrays pin the expected layout (1 time step, 856
    # features), so a file with a different width fails in the concatenate.
    x_parts = [np.empty((0, 1, 856), int)]
    y_parts = [np.empty((0,), int)]
    for i in epoch_test[startno:startno + stepsize]:
        joineddf = pd.read_feather('../../processed3-edited/' + i)
        joineddf = joineddf.fillna(0)
        # Move the target column to the end: split_sequences treats the last
        # column as the label and everything before it as features.
        tnp = joineddf[[c for c in joineddf if c not in ['Retweets']]
                       + ['Retweets']].to_numpy()
        testnpx, testnpy = split_sequences(tnp)
        x_parts.append(testnpx)
        y_parts.append(testnpy)

    # Concatenate once per group instead of re-copying the growing arrays
    # after every file.
    test_x = np.concatenate(x_parts, axis=0)
    test_y = np.concatenate(y_parts, axis=0)

    # Inference only, so no autograd graph is recorded. The array is converted
    # straight to float64; torch.Tensor(...) would first round the values to
    # float32 before they are widened again.
    with torch.no_grad():
        test_x = torch.as_tensor(test_x, dtype=torch.float64).to(device)
        predictions = newmodel(test_x)

    # MSLE is undefined for negative values, so predictions are clipped at 0.
    # reshape(-1) flattens to 1-D even when the group contains a single row.
    predicted = predictions.cpu().detach().numpy().clip(min=0).reshape(-1)
    listmean.append(mean_squared_log_error(test_y, predicted))
    # Checkpoint: each file holds every score collected up to this step.
    pd.DataFrame(listmean).to_csv('./best_linear_model/mean'+str(number)+'.csv')

100%|██████████| 499/499 [13:24<00:00,  1.61s/it]


In [8]:
# Per-step mean squared log error values collected by the evaluation loop.
listmean

[1.8952779309039676,
 1.9401703712228928,
 1.9323914560041526,
 1.936630668741853,
 1.9461180936867803,
 1.9510185433064609,
 1.9065114908186602,
 1.8685539872260646,
 1.92186006505223,
 1.881184840188092,
 1.975107935269115,
 1.9196377207670694,
 1.9467596982872555,
 1.9087917390257263,
 1.9312441791967578,
 1.8954574324970463,
 1.8908368886781668,
 1.9174333374334869,
 1.9346280995408525,
 1.8799620228321436,
 1.906497397375746,
 1.8996719690982593,
 1.897891436370346,
 1.9451483163863168,
 1.8891135865626747,
 1.9113541480939056,
 1.9634468365186721,
 1.9471107109802739,
 1.9041497086699724,
 1.914717639267104,
 1.9315277845819172,
 1.9138692668097217,
 1.932705712690698,
 1.9082021833372573,
 1.961475025683063,
 1.9266683692618756,
 1.8796455879589784,
 1.8873777027960725,
 1.9222941879172504,
 1.9138414945563407,
 1.9036076613702695,
 1.8922440086080647,
 1.9004736812910992,
 1.9138050456538336,
 1.8574994302931354,
 1.8879640279643546,
 1.9432200809243736,
 1.931155505116216,
 1.

In [9]:
# Unweighted average of the per-step MSLE values; every step counts equally
# regardless of how many rows its files contributed.
np.mean(listmean)

1.919262166935468