In [None]:
import pandas as pd
from sklearn.metrics import mean_squared_log_error
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import torch
import pandas as pd
from numpy import array
from tqdm import tqdm
import numpy as np
from matplotlib.pyplot import *

In [None]:
# Load the test-split manifest. Column 0 of this frame is used further down
# as the list of feather file names to evaluate.
files=pd.read_json('../../test_files_801010.json')

In [None]:
# Sanity check: number of rows in the manifest frame.
len(files)

In [None]:
# Pick the compute device once; the model and the input tensors are moved to
# it later in the notebook. Falls back to the CPU when CUDA is not available.
is_cuda = torch.cuda.is_available()
device = torch.device("cuda" if is_cuda else "cpu")

class MLP(nn.Module):
    """Fully connected network that maps a feature vector to one scalar.

    The attribute names (`hidden_layers`, `output_projection`) define the
    state-dict keys, so they must stay in sync with the saved checkpoints.
    """

    def __init__(self, input_size, num_hidden, hidden_dim, dropout):
        """Build the layers.

        Args:
            input_size (int): number of input features.
            num_hidden (int): number of hidden linear layers.
            hidden_dim (int): width of every hidden layer.
            dropout (float): dropout probability applied after each hidden layer.
        """
        super().__init__()
        self.hidden_layers = nn.ModuleList([nn.Linear(input_size, hidden_dim)])
        for _ in range(num_hidden - 1):
            self.hidden_layers.append(nn.Linear(hidden_dim, hidden_dim))
        self.dropout = nn.Dropout(dropout)
        self.output_projection = nn.Linear(hidden_dim, 1)
        self.nonlinearity = nn.ReLU()

    def forward(self, x):
        """Run the network.

        Args:
            x (Tensor): input whose last dimension equals `input_size`.

        Returns:
            Tensor: same leading dimensions as `x`, last dimension of size 1.
        """
        for hidden_layer in self.hidden_layers:
            # Linear -> dropout -> ReLU: order kept as-is so the network
            # behaves exactly like the one the checkpoint was trained with.
            x = self.nonlinearity(self.dropout(hidden_layer(x)))
        return self.output_projection(x)


def modelload(param, path_to_model):
    """Rebuild an MLP and restore its weights from a saved state dict.

    Args:
        param (tuple): `(input_size, num_hidden, hidden_dim, dropout)`.
        path_to_model (str): path to a file written with
            `torch.save(model.state_dict(), ...)`.

    Returns:
        MLP: double-precision model placed on the module-level `device`,
        with the stored weights loaded. The model is returned in its default
        (training) mode; call `.eval()` before inference.
    """
    newmodel = MLP(param[0], param[1], param[2], param[3]).double()
    newmodel.to(device)
    # map_location remaps the stored tensors onto the active device; without
    # it a checkpoint saved on a GPU cannot be loaded on a CPU-only machine.
    state_dict = torch.load(path_to_model, map_location=device)
    newmodel.load_state_dict(state_dict)
    return newmodel
    

In [None]:
def split_sequences(sequences):
    """Split a 2-D table into single-row feature windows and labels.

    Every row becomes one sample: all columns except the last are the
    features and the last column is the label.

    Args:
        sequences (np.array): 2-D array of shape (n_rows, n_features + 1).

    Returns:
        X (np.array): features with shape (n_rows, 1, n_features).
        y (np.array): labels with shape (n_rows,).
    """
    sequences = np.asarray(sequences)
    # Vectorised slicing replaces the per-row Python loop and keeps the
    # (0, 1, n_features) shape for empty input, so the result can always be
    # stacked with other chunks. np.array(...) returns copies, not views.
    X = np.array(sequences[:, np.newaxis, :-1])
    y = np.array(sequences[:, -1])
    return X, y

In [None]:
# Restore the trained network and switch it to inference mode
# (nn.Module.eval() returns the module itself, so the call can be chained).
newmodel = modelload((856, 3, 256, 0.5), './model3/state_dict_3.pt').eval()

# Number of manifest entries scored together in one chunk of the loop below.
stepsize = 40

# NOTE(review): n_timesteps and batch_size are not referenced in the visible
# cells; kept in case later cells rely on them.
n_timesteps = 30
batch_size = 100

# Column 0 of the manifest holds the file names to evaluate.
epoch_test = files[0]
epoch_size = len(epoch_test)

# Collects one mean-squared-log-error value per evaluated chunk.
listmean = []

In [None]:
# Score the test files in chunks of `stepsize` files and record one
# mean-squared-log-error value per chunk.
listmean = []  # reset so re-running this cell does not append duplicate scores

# Ceiling division: the trailing partial chunk is evaluated too instead of
# being silently dropped.
n_chunks = (epoch_size + stepsize - 1) // stepsize

for number in tqdm(range(n_chunks)):
    startno = number * stepsize
    chunk_x, chunk_y = [], []
    for filename in epoch_test[startno:startno + stepsize]:
        joineddf = pd.read_feather('../../processed3-edited/' + filename).fillna(0)
        # Move the target column to the end so split_sequences() reads it as the label.
        feature_cols = [c for c in joineddf if c != 'Retweets']
        tnp = joineddf[feature_cols + ['Retweets']].to_numpy()
        testnpx, testnpy = split_sequences(tnp)
        if len(testnpx) == 0:
            # An empty file contributes no samples.
            continue
        chunk_x.append(testnpx)
        chunk_y.append(testnpy)

    if not chunk_x:
        # Every file in this chunk was empty; there is nothing to score.
        continue

    # Collect per-file arrays and concatenate once; np.append inside the loop
    # re-copies the whole accumulated array on every file.
    test_x = np.concatenate(chunk_x, axis=0)
    test_y = np.concatenate(chunk_y, axis=0)

    # Convert straight to float64: torch.Tensor(...) would first round the
    # data to float32 before .double() widened it again.
    inputs = torch.as_tensor(test_x, dtype=torch.double, device=device)
    with torch.no_grad():  # inference only, so no autograd graph is needed
        predictions = newmodel(inputs)

    # Clip negatives (MSLE is undefined for them) and flatten to 1-D;
    # reshape(-1) also works when the chunk holds a single row.
    predicted = predictions.cpu().numpy().clip(min=0).reshape(-1)
    listmean.append(mean_squared_log_error(test_y, predicted))
    # Checkpoint the scores collected so far after every chunk.
    pd.DataFrame(listmean).to_csv('./best_linear_model/mean'+str(number)+'.csv')

In [None]:
# Per-chunk mean-squared-log-error values collected by the evaluation loop.
listmean

In [None]:
# Unweighted average of the per-chunk scores (chunks may differ in row count).
np.mean(listmean)