In [2]:
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import torch
import pandas as pd
from numpy import array

In [3]:
# Ask PyTorch whether a CUDA-capable GPU can be used (True/False).
is_cuda = torch.cuda.is_available()

# Select the compute device once; later cells move the model and tensors onto it.
device = torch.device("cuda" if is_cuda else "cpu")


In [4]:
class _MLP(nn.Module):
    """Feed-forward regressor: a stack of linear layers and a 1-unit output.

    The attribute names ``hidden_layers`` and ``output_projection`` are the
    keys of the saved ``state_dict``; renaming them would break
    ``load_state_dict``.
    """

    def __init__(self, input_size, num_hidden, hidden_dim, dropout):
        super().__init__()
        # First layer maps input_size -> hidden_dim; the remaining
        # (num_hidden - 1) layers map hidden_dim -> hidden_dim.
        in_dims = [input_size] + [hidden_dim] * (num_hidden - 1)
        self.hidden_layers = nn.ModuleList(
            [nn.Linear(in_dim, hidden_dim) for in_dim in in_dims]
        )
        self.dropout = nn.Dropout(dropout)
        self.output_projection = nn.Linear(hidden_dim, 1)
        self.nonlinearity = nn.ReLU()

    def forward(self, x):
        # Order is linear -> dropout -> ReLU; kept as-is so the network
        # matches the checkpoint it is loaded from.
        for hidden_layer in self.hidden_layers:
            x = self.nonlinearity(self.dropout(hidden_layer(x)))
        return self.output_projection(x)


def _split_sequences(sequences):
    """Split a 2-D array into one-row feature windows and per-row targets.

    Returns ``(X, y)`` where ``X`` has shape ``(rows, 1, cols - 1)`` (every
    column but the last) and ``y`` has shape ``(rows,)`` (the last column).
    """
    # A window length of 1 means each row is its own sample, so plain slicing
    # replaces the per-row Python loop. ``array`` makes independent copies.
    X = array(sequences[:, None, :-1])
    y = array(sequences[:, -1])
    return X, y


def modelload(param, path_to_model, path_to_df):
    """Load a saved MLP and run it on every row of a feather file.

    Parameters
    ----------
    param : sequence
        ``(input_size, num_hidden, hidden_dim, dropout)`` used to rebuild the
        network; must match the architecture that produced the checkpoint.
    path_to_model : str or path-like
        File containing the model ``state_dict`` saved with ``torch.save``.
    path_to_df : str or path-like
        Feather file whose columns are the model features plus a
        ``'Retweets'`` column (moved last and treated as the target).

    Returns
    -------
    torch.Tensor
        Predictions of shape ``(rows, 1, 1)`` on the module-level ``device``.

    Notes
    -----
    Relies on the module-level ``device`` defined in an earlier cell.
    The DataFrame, the feature array and the predictions are printed, as
    before.
    """
    input_size, num_hidden, hidden_dim, dropout = (
        param[0], param[1], param[2], param[3]
    )
    newmodel = _MLP(input_size, num_hidden, hidden_dim, dropout).double()
    newmodel.to(device)

    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    state_dict = torch.load(path_to_model, map_location=device)
    newmodel.load_state_dict(state_dict)
    newmodel.eval()

    df = pd.read_feather(path_to_df)
    print(df)

    # Put 'Retweets' last so the split treats it as the target column.
    feature_columns = [c for c in df if c not in ['Retweets']]
    ordered = df[feature_columns + ['Retweets']]

    # Convert straight to float64 so mixed int/bool/float columns yield a
    # numeric array rather than an object array.
    x1, _targets = _split_sequences(ordered.fillna(0).to_numpy(dtype="float64"))
    print(x1)

    # Build the tensor directly as float64: torch.Tensor(...) would round the
    # values through float32 before the cast to double.
    inputs = torch.as_tensor(x1, dtype=torch.float64, device=device)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        predictions = newmodel(inputs)
    print(predictions)
    return predictions

In [13]:
# Run the saved model on one dataset file; point `path_to_df` at any other
# file to try it. `param` is (input_size, num_hidden, hidden_dim, dropout).
modelload(
    param=(856, 3, 256, 0.5),
    path_to_model='./model_3/state_3.pt',
    path_to_df='../../processed3-edited/dataset/data_188489.ftr',
)

    usernamehash_col0  usernamehash_col1  usernamehash_col2  \
0                   0                  0                  0   
1                   0                  0                  0   
2                   0                  0                  0   
3                   0                  0                  0   
4                   0                  0                  0   
..                ...                ...                ...   
95                  0                  0                  0   
96                  0                  0                  0   
97                  0                  0                  0   
98                  0                  0                  0   
99                  0                  0                  0   

    usernamehash_col3  usernamehash_col4  usernamehash_col5  \
0                   0                  0                  0   
1                   0                  0                  0   
2                   0                  0              