In [9]:
import pandas as pd
import numpy as np

import lime
import lime.lime_tabular

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

In [88]:
class Network(nn.Module):
    def __init__(self, in_dim, out_dim=2):
        super().__init__()
        
        self.hidden1 = nn.Linear(in_dim, 32)
        self.hidden2 = nn.Linear(32, 16)
        self.output = nn.Linear(16, out_dim)
        
        self.activation = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, x):
        # Pass the input tensor through each of our operations
        x = self.hidden1(x)
        x = self.activation(x)
        x = self.hidden2(x)
        x = self.activation(x)
        x = self.output(x)
        #print(x.squeeze().shape)
        #x = self.softmax(x)
        
        return x

In [89]:
class FakeNewsDataset(Dataset):
    def __init__(self, df):
        self.data = df.drop(columns=['target'])
        self.targets = df['target'].astype(int)
    
    def __getitem__(self, i):
        x = torch.tensor(self.data.iloc[i]).float()
        y = torch.tensor(self.targets.iloc[i]).long()
        return x, y
    
    def __len__(self):
        return len(self.data)
    

In [90]:
train_df = pd.read_csv('embeddings/inference_train_df.csv')
test_df = pd.read_csv('embeddings/inference_test_df.csv')

In [91]:
input_dim = 205
model_path = "models/fake-news-classifier.pt" 

model = Network(input_dim, 2)
model.load_state_dict(torch.load(model_path))

train_dataset = FakeNewsDataset(train_df)
val_dataset = FakeNewsDataset(test_df)

In [103]:
def predict(instance):
    x = torch.tensor(instance).float()
    print(x.shape)
    logits = model(x)
    # probs = F.softmax(logits, dim=1)
    return logits.detach().numpy()

In [104]:
explainer = lime.lime_tabular.LimeTabularExplainer(train_df,
                                                   mode='classification',
                                                   feature_names=train_df.drop(columns='target').columns, 
                                                   class_names=['real','fake'], 
                                                   discretize_continuous=False)

In [105]:
y = test_df.drop(columns=['target']).iloc[i]
predict(y)

torch.Size([205])


array([ 2.6748438, -4.096123 ], dtype=float32)

In [107]:
i = np.random.randint(0, len(test_df))
exp = explainer.explain_instance(test_df.drop(columns=['target']).iloc[i], predict, num_features=2, top_labels=1)

ValueError: operands could not be broadcast together with shapes (5000,205) (206,) 