## Reference Materials

https://github.com/marcotcr/lime?tab=readme-ov-file

In [2]:
import sklearn
import sklearn.datasets
import sklearn.ensemble
import numpy as np
import lime
import lime.lime_tabular
from __future__ import print_function


In [3]:
import pandas as pd
import argparse, os
import matplotlib.pyplot as plt
from imblearn.under_sampling import RandomUnderSampler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
from torch import nn
from torch.utils.data import DataLoader 

  from .autonotebook import tqdm as notebook_tqdm


In [48]:
def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--layer", type=float, default=3)
    parser.add_argument("--hidden", type=int, default=128)
    parser.add_argument("--hidden1", type=int, default=64)
    parser.add_argument("--hidden2", type=int, default=32)
    parser.add_argument("--lr", type=float, default=1e-3)
    parser.add_argument("--seed", type=float, default=42)
    parser.add_argument("--epoch", type=int, default=50)
    
    return parser.parse_args()

In [4]:
class Q_vecDataset(torch.utils.data.Dataset):
  def __init__(self, X_rus, y_rus, train):
        self.feature = X_rus
        self.target = y_rus
        self.train = train

  def __len__(self):
        return self.target.shape[0]

  def __getitem__(self, index):
        X = self.feature.iloc[[index]]
        X = X.to_numpy()
        y = self.target.iloc[[index]]
        y= y.to_numpy()

        return X, y

In [5]:
def prepare_data():
    q_cleaned_old = pd.read_csv('data/data_vectorized_240228.csv')
    q_cleaned_old.drop(['ia_status_Facility Study', 'ia_status_Feasibility Study',
        'ia_status_IA Executed', 'ia_status_Operational',
        'ia_status_System Impact Study', 'Unnamed: 0'], axis = 1, inplace=True)

    exempt = []
    for col in list(q_cleaned_old.columns):
        if q_cleaned_old[col].max() < 1:
            exempt.append(col)
    q_cleaned_old.drop(columns = exempt, inplace=True)
    
    # Use batch normalization here - subtract by mean of data + divide by variance
    scaler = StandardScaler()
    scaler.fit(q_cleaned_old)
    q_cleaned_array = scaler.transform(q_cleaned_old)
    q_cleaned = pd.DataFrame(q_cleaned_array, columns=q_cleaned_old.columns)
    

    features = q_cleaned.drop(['ia_status_Withdrawn'], axis = 1)
    target = q_cleaned_old['ia_status_Withdrawn']

    seed = 42

    rus = RandomUnderSampler(random_state=seed)
    X_rus, y_rus= rus.fit_resample(features, target)
    X_train, X_test, y_train, y_test = train_test_split(X_rus, y_rus,
                                                            test_size = 0.2,
                                                            random_state = seed)
    return X_train, X_test, y_train, y_test

In [9]:
%cd ..

c:\Users\celin\git\Quennect


In [10]:
X_train, X_test, y_train, y_test = prepare_data()

In [43]:
X_train = np.array(X_train)

In [44]:
X_train

array([[-0.08696952, -0.11717451, -0.42817022, ..., -0.03576319,
        -0.02340396, -0.02340396],
       [-0.25977336,  1.02335176, -0.42817022, ..., -0.03576319,
        -0.02340396, -0.02340396],
       [ 0.9498535 ,  0.53455479, -0.42817022, ..., -0.03576319,
        -0.02340396, -0.02340396],
       ...,
       [ 0.08583431, -0.44303915, -0.42817022, ..., -0.03576319,
        -0.02340396, -0.02340396],
       [ 0.60424583,  0.53455479, -0.42817022, ..., -0.03576319,
        -0.02340396, -0.02340396],
       [ 0.25863815,  0.20869014, -0.42817022, ..., -0.03576319,
        -0.02340396, -0.02340396]])

In [38]:
classes = np.array(['Not Withdrawn', 'Withdrawn'])

In [18]:
q_cleaned_old = pd.read_csv('data/data_vectorized_240228.csv')
q_cleaned_old.drop(['ia_status_Facility Study', 'ia_status_Feasibility Study',
        'ia_status_IA Executed', 'ia_status_Operational',
        'ia_status_System Impact Study', 'Unnamed: 0'], axis = 1, inplace=True)
q_cleaned_old.drop(['ia_status_Withdrawn'], axis = 1)

Unnamed: 0,q_year,prop_year,region_CAISO,region_ERCOT,region_ISO-NE,region_MISO,region_NYISO,region_PJM,region_SPP,region_Southeast (non-ISO),...,util_tsgt,util_ugi,util_unknown,util_upper peninsula power company,util_vea,util_wabash valley power,util_wisconsin electric power company,util_wisconsin public service corporation,"util_wolverine power supply cooperative, inc.",util_xcel energy
0,2016.0,2018.0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
1,2008.0,2011.0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
2,2019.0,2023.0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2009.0,2010.0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2020.0,2023.0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5475,2005.0,2007.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5476,2008.0,2010.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5477,2003.0,2006.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5478,2007.0,2007.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [62]:
feature_names = q_cleaned_old.columns

In [63]:
explainer = lime.lime_tabular.LimeTabularExplainer(X_train, feature_names=feature_names, class_names=classes, discretize_continuous=True)

In [80]:
class NeuralNetwork(nn.Module): # nn.Module = base case for all neural network modules
    # we define model as a subclass of nn.Module -> it creates parameters of the modules with utility methods like eval()
        def __init__(self):
            super().__init__()
            self.flatten = nn.Flatten()
            self.linear_relu_stack = nn.Sequential(
                nn.Linear(140, int(128)), # apply linear transformation to the incoming data : y = x*W^T+b
                                        # weight here will be size of output * input
                nn.ReLU(),  # rectified linear unit function: 0 for values < 0 and linear function if > 0
                nn.Linear(int(128), int(64)),
                nn.ReLU(),
                nn.Linear(64, 32),
                nn.ReLU(),
                nn.Linear(32, 64),
                nn.ReLU(),
                nn.Linear(64, 2),
            )
            #self.sig = nn.Sigmoid() 
            #self.softmax = nn.Softmax(dim=1)
            # TODO: BCELoss does not expect raw logits - every value should be in the range [0,1].
            # TODO: Check what the previous model was doing, if there was regularization, learning rate, etc.
            
        def forward(self, x): 
            x = self.flatten(x) # collapse into one dimensions
            x = self.linear_relu_stack(x)
            #x = self.sig(x)
            #x = self.softmax(x)
            return x # changed to squeeze

In [65]:
filepath = os.path.join("model", "epoch5.pt")
the_model = NeuralNetwork()
the_model = torch.load(filepath)

In [66]:
the_model.eval()

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=140, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=32, bias=True)
    (5): ReLU()
    (6): Linear(in_features=32, out_features=64, bias=True)
    (7): ReLU()
    (8): Linear(in_features=64, out_features=2, bias=True)
  )
)

In [81]:
X_test = np.array(X_test)

In [82]:
X_test

array([[-0.77818487, -1.09476845, -0.42817022, ..., -0.03576319,
        -0.02340396, -0.02340396],
       [ 0.9498535 ,  0.53455479, -0.42817022, ..., -0.03576319,
        -0.02340396, -0.02340396],
       [ 0.9498535 ,  0.86041944,  2.33551976, ..., -0.03576319,
        -0.02340396, -0.02340396],
       ...,
       [-2.85183092, -2.56115936, -0.42817022, ..., -0.03576319,
        -0.02340396, -0.02340396],
       [-0.60538103, -1.25770077, -0.42817022, ..., -0.03576319,
        -0.02340396, -0.02340396],
       [ 1.29546117,  1.34921641, -0.42817022, ..., -0.03576319,
        -0.02340396, -0.02340396]])

In [79]:
#i = np.random.randint(0, X_test.shape[0])
exp = explainer.explain_instance(X_test[1], the_model, num_features=2, top_labels=1, num_samples=1000)

TypeError: flatten() takes from 0 to 1 positional arguments but 2 were given