# PyTorch Classification Model

https://stackabuse.com/introduction-to-pytorch-for-classification/
https://jovian.ml/aakanksha-ns/shelter-outcome

In [6]:
#!pip install pytorch_lightning
#!pip install lightning

In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as torch_optim
import torch.nn.functional as F
import pytorch_lightning as pl
from torch.utils.data import DataLoader,Dataset,ConcatDataset
from torchmetrics import Accuracy

from sklearn.preprocessing import MinMaxScaler,OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import (accuracy_score,classification_report,
                            confusion_matrix, ConfusionMatrixDisplay,
                            roc_auc_score)
from time import time
import os


## Setup

In [8]:
class OutletBiasDataset(Dataset):
    """
    Map-style PyTorch dataset wrapping a pandas DataFrame of outlet features.

    Each sample is the tuple
    ``(index_label, numeric_features, categorical_features, label)``.

    Args:
        data: DataFrame holding the feature and label columns; its index
            labels are returned alongside every sample.
        numer_cols: columns used as numeric features (cast to ``float32``).
        categ_cols: columns used as categorical codes (cast to ``int64``).
        label_col: name of the label column (cast to ``int64``).
    """
    def __init__(self, data, numer_cols, categ_cols, label_col):
        # numerical columns (astype returns a fresh array, so the caller's
        # DataFrame is never aliased)
        self.X_num = data.loc[:, numer_cols].values.astype(np.float32)
        # categorical columns
        self.X_cat = data.loc[:, categ_cols].values.astype(np.int64)
        # Labels are stored as a plain array so that __getitem__ is strictly
        # positional. Indexing a Series that has a non-integer index with an
        # integer key only works through pandas' deprecated positional fallback.
        self.y = data[label_col].values.astype(np.int64)
        self.index = data.index

    def __len__(self):
        """Return the number of samples."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return ``(index_label, numeric, categorical, label)`` at position ``idx``."""
        return self.index[idx], self.X_num[idx], self.X_cat[idx], self.y[idx]
    

In [9]:
class TabularDataModule(pl.LightningDataModule):
    """
    Lightning data module for the outlet CSV splits.

    Construction changes the working directory to ``datadir`` and reads the
    train/val/test CSV files, each indexed by its "outlet" column. ``setup``
    wraps every split (and their concatenation) in an ``OutletBiasDataset``,
    and the ``*_dataloader`` methods expose them as DataLoaders.
    """

    def __init__(self, datadir, dataset_extension,
                 num_cols, cat_cols, label_col, num_workers=2,
                 batch_size_train=16, batch_size_val=16, batch_size_test=16):
        super().__init__()

        # column roles and loader settings
        self.num_cols = num_cols
        self.cat_cols = cat_cols
        self.label_col = label_col
        self.num_workers = num_workers
        self.batch_size_train = batch_size_train
        self.batch_size_val = batch_size_val
        self.batch_size_test = batch_size_test

        # NOTE: this changes the process-wide working directory; the file
        # names below are resolved relative to it.
        os.chdir(datadir)
        self.train = pd.read_csv(
            "train{}.csv".format(dataset_extension)).set_index("outlet")
        self.val = pd.read_csv(
            "val{}.csv".format(dataset_extension)).set_index("outlet")
        self.test = pd.read_csv(
            "test{}.csv".format(dataset_extension)).set_index("outlet")

        # all rows, in train -> val -> test order
        self.full = pd.concat([self.train, self.val, self.test])

    def setup(self, stage=""):
        """Print the split shapes and build one dataset object per split."""
        print('Train set shape: ', self.train.shape)
        print('Validation set shape: ', self.val.shape)
        print('Test set shape: ', self.test.shape)

        def as_dataset(frame):
            return OutletBiasDataset(frame, self.num_cols, self.cat_cols,
                                     self.label_col)

        self.dataset_train = as_dataset(self.train)
        self.dataset_val = as_dataset(self.val)
        self.dataset_test = as_dataset(self.test)
        self.dataset_full = as_dataset(self.full)

    def _make_loader(self, dataset, batch_size, shuffle):
        """Build a DataLoader using the module-wide worker count."""
        return DataLoader(
            dataset,
            num_workers=self.num_workers,
            batch_size=batch_size,
            shuffle=shuffle
        )

    def train_dataloader(self):
        """Shuffled loader over the training split."""
        return self._make_loader(self.dataset_train, self.batch_size_train, True)

    def val_dataloader(self):
        """Ordered loader over the validation split."""
        return self._make_loader(self.dataset_val, self.batch_size_val, False)

    def test_dataloader(self):
        """Ordered loader over the test split."""
        return self._make_loader(self.dataset_test, self.batch_size_test, False)

    def full_dataloader(self):
        """Ordered loader over all splits combined (uses the training batch size)."""
        return self._make_loader(self.dataset_full, self.batch_size_train, False)

In [10]:
class TabularNetModel(pl.LightningModule):
    """
    Feed-forward classifier for tabular data, with one embedding per
    categorical feature.

    Numeric features are concatenated with the embedded categorical features
    and passed through the stack stored in ``self.layers``. ``forward``
    returns log-probabilities (``log_softmax``), which the step methods pair
    with ``F.nll_loss``.

    Initialize with:
    - num_cols: columns in dataset that are numeric,
    - cat_cols: columns in dataset that are categorical (and thus will be embedded),
    - embedding_size_dict: per categorical column, a pair
      (number of categories, embedding vector size),
    - n_classes: number of classes to be classified,
    - neurons_per_layer_list: neurons per hidden layer, for any number of
      layers (an empty list yields a single linear output layer),
    - learning_rate: learning rate passed to the Adam optimizer,
    - dropout_p: dropout probability after each hidden layer
    """
    # NOTE: the list default for neurons_per_layer_list is only read, never
    # mutated, so sharing it between instances is harmless.
    def __init__(self, num_cols, cat_cols, embedding_size_dict, n_classes,
                 neurons_per_layer_list=[512,256,128],
                 learning_rate=0.001, dropout_p=0.2):
        super().__init__()

        # makes every constructor argument available as self.hparams.<argument>
        self.save_hyperparameters()
        self.num_cols = num_cols
        self.num_len = len(num_cols)
        self.cat_cols = cat_cols
        self.cat_len = len(cat_cols)
        self.n_classes = n_classes

        self.embeddings, total_embedding_dim = self._create_embedding_layers(
            cat_cols, embedding_size_dict)

        # the first layer sees the numeric features plus all embedding vectors
        in_features = self.num_len + total_embedding_dim

        # hidden stack: Linear -> ReLU -> BatchNorm -> Dropout per entry
        all_layers = []
        for n_neurons in neurons_per_layer_list:
            all_layers.append(nn.Linear(in_features, n_neurons))
            all_layers.append(nn.ReLU(inplace=True))
            all_layers.append(nn.BatchNorm1d(n_neurons))
            all_layers.append(nn.Dropout(dropout_p))
            in_features = n_neurons

        # Output layer. Using the running `in_features` (rather than
        # neurons_per_layer_list[-1]) keeps this valid for an empty hidden list.
        all_layers.append(nn.Linear(in_features, n_classes))
        self.layers = nn.Sequential(*all_layers)

        # separate metric objects so validation and test state never mix
        self.val_accuracy = Accuracy(task="multiclass", num_classes=self.n_classes)
        self.test_accuracy = Accuracy(task="multiclass", num_classes=self.n_classes)

    @staticmethod
    def _create_embedding_layers(cat_cols, embedding_size_dict):
        """
        Construct one embedding layer per categorical variable.

        Returns the layers as an ``nn.ModuleDict`` keyed by column name,
        together with the summed embedding vector size.
        """
        total_embedding_dim = 0 # keep track of the embed dim for input size
        embeddings = {}
        for col in cat_cols:
            # "size" is the number of categories, "dim" the vector size
            embedding_size = embedding_size_dict[col][0]
            embedding_dim = embedding_size_dict[col][1]
            total_embedding_dim += embedding_dim
            # we add 2 to the number of rows to leave headroom for category
            # codes beyond the counted ones, see:
            # https://discuss.pytorch.org/t/solved-assertion-srcindex-srcselectdimsize-failed-on-gpu-for-torch-cat/1804/13
            embeddings[col] = nn.Embedding(embedding_size+2, embedding_dim)

        return nn.ModuleDict(embeddings), total_embedding_dim

    def forward(self, num_tensor, cat_tensor):
        """
        Return per-class log-probabilities.

        Column ``i`` of ``cat_tensor`` is looked up in the embedding of the
        i-th entry of ``cat_cols``; the embeddings are concatenated with
        ``num_tensor`` along dim 1 before going through ``self.layers``.
        """
        cat_outputs = []
        for i, col in enumerate(self.hparams.cat_cols):
            embedding = self.embeddings[col]
            # embeddings need integer indices, whatever dtype came in
            cat_input = cat_tensor[:, i].long()
            cat_outputs.append(embedding(cat_input))

        # concatenate the embeddings, then prepend the numeric features
        cat_outputs = torch.cat(cat_outputs, dim=1)
        all_outputs = torch.cat((num_tensor, cat_outputs), dim=1)

        # squeeze(dim=1) only removes a size-1 dimension, so it leaves the
        # output untouched whenever n_classes > 1
        final_outputs = self.layers(all_outputs).squeeze(dim=1)

        return F.log_softmax(final_outputs, dim=1)

    def configure_optimizers(self):
        """Use Adam over all parameters with the configured learning rate."""
        return torch.optim.Adam(self.parameters(),
                                lr=self.hparams.learning_rate)

    def _shared_step(self, batch):
        """Run the model on one batch; return (log_probs, labels, nll_loss)."""
        _, num_tensor, cat_tensor, label_tensor = batch
        output_tensor = self(num_tensor, cat_tensor)
        loss = F.nll_loss(output_tensor, label_tensor)
        return output_tensor, label_tensor, loss

    def training_step(self, batch, batch_idx):
        """Log and return the training loss for one batch."""
        _, _, loss = self._shared_step(batch)
        self.log('train_loss', loss, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log the validation loss and update the validation accuracy."""
        output_tensor, label_tensor, loss = self._shared_step(batch)
        self.log("val_loss", loss, prog_bar=True)
        preds = torch.argmax(output_tensor, dim=1)
        self.val_accuracy.update(preds, label_tensor)
        self.log("val_acc", self.val_accuracy, prog_bar=True)

    def test_step(self, batch, batch_idx):
        """Log the test loss and update the test accuracy."""
        output_tensor, label_tensor, loss = self._shared_step(batch)
        self.log('test_loss', loss, prog_bar=True)
        preds = torch.argmax(output_tensor, dim=1)
        # use the dedicated test metric (previously the validation metric was
        # updated and logged here)
        self.test_accuracy.update(preds, label_tensor)
        self.log("test_acc", self.test_accuracy, prog_bar=True)


## Main

In [None]:
#------------------------------------------------------------------------------
# CLASS NAMES AND DATASET VARIANT
#------------------------------------------------------------------------------
# class names, ordered as in the MBFC mappings
class_names = ["left", "left center", "least biased", "right center", "right"]

dataset_extension = "_mbfc_allbias_extrafeatures"

#------------------------------------------------------------------------------
# CATEGORICAL FEATURES AND THEIR EMBEDDING SIZES
#------------------------------------------------------------------------------

# The embeddings are sized from the full outlet table, restricted to the
# columns that exist in the training set.
train = pd.read_csv("GDELT_GKG/data/train{}.csv".format(dataset_extension))
train.set_index("outlet", inplace=True)
train_cols = train.columns

data = pd.read_csv("GDELT_GKG/extras/mbfc_outlet_sentiments.csv")
data.set_index("outlet", inplace=True)
# the column is named differently in this file; presumably "Latutude" is the
# spelling used by the training CSV - verify against the data
data.rename(columns={"Latitude": "Latutude"}, inplace=True)
data = data[train_cols]

# split the columns into label, categorical and numerical
label_col = "lean"
cat_cols = ["Factuality", "PressFreedom", "MediaType", "Traffic", "Credibility"]
feature_cols = data.columns[~data.columns.isin([label_col])]
num_cols = data.columns[~data.columns.isin(cat_cols + [label_col])]

# the "category" dtype gives direct access to the distinct values per column
data[cat_cols] = data[cat_cols].astype('category')
classes_per_col = {name: len(data[name].cat.categories) for name in cat_cols}

# (number of categories, embedding vector size) per categorical column.
# Size rule from fastai, see:
# http://ethen8181.github.io/machine-learning/deep_learning/tabular/tabular.html
# https://ai.stackexchange.com/questions/28564/how-to-determine-the-embedding-size
embedding_sizes = {
    name: (n_cat, min(600, round(1.6 * n_cat ** 0.56)))
    for name, n_cat in classes_per_col.items()
}

# visualize
print(embedding_sizes)

del classes_per_col, data

In [12]:
#------------------------------------------------------------------------------
# BUILD THE DATA MODULE
#------------------------------------------------------------------------------
# Reads the train/val/test CSVs from GDELT_GKG/data; every split is served in
# batches of 32 by two worker processes.
tabular_data_module = TabularDataModule(
    "GDELT_GKG/data",
    dataset_extension=dataset_extension,
    num_cols=num_cols,
    cat_cols=cat_cols,
    label_col=label_col,
    batch_size_train=32,
    batch_size_val=32,
    batch_size_test=32,
    num_workers=2,
)

In [None]:
#------------------------------------------------------------------------------
# BUILD THE MODEL
#------------------------------------------------------------------------------

# two hidden layers (512 and 256 neurons) with 50% dropout, one output per class
tabular_model = TabularNetModel(
    num_cols=num_cols,
    cat_cols=cat_cols,
    embedding_size_dict=embedding_sizes,
    n_classes=len(class_names),
    neurons_per_layer_list=[512, 256],
    dropout_p=0.5,
)
# leaving the model as the last expression displays its architecture
tabular_model

In [None]:
#------------------------------------------------------------------------------
# TRAIN THE MODEL
#------------------------------------------------------------------------------

from pytorch_lightning.callbacks import (EarlyStopping,LearningRateFinder,ModelCheckpoint)

# stop once "val_acc" has not improved by at least 0.001 for 10 checks
early_stopping = EarlyStopping(
    monitor='val_acc',
    mode="max",
    min_delta=0.001,
    patience=10,
)

# searches for a learning rate automatically
lr_finder = LearningRateFinder(
    min_lr=1e-08,
    max_lr=1,
    num_training_steps=100,
    mode='exponential',
    early_stop_threshold=4.0,
)

# keeps the five checkpoints with the highest "val_acc"
checkpointing = ModelCheckpoint(
    monitor="val_acc",
    mode="max",
    save_top_k=5,
    dirpath="lightning_logs/{}_1505".format(dataset_extension),
    filename="TabularModel-{epoch:02d}-{val_acc:.2f}",
)

callbacks = [early_stopping, lr_finder, checkpointing]

trainer = pl.Trainer(
    accelerator='cpu',
    min_epochs=5,
    max_epochs=100,
    callbacks=callbacks,
    logger=True,
    log_every_n_steps=10,
)

trainer.fit(tabular_model, tabular_data_module)

## TEST SET EVALUATION

In [15]:
# step out of the current directory, then work inside the experiment's
# results folder (created on first use) so reports and figures land there
os.chdir("..")
experiment_name = "_mbfc_allbias_include_categorical_1505"
experiment_dir = "results/Experiment{}".format(experiment_name)

if not os.path.exists(experiment_dir):
    os.makedirs(experiment_dir)
    print("Made Experiment Folder!")

os.chdir("results/Experiment{}/".format(experiment_name))

In [16]:
import sklearn.metrics as metrics

def predict(tabular_model, tabular_data_module):
    data_loader = tabular_data_module.test_dataloader()
    batch_size = data_loader.batch_size
    n_rows = len(tabular_data_module.dataset_test)
    
    y_true = np.zeros(n_rows, dtype=np.float32)
    y_pred = np.zeros((n_rows,5), dtype=np.float32)
    indexes = []
    with torch.no_grad():
        idx = 0
        for index, num_batch, cat_batch, label_batch in data_loader:
            # get model output
            y_output = tabular_model(num_batch, cat_batch)
            y_prob = y_output.cpu().numpy()
            # map exp function to all outputs
            y_prob = list(map(np.exp,y_prob))

            # add predictions to output arrays
            start_idx = idx
            idx += batch_size
            end_idx = idx
            y_pred[start_idx:end_idx] = y_prob
            y_true[start_idx:end_idx] = label_batch.cpu().numpy()
            
            # append indexes
            indexes.extend(index)

            if end_idx == n_rows:
                break

    return y_true, y_pred, indexes

def get_AUC_scores(y_test,y_pred):
    """
    Compute per-class and averaged ROC AUC from hard class predictions.

    Both inputs are one-hot encoded with an encoder fitted on ``y_test``.
    The result is the per-class AUCs, followed by their average, followed by
    two zeros of padding so the array fits into a row of the report
    DataFrame.

    Args:
        y_test: pandas Series of true class labels.
        y_pred: 1-D array-like of predicted class labels (not probabilities).

    Returns:
        1-D numpy array ``[AUC per class..., average AUC, 0, 0]``.
    """
    # "ignore" encodes a predicted class that never occurs in y_test as an
    # all-zero row instead of raising on the unknown category
    ohe = OneHotEncoder(handle_unknown="ignore")
    encoded_true = ohe.fit_transform(y_test.values.reshape(-1, 1)).toarray()
    y_test_ohe = pd.DataFrame(encoded_true, index=y_test.index)
    # np.asarray lets predictions be passed as an array, list or Series
    encoded_pred = ohe.transform(np.asarray(y_pred).reshape(-1, 1)).toarray()
    y_pred_ohe = pd.DataFrame(encoded_pred, index=y_test.index)

    # compute actual scores
    AUC_avg = roc_auc_score(y_test_ohe,y_pred_ohe, multi_class='ovr')
    AUC_per_class = roc_auc_score(y_test_ohe,y_pred_ohe, average=None,multi_class='ovr')

    AUC_list = np.append(AUC_per_class,AUC_avg)
    AUC_list = np.append(AUC_list, [0,0]) # add padding so it fits into DF

    return AUC_list

def evaluate_predictions(y_test,y_pred,save=True,model_name=""):
    """
    Build a classification report and confusion matrix for hard predictions.

    Note that y_test and y_pred both have to have 1 dimension only here -
    no probs per class.

    Args:
        y_test: pandas Series of true class labels.
        y_pred: 1-D array of predicted class labels.
        save: when truthy, write the confusion matrix picture and the report
            CSV into the current working directory.
        model_name: used in the plot title and as the file-name prefix.

    Returns:
        ``(report, disp)``: the report DataFrame (with an extra 'AUC' row)
        and the ``ConfusionMatrixDisplay``.
    """
    # make list of class names in correct order
    class_names = ["left","left center","least biased","right center","right"] # in order from MBFC mappings
    # make report of predictions
    report = pd.DataFrame(classification_report(y_test,y_pred, output_dict=True,
                                  target_names=class_names))
    # get AUC score & add to report DF
    report.loc['AUC',:] = get_AUC_scores(y_test,y_pred)

    # make confusion matrix
    disp = ConfusionMatrixDisplay.from_predictions(
            y_test,
            y_pred,
            display_labels = class_names,
            xticks_rotation="vertical",
            cmap=plt.cm.Blues,
        )
    # title carries the actual model name (the literal string "model_name"
    # used to be formatted in here)
    disp.ax_.set_title("{} Confusion Matrix of Outlet Bias".format(model_name))
    # print the accuracy
    print("Test accuracy")
    print(f"{accuracy_score(y_test, y_pred):.2%}\n")

    if save:
        # save picture and report
        save_path_confmx = "{}_Confusion_Matrix.png".format(model_name)
        save_path_report = "{}_Report.csv".format(model_name)
        disp.figure_.savefig(save_path_confmx)
        report.to_csv(save_path_report)

    return report,disp

def eval_model(model,model_name="PyTorch",save=False):
    """
    Evaluate ``model`` on the test split of the global ``tabular_data_module``.

    Class probabilities from ``predict`` are reduced to the most likely class
    per row, the true labels are re-attached to their index labels, and both
    are handed to ``evaluate_predictions``.

    Returns the ``(report, disp)`` pair produced by ``evaluate_predictions``.
    """
    # class_probs holds one probability per class and row; true_labels is 1-D
    true_labels, class_probs, outlet_index = predict(model, tabular_data_module)
    predicted_classes = class_probs.argmax(axis=1)
    # Series keyed by the original index labels
    labelled_truth = pd.Series(true_labels, index=outlet_index)
    report, disp = evaluate_predictions(
        labelled_truth, predicted_classes, save=save, model_name=model_name)
    return report, disp

In [None]:
# Evaluate the trained model on the test split; save=True also writes the
# confusion-matrix picture and the report CSV to the current directory.
eval_model(tabular_model,model_name="PyTorch",save=True)

In [None]:
# Show the contents of the current working directory.
os.listdir()

In [None]:
# Serialise the whole model object (not only its state_dict) into the
# current working directory.
# NOTE(review): loading this file back presumably requires the
# TabularNetModel class definition to be available - confirm before sharing.
torch.save(tabular_model,"best_torch_15052023_acc70_auc80.pt")

# SHAP - Explaining Model predictions

In [18]:
import warnings
# Suppress every warning category from here on.
warnings.filterwarnings('ignore')

import shap
# NOTE: ConcatDataset is also imported from torch.utils.data at the top of the file.
from torch.utils.data import ConcatDataset

In [None]:
# Load SHAP's JavaScript visualisation code into the notebook so that its
# interactive plots can be rendered.
shap.initjs()

In [20]:
# go up two directory levels (out of results/Experiment<name>/)
os.chdir("../..")

### Make Explainer

In [21]:
# Background sample for SHAP: every row of the training dataset, passed as a
# [numeric, categorical] pair of tensors in the order the model's forward()
# expects them.
train_dataset = tabular_data_module.train_dataloader().dataset
background = [torch.tensor(train_dataset.X_num),
              torch.tensor(train_dataset.X_cat)]
# the explainer wraps the model together with that background
explainer = shap.DeepExplainer(tabular_model, background)

In [58]:
def get_sample_by_index_name(index_name):
    """
    Fetch one sample from the combined (train + val + test) dataset by its
    outlet name.

    Args:
        index_name: index label (outlet name) of the wanted row.

    Returns:
        ``(idx, num_features, cat_features, y_true)`` where ``idx`` is the
        row position, the two feature tensors are float32 with a leading
        batch dimension of 1, and ``y_true`` is the stored label.

    Raises:
        ValueError: if ``index_name`` is not present in the dataset index.
    """
    # dataset_full is the dataset that full_dataloader() wraps; reading it
    # directly avoids constructing throw-away DataLoaders
    dataset = tabular_data_module.dataset_full
    try:
        idx = dataset.index.tolist().index(index_name)  # first matching position
    except ValueError:
        raise ValueError(
            "Outlet {!r} not found in the dataset index".format(index_name)
        ) from None

    _, num_features, cat_features, y_true = dataset[idx]
    # both inputs are converted to float32 tensors (the categorical codes are
    # cast back to integers inside the model's forward) and given a batch
    # dimension; the tensors are returned as built instead of being wrapped
    # in torch.tensor() a second time
    num_features = torch.tensor(np.asarray(num_features, dtype=np.float32)).unsqueeze(0)
    cat_features = torch.tensor(np.asarray(cat_features, dtype=np.float32)).unsqueeze(0)

    return idx, num_features, cat_features, y_true

def model_predict(inputs):
    """
    Predict the class for a ``[numeric, categorical]`` pair of input tensors
    using the global ``tabular_model``.

    The model is evaluated in eval mode without gradient tracking and its
    previous mode is restored afterwards; BatchNorm in training mode cannot
    process a batch holding a single row.

    Returns:
        The predicted class index for a single-row batch, or an array with
        one class index per row for larger batches.
    """
    num_tensors, cat_tensors = inputs

    was_training = tabular_model.training
    tabular_model.eval()
    try:
        with torch.no_grad():
            log_probs = tabular_model(num_tensors, cat_tensors).cpu().numpy()
    finally:
        tabular_model.train(was_training)

    # exp is monotonic, so the argmax of the log-probabilities is already the
    # most probable class; take it per row rather than over the flattened batch
    per_row = np.argmax(log_probs, axis=1)
    return per_row[0] if len(per_row) == 1 else per_row

def decision_plot_for_outlet(target_outlet_name,xlim=(-0.5,4.5)):
    """
    Print the actual and predicted leaning of one outlet and draw a SHAP
    decision plot for the predicted class.

    Args:
        target_outlet_name: index label of the outlet to explain.
        xlim: x-axis limits passed on to ``shap.decision_plot``.
    """
    lean_names = {0:"Left", 1:"Left lean",2:"Least biased", 3:"Right lean",4:"Right"}

    # model inputs and true label for the requested outlet
    idx, num_tensors, cat_tensors, actual = get_sample_by_index_name(target_outlet_name)
    model_inputs = [num_tensors, cat_tensors]

    prediction = model_predict(model_inputs)

    print("The outlet is: ", target_outlet_name)
    print("Political leaning is: ", lean_names[actual])
    print("Political leaning predicted as: ", lean_names[prediction])

    # SHAP values from the global explainer
    shap_values = explainer.shap_values(model_inputs)

    # assumes shap_values is indexed [class][input]; entry 0 would then be the
    # numeric input's values for the predicted class - TODO confirm
    numeric_shap_values = shap_values[prediction][0]
    base_value = explainer.expected_value[prediction]

    plt.figure()
    shap.decision_plot(
        base_value,
        numeric_shap_values,
        features=np.array(num_tensors),
        feature_names=num_cols.tolist(),
        xlim=xlim,
        show=0,
    )
    plt.title("Decision Plot for {}".format(target_outlet_name.upper()))
    plt.show()

## Get SHAP for Single Outlet

In [None]:
# Explain the model's prediction for cnn.com.
decision_plot_for_outlet('cnn.com',xlim=(-3.5,4.5))

In [None]:
# Explain the model's prediction for breitbart.com.
decision_plot_for_outlet("breitbart.com",xlim=(-5,6))

In [None]:
# Explain the model's prediction for theguardian.com.
decision_plot_for_outlet("theguardian.com",xlim=(-5,5))

In [None]:
# Print the built-in help text for shap.decision_plot.
help(shap.decision_plot)

# SCRAP

In [None]:
def model_predict_for_all_data(subset=""):
    """
    Predict classes with the global ``tabular_model`` for one split or for
    all splits of the global ``tabular_data_module``.

    Args:
        subset: "train", "val" or "test" selects that split; any other value
            runs over the three splits concatenated in that order.

    Returns:
        ``(y_true, y_argmax_pred, indexes)``: float32 array of true labels,
        array of predicted class indices and list of sample index labels,
        all in dataset order.
    """
    splits = {
        "train": tabular_data_module.dataset_train,
        "val": tabular_data_module.dataset_val,
        "test": tabular_data_module.dataset_test,
    }
    combined_dataset = splits.get(subset)
    if combined_dataset is None:
        # no (known) subset requested: chain train, val and test
        combined_dataset = ConcatDataset(list(splits.values()))

    data_loader = torch.utils.data.DataLoader(combined_dataset, batch_size=32, shuffle=False)

    pred_batches, label_batches, indexes = [], [], []
    # evaluation mode keeps Dropout/BatchNorm deterministic; the previous
    # mode is restored once the loop is done
    was_training = tabular_model.training
    tabular_model.eval()
    try:
        with torch.no_grad():
            for index, num_batch, cat_batch, label_batch in data_loader:
                log_probs = tabular_model(num_batch, cat_batch).cpu().numpy()
                # exp is monotonic, so the argmax of the log-probabilities is
                # the most probable class; works for any number of classes
                pred_batches.append(np.argmax(log_probs, axis=1))
                label_batches.append(label_batch.cpu().numpy())
                indexes.extend(index)
    finally:
        tabular_model.train(was_training)

    if not pred_batches:
        # empty dataset: nothing to concatenate
        return np.zeros(0, dtype=np.float32), np.zeros(0, dtype=np.int64), indexes

    y_true = np.concatenate(label_batches).astype(np.float32)
    y_argmax_pred = np.concatenate(pred_batches)

    return y_true, y_argmax_pred, indexes

def model_predict_SHAP(inputs):
    """
    Return the predicted class of every row in all splits as a DataFrame
    indexed by sample index label.

    NOTE(review): ``inputs`` is not used; predictions always come from the
    datasets held by the global data module.
    """
    _, predicted_classes, sample_names = model_predict_for_all_data()
    return pd.DataFrame(predicted_classes, index=sample_names)

def model_predict_SHAP_train(inputs):
    """
    Return the predicted class of every training row as a column vector of
    shape ``(n_rows, 1)``.

    NOTE(review): ``inputs` is not used; predictions always come from the
    training dataset held by the global data module.
    """
    _, predicted_classes, _ = model_predict_for_all_data(subset="train")
    return predicted_classes.reshape(-1, 1)

In [None]:
# Reload the three splits as plain pandas objects: per the original note, the
# KernelExplainer is fed the pandas data (the training set being its
# background) rather than the PyTorch datasets.

dataset_extension = "_mbfc_allbias_extrafeatures"

train = pd.read_csv("data/train{}.csv".format(dataset_extension)).set_index("outlet")
val = pd.read_csv("data/val{}.csv".format(dataset_extension)).set_index("outlet")
test = pd.read_csv("data/test{}.csv".format(dataset_extension)).set_index("outlet")

# features (everything except "lean") and label ("lean") per split
X_train, y_train = train.drop(columns="lean"), train["lean"]
X_val, y_val = val.drop(columns="lean"), val["lean"]
X_test, y_test = test.drop(columns="lean"), test["lean"]

# all splits stacked, so that ANY outlet can be looked up for an explanation
X = pd.concat([X_train, X_val, X_test])
y = pd.concat([y_train, y_val, y_test])

In [None]:

#tabular_data_module.batch_size_train = 2000
#outlet,num_tensor,cat_tensor,y_true = next(iter(tabular_data_module.train_dataloader()))
#background = (outlet,num_tensor,cat_tensor)

In [None]:
#explainer = shap.DeepExplainer(tabular_model,tabular_data_module.train_dataloader())