# Captum Tutorial

## Getting Started

In [74]:
import torch

import numpy as np
import torch.nn as nn

from captum.attr import IntegratedGradients

In [75]:
class ToyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.lin1 = nn.Linear(3,3)
        self.relu = nn.ReLU()
        self.lin2 = nn.Linear(3,2)
        
        self.lin1.weight = nn.Parameter(torch.arange(-4.0, 5.0).view(3,3))
        self.lin1.bias = nn.Parameter(torch.zeros(3))
        
        self.lin2.weight = nn.Parameter(torch.arange(-3.0, 3.0).view(2,3))
        self.lin2.bias = nn.Parameter(torch.ones(2))
        
    def forward(self, input):
        return self.lin2(self.relu(self.lin1(input)))
    
model = ToyModel()
model.eval()

ToyModel(
  (lin1): Linear(in_features=3, out_features=3, bias=True)
  (relu): ReLU()
  (lin2): Linear(in_features=3, out_features=2, bias=True)
)

In [76]:
torch.manual_seed(123)
np.random.seed(123)

In [77]:
input = torch.randn(2,3)
baseline = torch.zeros(2,3)

In [78]:
ig = IntegratedGradients(model)
attributions, delta = ig.attribute(input, baseline, target=0, return_convergence_delta=True)
print('IG attributions:', attributions)
print('Convergence Delta:', delta)

IG attributions: tensor([[ -1.3376,   1.0833,  -2.2178],
        [ -3.3659, -10.7723,   0.8371]], dtype=torch.float64)
Convergence Delta: tensor([9.6858e-08, 9.2387e-07], dtype=torch.float64)


## Getting Started with Captum - Titanic Data Analysis

In [79]:
import numpy as np

import torch

from captum.attr import IntegratedGradients
from captum.attr import LayerConductance
from captum.attr import NeuronConductance

import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

from scipy import stats
import pandas as pd

In [80]:
titanic_data = pd.read_csv('titanic3.csv')
df.head(1)

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
0,1,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B5,S,2,,"St Louis, MO"


In [81]:
titanic_data = pd.concat([titanic_data,
    pd.get_dummies(titanic_data['sex']),
    pd.get_dummies(titanic_data['embarked'],prefix="embark"),
    pd.get_dummies(titanic_data['pclass'],prefix="class")], axis=1
    )
titanic_data["age"] = titanic_data["age"].fillna(titanic_data["age"].mean())
titanic_data["fare"] = titanic_data["fare"].fillna(titanic_data["fare"].mean())
titanic_data = titanic_data.drop(['name','ticket','cabin','boat','body','home.dest','sex','embarked','pclass'], axis=1)


In [82]:
titanic_data

Unnamed: 0,survived,age,sibsp,parch,fare,female,male,embark_C,embark_Q,embark_S,class_1,class_2,class_3
0,1,29.000000,0,0,211.3375,True,False,False,False,True,True,False,False
1,1,0.920000,1,2,151.5500,False,True,False,False,True,True,False,False
2,0,2.000000,1,2,151.5500,True,False,False,False,True,True,False,False
3,0,30.000000,1,2,151.5500,False,True,False,False,True,True,False,False
4,0,25.000000,1,2,151.5500,True,False,False,False,True,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1304,0,14.500000,1,0,14.4542,True,False,True,False,False,False,False,True
1305,0,29.881138,1,0,14.4542,True,False,True,False,False,False,False,True
1306,0,26.500000,0,0,7.2250,False,True,True,False,False,False,False,True
1307,0,27.000000,0,0,7.2250,False,True,True,False,False,False,False,True


In [83]:
# Set random seed for reproducibility.
np.random.seed(131254)

# Convert features and labels to numpy arrays.
labels = titanic_data["survived"].to_numpy()
titanic_data = titanic_data.drop(['survived'], axis=1)
feature_names = list(titanic_data.columns)
data = titanic_data.to_numpy()

# Separate training and test sets using 
train_indices = np.random.choice(len(labels), int(0.7*len(labels)), replace=False)
test_indices = list(set(range(len(labels))) - set(train_indices))
train_features = data[train_indices]
train_labels = labels[train_indices]
test_features = data[test_indices]
test_labels = labels[test_indices]

In [85]:
import torch 
import torch.nn as nn
torch.manual_seed(1)

class TitanicSimpleNNModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(12, 12)
        self.sigmoid1 = nn.Sigmoid()
        self.linear2 = nn.Linear(12, 8)
        self.sigmoid2 = nn.Sigmoid()
        self.linear3 = nn.Linear(8, 2)
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, x):
        lin1_out = self.linear1(x)
        sigmoid_out1 = self.sigmoid1(lin1_out)
        sigmoid_out2 = self.sigmoid2(self.linear2(sigmoid_out1))
        return self.softmax(self.linear3(sigmoid_out2))

In [88]:
net = TitanicSimpleNNModel()
USE_PRETRAINED_MODEL = True

if USE_PRETRAINED_MODEL:
    net.load_state_dict(torch.load('titanic_model.pt'))
    print("Model Loaded!")
    input_tensor = torch.from_numpy(train_features).type(torch.FloatTensor)
    label_tensor = torch.from_numpy(train_labels)
else:
    criterion = nn.CrossEntropyLoss()
    num_epochs = 200

    optimizer = torch.optim.Adam(net.parameters(), lr=0.1)
    input_tensor = torch.from_numpy(train_features).type(torch.FloatTensor)
    label_tensor = torch.from_numpy(train_labels)
    for epoch in range(num_epochs):    
        output = net(input_tensor)
        loss = criterion(output, label_tensor)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch % 20 == 0:
            print ('Epoch {}/{} => Loss: {:.2f}'.format(epoch+1, num_epochs, loss.item()))

    torch.save(net.state_dict(), 'models/titanic_model.pt')

Model Loaded!


TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint8, and bool.