In [1]:
from datasets import titanic_data

from pygam import LogisticGAM, s, f

from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix
import pandas as pd
import numpy as np

from tools import roc

from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)

In [2]:
# Load the Titanic data: the full feature/target pair plus a train/test split,
# in the order returned by the project's `titanic_data` helper.
(
    original_X, original_y,
    train_X, train_y,
    test_X, test_y,
) = titanic_data()

## Spline terms

In [3]:
# Columns of original_X the GAM is fitted on. The position in this list is the
# feature index used when the model terms are declared.
selected_features = [
    'pclass',
    'sex',
    'age',
    'sibsp',
    'embarked_S',
]

In [4]:
# Logistic GAM with one spline term per selected feature; the integer passed to
# s() is the column index into original_X[selected_features].
model = LogisticGAM(s(0) + s(1) + s(2) + s(3) + s(4))
# Attach scikit-learn-style classifier metadata to the pyGAM estimator.
# NOTE(review): presumably required by sklearn-based helpers such as `roc` or
# cross_val_score -- confirm which consumer actually reads these attributes.
model._estimator_type = "classifier"
model.classes_ = np.array([False, True])
# Fit on the full dataset (not the train split), letting gridsearch pick the
# hyper-parameters from pyGAM's default grid.
model.gridsearch(original_X[selected_features].values, original_y.values)
# Alias `score` to pyGAM's accuracy method.
# NOTE(review): presumably so sklearn-style callers of `.score(X, y)` work -- confirm.
model.score = model.accuracy

100% (11 of 11) |########################| Elapsed Time: 0:00:00 Time:  0:00:00


In [5]:
# Plot the partial dependence of every model term, with its 95% confidence
# band, in a single row of subplots (one panel per selected feature).
n_terms = len(selected_features)

# The model is fitted on original_X[selected_features], so term i belongs to
# selected_features[i]. Titling the panels with original_X.columns would label
# them with the first columns of the *full* frame, which need not be the
# features actually plotted.
fig = make_subplots(rows=1, cols=n_terms, subplot_titles=selected_features)

for i in range(n_terms):

    # Grid of inputs that varies along term i's feature.
    XX = model.generate_X_grid(term=i)

    x = XX[:, i]
    y, confidence = model.partial_dependence(term=i, X=XX, width=.95)

    lower = confidence[:, 0]
    upper = confidence[:, 1]

    # Partial-dependence curve.
    fig.add_trace(
        go.Scatter(x=x, y=y),
        row=1, col=i + 1)

    # Upper bound of the confidence band (invisible line; it is the reference
    # trace the next trace fills towards).
    fig.add_trace(
        go.Scatter(x=x, y=upper, fillcolor='rgba(0,100,80,0.2)', line=dict(width=0),),
        row=1, col=i + 1)

    # Lower bound; fill='tonexty' shades the area between this trace and the
    # previously added one (the upper bound).
    fig.add_trace(
        go.Scatter(x=x, y=lower, fillcolor='rgba(100,50,80,0.1)', fill='tonexty', line=dict(width=0),),
        row=1, col=i + 1)

fig.update_layout(height=400, width=1000, showlegend=False)
fig.show()

## Confusion matrix and ROC

In [6]:
# Confusion matrix of the fitted model on the full dataset it was trained on.
eval_X = original_X[selected_features]
predicted_y = model.predict(eval_X).ravel()

# For a binary problem sklearn lays the matrix out as [[tn, fp], [fn, tp]],
# so flattening yields the four counts in that order.
counts = confusion_matrix(original_y, predicted_y)
tn, fp, fn, tp = counts.ravel()

f"TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}"

'TP: 253, TN: 472, FP: 77, FN: 89'

In [7]:
# Build the ROC figure with the project's `roc` helper, then render it.
roc_fig = roc(model, original_X[selected_features], original_y)
roc_fig.show()