In [2]:
import pandas as pd
import numpy as np

In [3]:
df = pd.read_csv("D:/CARRERA/TESIS/forest_explainer_tesis/datasets/Titanic/DataSet_Titanic.csv")

In [4]:
from sklearn.model_selection import train_test_split

data = df.drop(columns="Sobreviviente")
x_train, x_test, y_train, y_test = train_test_split(data,
                                                    df["Sobreviviente"], test_size=0.2,
                                                    random_state=123)

In [5]:
import joblib
from sklearn.ensemble import RandomForestClassifier

random_forest_model: RandomForestClassifier = joblib.load(
    "D:/CARRERA/TESIS/forest_explainer_tesis/datasets/Titanic/titanic.joblib")

In [6]:
import plotly.graph_objects as go
from sklearn.metrics import roc_curve, auc, confusion_matrix

CONFUSION MATRIX

In [15]:
y_pred = random_forest_model.predict(data)
cm = confusion_matrix(y_true=df["Sobreviviente"], y_pred=y_pred)
cm

array([[365,  59],
       [ 92, 198]], dtype=int64)

In [40]:
print(df["Sobreviviente"].value_counts())

Sobreviviente
0    424
1    290
Name: count, dtype: int64


In [128]:
# current_class = 1
class_names = ["muere", "vive"]
for current_class in range(len(class_names)):
    print(cm[current_class][current_class])

365
198


In [205]:
from numpy import int64


def get_matrix_explanation(cm, class_names):
    matrix_explanation = []
    true_values = 0
    false_values = 0
    for current_class in range(len(class_names)):
        
        other_indexes = list(range(len(cm)))
        other_indexes.remove(current_class)
        true_positive = cm[current_class][current_class]
        true_negatives = []
        false_positives = []
        false_negatives = []
        for index in other_indexes:
            true_negatives.append(cm[index][index])
            false_positives.append(cm[index][current_class])
            false_negatives.append(cm[current_class][index])
        if current_class == 0:
            true_values = true_positive + sum(true_negatives)
            false_values = sum(false_positives) + sum(false_negatives)            
        

        explanation = {
            "precision": true_positive / (true_positive + sum(false_positives)),
            "recall": true_positive / (true_positive + sum(false_negatives)),
            "false_positive_rate": sum(false_positives)
            / (sum(false_positives) + sum(true_negatives)),
            "false_negative_rate": sum(false_negatives)
            / (sum(false_negatives) + true_positive),
        }

        explanation["f1_score"] = (
            2
            * explanation["precision"]
            * explanation["recall"]
            / (explanation["precision"] + explanation["recall"])
        )
        
        for elm in explanation:
            explanation[elm] =  f"{round(explanation[elm] * 100, 2)} %" 

        matrix_explanation.append(
            {
                "current_class": class_names[current_class],
                "explanation": explanation,
            }
        )

    return {
        "dtype": "pbject",
        "true_values": true_values,
        "false_values": false_values,
        "accuracy": f"{round((true_values / (true_values + false_values)) *100, 2)} %",
        "matrix_explanation": matrix_explanation,
    }

In [206]:
mx = get_matrix_explanation(cm, class_names)
matrix_explanation = mx.pop("matrix_explanation")

In [207]:
pd.DataFrame(mx, index=["Parameters"]).transpose()[1:]

Unnamed: 0,Parameters
true_values,563
false_values,151
accuracy,78.85 %


In [216]:
def create_column(m):
    return pd.DataFrame(m["explanation"], index=[m["current_class"]]).transpose().rename_axis("Parameters")

pd.concat([create_column(m) for m in matrix_explanation], axis=1).reset_index()

Unnamed: 0,Parameters,muere,vive
0,precision,79.87 %,77.04 %
1,recall,86.08 %,68.28 %
2,false_positive_rate,31.72 %,13.92 %
3,false_negative_rate,13.92 %,31.72 %
4,f1_score,82.86 %,72.39 %


In [218]:
create_column(matrix_explanation[1]).reset_index()

Unnamed: 0,Parameters,vive
0,precision,77.04 %
1,recall,68.28 %
2,false_positive_rate,13.92 %
3,false_negative_rate,31.72 %
4,f1_score,72.39 %


In [81]:
fig = go.Figure(
    data=go.Heatmap(
        z=cm,
        x=["muere", "vive"],
        y=["muere", "vive"],
        text=cm,
        texttemplate="%{text}",
    ),
    layout=dict(title="CMATRIX"),
    
)
fig.show()

In [6]:
options = [
    {
        'label': "Label 1"
    },
    {
        'label': "Label 2"
    },
]

y_pred = random_forest_model.predict_proba(x_test)

In [7]:
options = [
    {
        'label': "Label 1"
    },
    {
        'label': "Label 2"
    },
]

y_pred = random_forest_model.predict_proba(x_test)

In [23]:
def create_curve(y_scores, y_true, options, pointers):
        # One hot encode the labels in order to plot them
        # y_onehot = pd.get_dummies(y_true)
        # print(y_onehot)

        data = []
        trace1 = go.Scatter(x=[0, 1], y=[0, 1],
                            mode='lines',
                            line=dict(dash='dash'),
                            showlegend=False

                            )

        data.append(trace1)
        cont = 0
        for i in range(y_scores.shape[1]):
            y_score = y_scores[:, i]
            
            pointer = pointers[i]

            fpr, tpr, _ = roc_curve(y_true, y_score, pos_label=i)
            auc_score = auc(fpr, tpr)
            
            if pointer >= 0:
                name = f"{options[cont]['label']} (AUC={auc_score:.2f})"
                trace2 = go.Scatter(x=fpr, y=tpr,
                                    name=name,
                                    mode='lines')
                data.append(trace2)
                scatterPointer = int(len(fpr) * pointer / 100)
                print(scatterPointer)
                trace3 = go.Scatter(x=[fpr[scatterPointer]], y=[tpr[scatterPointer]], legendgroup='Marker',
                                    name=f"Marker {options[cont]['label']}",)
                trace4 = go.Scatter(x=[0, fpr[scatterPointer]], y=[tpr[scatterPointer], tpr[scatterPointer]],
                                mode='lines', legendgroup='Marker',
                                name=f"TPR {round(tpr[scatterPointer] * 100, 2)} %",
                                line=dict(dash='dash'),

                                )
                trace5 = go.Scatter(x=[fpr[scatterPointer], fpr[scatterPointer]], y=[0, tpr[scatterPointer]],
                                mode='lines', legendgroup='Marker',
                                name=f"FPR {round(fpr[scatterPointer] * 100, 2)} %",
                                line=dict(dash='dash'),

                                )
                data.append(trace3)
                data.append(trace4)
                data.append(trace5)
            cont += 1

        layout = go.Layout(
            title='ROC-AUC curva',
            yaxis=dict(title='Tasa de Positivos'),
            xaxis=dict(title='Tasa de Falsos Positivos')
        )

        fig = go.Figure(data=data, layout=layout)

        return fig

In [24]:
fig = create_curve(y_scores=y_pred, y_true=y_test, options=options, pointers=[45,-1])
fig


17
