In [1]:
import os
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Folder holding the Titanic CSV and the fitted model. The default is the
# original location; set the TITANIC_DATA_DIR environment variable to run the
# notebook on another machine without editing this cell.
DATA_DIR = Path(
    os.environ.get(
        "TITANIC_DATA_DIR",
        "D:/CARRERA/TESIS/forest_explainer_tesis/datasets/Titanic",
    )
)
# Name of the label column in the CSV.
TARGET_COLUMN = "Sobreviviente"

df = pd.read_csv(DATA_DIR / "DataSet_Titanic.csv")

# Feature matrix: every column except the label.
data = df.drop(columns=TARGET_COLUMN)
# Fixed random_state keeps the 80/20 split identical between runs.
x_train, x_test, y_train, y_test = train_test_split(
    data, df[TARGET_COLUMN], test_size=0.2, random_state=123
)

# SECURITY NOTE: joblib files are pickle-based, so loading one can execute
# arbitrary code. Only load model files that come from a trusted source.
random_forest_model: RandomForestClassifier = joblib.load(DATA_DIR / "titanic.joblib")

In [87]:
from sklearn.tree import DecisionTreeClassifier
from treeinterpreter import treeinterpreter as ti
   

def getTreeInterpreterParamethers(n, data, model: RandomForestClassifier | DecisionTreeClassifier, class_names, positive_class: int):
    """Explain the model's prediction for the n-th row of ``data`` with treeinterpreter.

    Parameters
    ----------
    n : int
        1-based position of the row to explain; must satisfy
        ``1 <= n <= len(data)``.
    data : pandas.DataFrame
        Feature rows. The row is selected by position, not by index label.
    model : RandomForestClassifier | DecisionTreeClassifier
        Fitted model handed to ``treeinterpreter.predict``.
    class_names : sequence
        Labels for the classes; the i-th name labels the i-th class position
        of each per-feature contribution.
    positive_class : int
        Class position whose prediction, bias and contributions are reported
        in the summary dictionary.

    Returns
    -------
    tuple[dict, dict] | None
        ``(interpretation, df_dict)`` when ``n`` is in range, otherwise ``None``.

        * ``interpretation`` has the keys ``"prediction"``, ``"bias"`` and
          ``"contributions"``; the latter is a list of
          ``{"predictor", "contribution"}`` dictionaries sorted by descending
          contribution to ``positive_class``.
        * ``df_dict`` maps two-level tuple keys (``("Instance", ...)`` and
          ``("Contribution", class_name)``) to per-feature lists, ready to be
          passed to ``pandas.DataFrame``.

        Because the out-of-range result is a bare ``None``, callers that
        unpack the return value must check for it first.
    """
    if not 1 <= n <= len(data):
        return None

    # Explicit positional slice: yields a one-row DataFrame whatever the index
    # labels are (train_test_split leaves a shuffled, non-contiguous index).
    instance: pd.DataFrame = data.iloc[n - 1:n]

    predictors = list(instance.columns)
    df_dict = {
        ("Instance", "Predictor"): predictors,
        # Read each column separately so every value keeps its own dtype.
        ("Instance", "Value"): [instance[name].values[0] for name in predictors],
    }

    prediction, bias, contributions = ti.predict(model, instance)
    # Only one row was passed in, so its contributions are the first entry.
    row_contributions = contributions[0]
    for class_index, class_name in enumerate(class_names):
        df_dict[("Contribution", class_name)] = [
            per_feature[class_index] for per_feature in row_contributions
        ]

    # NOTE(review): debug output; kept because the notebook's recorded cell
    # output shows it. The same dictionary is also returned to the caller.
    print(df_dict)

    # Models fitted on plain arrays have no feature_names_in_; fall back to the
    # column labels of the instance being explained.
    feature_names = getattr(model, "feature_names_in_", instance.columns)

    # Largest contribution towards positive_class first.
    ranked = sorted(
        zip(row_contributions, feature_names),
        key=lambda pair: -pair[0][positive_class],
    )
    interpretation = {
        "prediction": prediction[0][positive_class],
        "bias": bias[0][positive_class],  # (trainset mean)
        "contributions": [
            {"predictor": feature, "contribution": contribution[positive_class]}
            for contribution, feature in ranked
        ],
    }
    return interpretation, df_dict

In [88]:
# Explain the 20th row of the test split (n is 1-based). Class position 0,
# labelled "m" here, is the one reported in the summary.
interpretation, general_dict = getTreeInterpreterParamethers(
    n=20,
    data=x_test,
    model=random_forest_model,
    class_names=["m", "v"],
    positive_class=0,
)




{('Instance', 'Predictor'): ['Clase', 'Genero', 'Edad', 'HermEsp', 'PadHij'], ('Instance', 'Value'): [3, 0, 17.0, 0, 0], ('Contribution', 'm'): [0.07065494060269391, 0.12702456109440924, 0.0025528284825468133, -0.0016897700917525957, 0.014911922791045685], ('Contribution', 'v'): [-0.07065494060269391, -0.1270245610944092, -0.002552828482546816, 0.0016897700917525944, -0.014911922791045683]}




In [75]:
# Show the whole summary first, then every entry as a key line followed by
# its value line.
print(interpretation)

for key, value in interpretation.items():
    print(key)
    print(value)

{'prediction': array([0.80305168, 0.19694832]), 'bias': array([0.5895972, 0.4104028]), 'contributions': [{'predictor': 'Genero', 'value': 0, 'contribution': 0.12702456109440924}, {'predictor': 'Clase', 'value': 3, 'contribution': 0.07065494060269391}, {'predictor': 'PadHij', 'value': 0, 'contribution': 0.014911922791045685}, {'predictor': 'Edad', 'value': 17.0, 'contribution': 0.0025528284825468133}, {'predictor': 'HermEsp', 'value': 0, 'contribution': -0.0016897700917525957}]}
prediction
[0.80305168 0.19694832]
bias
[0.5895972 0.4104028]
contributions
[{'predictor': 'Genero', 'value': 0, 'contribution': 0.12702456109440924}, {'predictor': 'Clase', 'value': 3, 'contribution': 0.07065494060269391}, {'predictor': 'PadHij', 'value': 0, 'contribution': 0.014911922791045685}, {'predictor': 'Edad', 'value': 17.0, 'contribution': 0.0025528284825468133}, {'predictor': 'HermEsp', 'value': 0, 'contribution': -0.0016897700917525957}]


In [89]:

# The dictionary keys are 2-tuples, so pandas builds a two-level column header
# ("Instance"/"Contribution" on top, the specific field underneath).
df_general: pd.DataFrame = pd.DataFrame(data=general_dict)

In [91]:
# Bare expression as the last line of the cell: the notebook renders the frame
# as the cell's output.
df_general

Unnamed: 0_level_0,Instance,Instance,Contribution,Contribution
Unnamed: 0_level_1,Predictor,Value,m,v
0,Clase,3.0,0.070655,-0.070655
1,Genero,0.0,0.127025,-0.127025
2,Edad,17.0,0.002553,-0.002553
3,HermEsp,0.0,-0.00169,0.00169
4,PadHij,0.0,0.014912,-0.014912


In [93]:
import dash
import numpy as np
import pandas as pd

from dash import dash_table

# Demo: render a DataFrame with two-level (MultiIndex) columns in a Dash
# DataTable with merged header cells.

# Header levels: every tenor in `t` is combined with every metric in `m`.
t = ["1M", "3M", "6M", "1Y"]
m = ["IV", "RV", "Spread"]

# Seed for the mock numbers so a fresh "Restart & Run All" shows the same table.
MOCK_DATA_SEED = 123
# Port for the Dash server. A recorded run of this cell failed because the
# address was already in use; change this value when that happens.
DASH_PORT = 8050

# One row per (tenor, metric) pair: tenors in the order listed in `t`, and
# within each tenor the metrics in the order listed in `m`.
# Deliberately not called `df`, so the Titanic frame loaded in the first cell
# is not overwritten by this demo.
tenor_metric_pairs = pd.DataFrame(
    [(tenor, metric) for tenor in t for metric in m],
    columns=["Tenor", None],
)

print(tenor_metric_pairs)

index = pd.MultiIndex.from_frame(tenor_metric_pairs)

# Mock table of seeded random floats: one row per currency pair and one column
# per (tenor, metric) entry of the MultiIndex created above.
rng = np.random.default_rng(MOCK_DATA_SEED)
df2 = pd.DataFrame(
    np.around(rng.standard_normal((3, len(index))), decimals=2),
    index=["EURUSD", "GBPUSD", "USDJPY"],
    columns=index,
)

# Dash app
app = dash.Dash(__name__)

# How the columns are fed to dash_table:
# * A two-row header is produced by giving each column's "name" a list with
#   one entry per header row.
# * Every column also needs a unique "id"; here it is the two header levels
#   joined with an underscore (e.g. "1M_IV").
# * Each data record is a flat dict keyed by those ids, plus the row label
#   under "Ccy Pair". `**` merges the label and the cell values into one dict.
# * to_dict(orient="index") returns {row label: {(tenor, metric): value}},
#   which is exactly the per-row view needed to build the records.
app.layout = dash_table.DataTable(
    id="table",
    columns=[{"name": ["Tenor", "Ccy Pair"], "id": "Ccy Pair"}]
    + [{"name": [x1, x2], "id": f"{x1}_{x2}"} for x1, x2 in df2.columns],
    data=[
        {
            "Ccy Pair": ccy_pair,
            **{f"{x1}_{x2}": value for (x1, x2), value in cells.items()},
        }
        for ccy_pair, cells in df2.to_dict(orient="index").items()
    ],
    merge_duplicate_headers=True,  # merges equal adjacent header cells
    # Optional interactivity parameters:
    editable=True,
    filter_action="native",
    sort_action="native",
    sort_mode="multi",
    column_selectable="single",
    row_selectable="multi",
    row_deletable=True,
    selected_columns=[],
    selected_rows=[],
    page_action="native",
    page_current=0,
    page_size=10,
)

if __name__ == "__main__":
    app.run_server(debug=True, dev_tools_hot_reload=True, port=DASH_PORT)


   Tenor    None
0     1M      IV
1     1M      RV
2     1M  Spread
3     3M      IV
4     3M      RV
5     3M  Spread
6     6M      IV
7     6M      RV
8     6M  Spread
9     1Y      IV
10    1Y      RV
11    1Y  Spread


OSError: Address 'http://127.0.0.1:8050' already in use.
    Try passing a different port to run_server.