## Prédictions en production

### Prédictions sur les données de production

In [27]:
import joblib

# Restore the persisted estimator used for the production predictions
# (presumably a random forest, judging by the file name — confirm).
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load model artifacts that come from a trusted source.
rf = joblib.load('./models/rf.pkl')

In [18]:
import pandas as pd

# Read the production feature file; a comma is already the default
# delimiter of read_csv, so it does not need to be spelled out.
data = pd.read_csv('./data/breast-test.csv')

In [19]:
# Remove the columns that are not passed to the model: the `id` column and the
# `Unnamed: 31` column present in the CSV.
# `errors='ignore'` keeps this cell idempotent: it rebinds `data` from itself,
# so without it a second run would raise KeyError because the columns are
# already gone.
# NOTE(review): this also hides a missing `id` / `Unnamed: 31` column in a new
# input file — confirm that silent tolerance is acceptable for production data.
data = data.drop(columns=['id', 'Unnamed: 31'], errors='ignore')
data.head()

Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,17.99,10.38,122.8,1101.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,138.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1799,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.3414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.4809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [28]:
# Run the loaded model on the cleaned production features. `y` is wrapped in a
# pandas Series further down and handed to the shapash predictor.
y = rf.predict(data)

### Explicabilité des prédictions de production

In [17]:
from shapash.utils.load_smartpredictor import load_smartpredictor

# Load the saved shapash predictor object used below to explain the
# production predictions.
# NOTE(review): the artifact is a .pkl file, so loading it presumably
# unpickles it — only load files from a trusted source.
predictor_load = load_smartpredictor('./models/predictor.pkl')

In [32]:
# Give the predictor the production features together with the model's
# predictions. The Series is built on `data.index` so that every prediction is
# tied to the row it was computed from; a bare `pd.Series(y)` gets a fresh
# 0..n-1 index and only lines up with `data` while `data` keeps its default
# index (e.g. it would stop matching after filtering or re-indexing rows).
predictor_load.add_input(x=data, ypred=pd.Series(y, index=data.index))

In [33]:
# Preview the first rows of the predictor's "ypred" entry; the rendered output
# shows the predicted label next to a `proba` column.
predictor_load.data["ypred"].head()

Unnamed: 0,ypred,proba
0,1,0.788619
1,1,0.958333
2,1,1.0
3,1,0.566833
4,1,0.767619


## Contribution de l'ensemble des variables

In [34]:
# Compute the per-feature contributions for the inputs registered with
# add_input; the preview below shows one contribution column per feature
# alongside `ypred` and `proba`.
detailed_contributions = predictor_load.detail_contributions()

In [35]:
# Show only the first rows of the contribution table rather than the full frame.
detailed_contributions.head()

Unnamed: 0,ypred,proba,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,1,0.788619,0.014234,-0.058397,0.054174,0.021626,0.002191,0.012244,0.036847,0.042307,...,0.030322,-0.090196,0.105287,0.096746,0.003141,0.00799,0.025212,0.068654,0.014827,0.00087
1,1,0.958333,0.017062,-0.005216,0.063537,0.038588,-0.000903,-0.001809,0.000635,0.046134,...,0.058901,-0.00448,0.137678,0.137656,-0.001919,-0.000761,-0.013848,0.071638,-0.0017,-0.000548
2,1,1.0,0.01195,0.010449,0.052395,0.029488,0.001629,0.008119,0.044944,0.051127,...,0.038763,0.005167,0.109647,0.097663,0.003009,0.006407,0.020666,0.070871,0.013175,-0.001124
3,1,0.566833,-0.007074,0.009243,-0.037628,-0.01206,0.006948,0.033719,0.056592,0.073548,...,-0.053034,0.009187,-0.064227,-0.07801,0.022462,0.035454,0.033589,0.138387,0.061351,0.008138
4,1,0.767619,0.016716,-0.059153,0.057589,0.027156,-0.000452,0.002657,0.03002,0.039072,...,0.042035,-0.09566,0.116568,0.117574,0.001969,-0.001002,0.017067,0.060122,-0.003643,-0.000602


## Contribution des 3 variables principales

In [36]:
# Limit the summary to 3 contributions per prediction (the summary below has
# feature_1..feature_3 columns).
# NOTE(review): the output suggests features are ranked by absolute
# contribution, since negative values appear among the top 3 — confirm against
# the shapash documentation.
predictor_load.modify_mask(max_contrib=3)

In [37]:
# Build the condensed explanation table: for each row, the prediction, its
# probability, and a feature / value / contribution triple per retained feature
# (see the column names in the preview below).
explanation = predictor_load.summarize()

In [38]:
# Show only the first rows of the summarized explanations.
explanation.head()

Unnamed: 0,ypred,proba,feature_1,value_1,contribution_1,feature_2,value_2,contribution_2,feature_3,value_3,contribution_3
0,1,0.788619,perimeter_worst,184.6,0.105287,area_worst,2019.0,0.096746,texture_worst,17.33,-0.090196
1,1,0.958333,perimeter_worst,158.8,0.137678,area_worst,1956.0,0.137656,concave points_worst,0.186,0.071638
2,1,1.0,perimeter_worst,152.5,0.109647,area_worst,1709.0,0.097663,concave points_worst,0.243,0.070871
3,1,0.566833,concave points_worst,0.2575,0.138387,area_worst,567.7,-0.07801,concave points_mean,0.1052,0.073548
4,1,0.767619,area_worst,1575.0,0.117574,perimeter_worst,152.2,0.116568,texture_worst,16.67,-0.09566
