In [21]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import altair as alt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, brier_score_loss
from sklearn.calibration import calibration_curve
from sklearn.datasets import load_iris
iris_ = load_iris()
iris = pd.DataFrame(iris_.data, columns=iris_.feature_names).assign(target=iris_.target)

from biodat_load import X_train, y_train, X_test, y_test

model = LogisticRegression(solver='lbfgs', max_iter=10**4).fit(X_train, y_train)
predict = pd.Series([x[0] for x in model.predict_proba(X_test)], index=y_test.index)

print(f'Brier score: {brier_score_loss(y_test, predict):.5}')

def mk_bind(Ndv: int = 10):
    dv = [(x//Ndv, f'{x}-{x+Ndv-1}%') for x in range(0, 100, Ndv)]
    return pd.cut(predict, bins=100//Ndv, labels=[l[1] for l in dv])

predict_binned = mk_bind()

cali_df = pd.DataFrame(list(zip(y_test, predict, predict_binned)), 
                       columns=['actual', 'prediction', 'bin'], 
                       index=y_test.index)

by_mean = cali_df.groupby(['bin']).mean()

cali_df.head()

Brier score: 0.57721


Unnamed: 0,actual,prediction,bin
1073,1,0.918909,90-99%
3418,0,0.959679,90-99%
2597,1,0.73697,70-79%
3317,1,0.726654,70-79%
602,1,0.045612,0-9%


In [28]:
mod_performance = alt.Chart(
    pd.DataFrame(np.array(calibration_curve(y_test, predict, n_bins=10)).T, columns=['true','predicted'])
).mark_point(
).encode(
    x='true:Q', 
    y='predicted:Q')

perfect = alt.Chart(
    pd.DataFrame(np.array([np.linspace(0,1,100), np.linspace(0,1,100)]).T, columns=['true','predicted'])
).mark_line(
).encode(
    x='true',y='predicted')

(perfect + mod_performance).serve()


Note: if you're in the Jupyter notebook, Chart.serve() is not the best
      way to view plots. Consider using Chart.display().
You must interrupt the kernel to cancel this command.

Serving to http://127.0.0.1:8889/    [Ctrl-C to exit]


  attrs['type'] = infer_vegalite_type(data[attrs['field']])
127.0.0.1 - - [09/Mar/2019 12:02:46] "GET / HTTP/1.1" 200 -



stopping Server...


In [25]:
by_mean

Unnamed: 0_level_0,actual,prediction
bin,Unnamed: 1_level_1,Unnamed: 2_level_1
0-9%,0.854839,0.033513
10-19%,0.735632,0.146909
20-29%,0.622642,0.251772
30-39%,0.681818,0.349122
40-49%,0.521739,0.441332
50-59%,0.5,0.546708
60-69%,0.317073,0.651518
70-79%,0.263158,0.747405
80-89%,0.164179,0.859823
90-99%,0.215278,0.958297


In [27]:
pd.DataFrame(np.array(calibration_curve(y_test, predict, n_bins=10)).T, columns=['actual','predicted'])

Unnamed: 0,actual,predicted
0,0.854839,0.033513
1,0.735632,0.146909
2,0.622642,0.251772
3,0.681818,0.349122
4,0.521739,0.441332
5,0.5,0.546708
6,0.317073,0.651518
7,0.263158,0.747405
8,0.164179,0.859823
9,0.215278,0.958297


In [4]:
#from . import Book
class CalibrationViewer: 
    '''view calibration a forecaster's calibration and brier score '''
    def __init__(self, bk: Book, relevant_to_payout: ratio = ratio(1,5), mode: str = "NULL"): 
        self.book = bk
        self.relevant_to_payout = (relevant_to_payout * len(self.book.players_df)).__ceil__()
        self.expectation: bool = any([x=='NOBODY' for x in self.book.games_df.winner.apply(show)])
        
    

ImportError: cannot import name 'Book'