# tensorflow pistachio
Tuning with hyperopt

## Links
  - [notes on training/validation loss](https://siddiqueabusaleh.medium.com/why-my-training-loss-is-higher-than-validation-loss-is-the-reported-loss-even-accurate-8843e14a0756)
  - [initialisation values](http://karpathy.github.io/2019/04/25/recipe/#2-set-up-the-end-to-end-trainingevaluation-skeleton--get-dumb-baselines)
  - [shap feature importance](https://shap.readthedocs.io/en/latest/tabular_examples.html#neural-networks)

In [1]:
import tensorflow as tf
print(tf.__version__)

2024-12-05 14:41:12.268416: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


2.16.1


## arff to csv





In [2]:
import os
os.getcwd()

'/tf/notebooks'

In [3]:
import pandas as pd 
from scipy.io import arff
import os 

from pistachio.data import load_arff_file

label_mapping = {'Kirmizi_Pistachio': 0, 'Siit_Pistachio': 1}


arff_filename = './data/Pistachio_16_Features_Dataset.arff'
csv_filename = './data/pistachio_16.csv'

if not os.path.exists(csv_filename):
    df = load_arff_file(arff_filename, label_mapping)
    df.head()
    df.to_csv(csv_filename, index=False, header=True)
    print(f'wrote file to {csv_filename}')
else:
    print(f'{csv_filename} exists')


./data/pistachio_16.csv exists


In [4]:
## Parameters

In [5]:


# dataset
# BATCH_SIZE = 16 
# PREFETCH = tf.data.AUTOTUNE
SEED=37

# model parameters
# UNITS = 12
# LAYER_1_L1 = 2e-4
# LAYER_1_L2 = 5e-3
# LAYER_2_L1 = 2e-4
# LAYER_2_L2 = 5e-3



#model fitting
# EPOCHS = 500
# LEARNING_RATE = 0.001 # initial learning rate
# LR_PLATEAU_FACTOR = 0.5
# LR_PLATEAU_PATIENCE = 5
# LR_DECAY_RATE = 0.8
# MIN_LEARNING_RATE = 1e-6
# EARLY_STOPPING_PATIENCE = 40


# mlflow
MLFLOW_URI = uri="http://pistachio_mlflow:5000"
MLFLOW_EXPERIMENT = "pistachio_tf_tuning"
MLFLOW_RUN_DESCRIPTION = 'initial tuning of two layer model'
MLFLOW_TAGS = {'architecture': f'two layers'}

# hyperopt
TRIALS_FILE_LOCATION = f'./trials/trials_{MLFLOW_EXPERIMENT}.pkl'
# will save trials object at this location
TRIALS_PER_RUN = 5
# run this many trials per notebook execution.




## dataset


In [6]:
from pistachio.data import read_or_generate_splits

# define where train/test csvs will live
split_data_path = f"./data/seed_{SEED}/"
if not os.path.exists(split_data_path):
    os.makedirs(split_data_path)

train_df, valid_df, test_df = read_or_generate_splits(split_data_path, csv_filename, seed=SEED)

for setname, df in zip(['train','validation','test'],[train_df, valid_df, test_df]):
    print(setname)
    print(f'df shape = {df.shape}')
    agged = df.groupby('Class').agg({'AREA':'count'}).reset_index()
    print(agged)


train
df shape = (1288, 17)
   Class  AREA
0      0   738
1      1   550
validation
df shape = (430, 17)
   Class  AREA
0      0   247
1      1   183
test
df shape = (430, 17)
   Class  AREA
0      0   247
1      1   183


In [7]:
feature_columns = list(train_df.columns)
feature_columns.remove('Class')
feature_columns

['AREA',
 'PERIMETER',
 'MAJOR_AXIS',
 'MINOR_AXIS',
 'ECCENTRICITY',
 'EQDIASQ',
 'SOLIDITY',
 'CONVEX_AREA',
 'EXTENT',
 'ASPECT_RATIO',
 'ROUNDNESS',
 'COMPACTNESS',
 'SHAPEFACTOR_1',
 'SHAPEFACTOR_2',
 'SHAPEFACTOR_3',
 'SHAPEFACTOR_4']

In [8]:
# from pistachio.data import df_to_dataset
# # create datasets
# train_ds = df_to_dataset(train_df,'Class', shuffle=True, drop=True)
# valid_ds = df_to_dataset(valid_df,'Class', shuffle=False, drop=False)
# test_ds = df_to_dataset(test_df,'Class', shuffle=False, drop=False)

In [9]:
from hyperopt import hp
# hyperopt search space/parameters
hp_space = {
    # model
    'units': hp.randint('units', 5,12),
    'layer_l1_reg': hp.loguniform('layer_l1_reg', 2e-6,2e-3),
    'layer_l2_reg':hp.loguniform('layer_l2_reg', 2e-6,2e-3),
    'feature_columns':feature_columns,
    # fitting
    'learning_rate': hp.loguniform('learnig_rate', 1e-7,5e-3),
    'lr_plateau_factor': hp.uniform('lr_plateau_factor', 0.5, 0.95),
    'lr_plateau_patience': 20,
    'lr_decay_rate': 0.9,
    'min_learning_rate': 5e-8,
    'early_stopping_patience': 40,

    # data/batch/epochs
    'batch_size': 16,
    'prefetch':  tf.data.AUTOTUNE,
    'epochs': 500,
    'seed':SEED
}


In [19]:
from pistachio.data import df_to_dataset
from pistachio.model import get_pistachio_model
from typing import Dict 
import mlflow
# create datasets
import mlflow
import seaborn as sns
import numpy as np
import pandas as pd

from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score, accuracy_score, roc_curve

from pistachio.evaluation import plot_metric, get_roc_results, plot_roc_curve, get_confusion_matrix
from pistachio.evaluation import make_precision_recall_plot, make_prob_calibration_plot, make_confusion_matrix_plot
sns.set()



mlflow.set_tracking_uri(uri=MLFLOW_URI)
mlflow.set_experiment(MLFLOW_EXPERIMENT)

# define our hyperopt objective
def pistachio_objective(kwargs) ->Dict:
    '''take model parameters, build, train and evaluate model, return loss value and other stats'''
    
    # reset tf state
    tf.keras.backend.clear_session()

    # start mlflow run
    with  mlflow.start_run(tags=MLFLOW_TAGS, description=MLFLOW_RUN_DESCRIPTION) as mlflow_run:

        run_name = mlflow_run.info.run_name
        run_id = mlflow_run.info.run_id
        # mlflow.log_params(kwargs)


        # define datasets 
        # think these need to go in here, given that we're clearing the tf state
        train_ds = df_to_dataset(
            train_df,
            'Class',
            shuffle=True,
            drop=True,
            batch_size=kwargs.get('batch_size',32),
            prefetch=kwargs.get('prefetch', 32))

        valid_ds = df_to_dataset(
            valid_df,
            'Class', 
            shuffle=False,
            drop=False,
            batch_size=kwargs.get('batch_size',32),
            prefetch=kwargs.get('prefetch', 32))
        
        # get the model we'll train, adapting it on train data
        model = get_pistachio_model(
            feature_columns=feature_columns,
            units=kwargs.get('units',10),
            layer_l1_reg=kwargs.get('layer_l1_reg',0.0),
            layer_l2_reg=kwargs.get('layer_l2_reg',0.0))
    
        checkpoint_dir = './pistachio_model_checkpoints/'
        checkpoint_path = os.path.join(checkpoint_dir, run_name)
        os.makedirs(checkpoint_path, exist_ok=True)

        metrics = {
        'predicted_probability': [
            tf.keras.metrics.AUC(),
            tf.keras.metrics.Precision(),
            tf.keras.metrics.Recall(),
            tf.keras.metrics.BinaryAccuracy()]}

        callbacks = [
            # tf.keras.callbacks.TensorBoard(logdir, update_freq='batch'),
            tf.keras.callbacks.ReduceLROnPlateau(
                monitor='val_loss', 
                factor=kwargs.get('lr_plateau_factor'), 
                patience=kwargs.get('lr_plateau_patience'), 
                min_lr=kwargs.get('min_learning_rate')),
            tf.keras.callbacks.EarlyStopping(patience=kwargs.get()),
            # checkpoint
            tf.keras.callbacks.ModelCheckpoint(
                filepath=checkpoint_path,
                monitor='val_loss',
                mode='min',
                initial_value_threshold=0.7,
                save_best_only=True),
            # mlflow
            mlflow.keras.MlflowCallback(mlflow_run)]
    
        optimizer = tf.keras.optimizers.Adam(learning_rate=kwargs.get('learning_rate'))

        # compile model
        model.compile(
            optimizer=optimizer,
            loss={'predicted_probability': tf.keras.losses.BinaryCrossentropy(from_logits=False)},
            metrics=metrics)
        
        # train model
        history = model.fit(
            train_ds,
            epochs=kwargs.get('epochs'),
            callbacks=callbacks,
            validation_data=valid_ds)
        
        history_df = pd.DataFrame(history.history)
        history_df['epoch'] = history_df.index
        # history_df.columns

        # plot training stuff
        plot_dir = f'./plots/{run_name}/'
        os.makedirs(plot_dir, exist_ok=True)
        metrics_to_plot = [
            'learning_rate',
            'auc',
            'loss',
            'binary_accuracy',
            'recall',
            'precision']

        metric_plots = {}
        for mm in metrics_to_plot:
            metric_plots[mm] = plot_metric(history_df, mm);
            fig_path = os.path.join(plot_dir, f'{mm}_vs_epoch.png');
            print(fig_path)
            metric_plots[mm][0].savefig(fig_path);
        
        # look at the best training epoch, get some metrics

        val_metrics = [k for k in history_df.columns if k.startswith('val')]
        best_epoch = history_df.loc[history_df.val_loss == np.min(history_df.val_loss)][['epoch'] + val_metrics].copy()

        # best_epoch
        rename = {k:f'best_epoch_{k}' for k in val_metrics}
        rename['epoch'] = 'best_epoch'

        best_stats = best_epoch\
            .rename(columns=rename)\
            .to_dict(orient='records')[0]
        
        # log these things
        mlflow.log_artifacts(plot_dir, artifact_path='training_plots')
        mlflow.log_metrics(best_stats)

        # load the best version of the model
        model = tf.keras.models.load_model(checkpoint_path)
        # get predictions on validation set
        valid_features = {k: valid_df[k].values for k in feature_columns}
        valid_predictions = model.predict(valid_features)
        valid_df['predicted_prob'] = valid_predictions
        threshold = 0.5
        valid_df['predicted_class'] = valid_df.predicted_prob.map(lambda x: 0 if x < threshold else 1)

        # roc curve
        roc_results = get_roc_results(valid_df.predicted_prob, valid_df.Class)
        valid_auc_score = roc_auc_score(valid_df.Class, valid_df.predicted_prob)
        fig, ax = plot_roc_curve(*roc_results, title=f'validation data, auc_score = {valid_auc_score}');
        roc_plot_path = os.path.join(plot_dir, 'roc_curve.png')
        fig.savefig(roc_plot_path)

        # precision recall
        fig, ax = make_precision_recall_plot(valid_df.predicted_prob, valid_df.Class, title='precision-recall')
        prec_rec_path = os.path.join(plot_dir,'precision_recall_curve.png')
        fig.savefig(prec_rec_path)

        # confusion matrix
        fig, ax = make_confusion_matrix_plot(valid_df.predicted_class, valid_df.Class)
        confusion_plot_path = os.path.join(plot_dir, 'confusion_matrix.png')
        fig.savefig(confusion_plot_path)

        # prob calibration
        fig, ax = make_prob_calibration_plot(valid_df.predicted_prob, valid_df.Class, title='pistachio classifier probability calibration')
        prob_cal_path = os.path.join(plot_dir,'probability_calibration.png')
        fig.savefig(prob_cal_path)

        validation_metrics_path = './saved_model_validation_metrics.txt'
        with open(validation_metrics_path,'w') as outfile:
            outfile.write(f'accuracy: {accuracy_score(valid_df.Class,valid_df.predicted_class)}\n')
            outfile.write(f'precision: {precision_score(valid_df.Class,valid_df.predicted_class)}\n')
            outfile.write(f'recall: {recall_score(valid_df.Class,valid_df.predicted_class)}\n')
            outfile.write(f'f1_score: {f1_score(valid_df.Class,valid_df.predicted_class)}\n')
            outfile.write(f'roc_auc_score: {valid_auc_score}\n')
        
            mlflow.log_artifact(roc_plot_path, artifact_path='evaluation_plots')
            mlflow.log_artifact(confusion_plot_path, artifact_path='evaluation_plots')
            mlflow.log_artifact(prob_cal_path, artifact_path='evaluation_plots')
            mlflow.log_artifact(prec_rec_path, artifact_path='evaluation_plots')

            mlflow.log_artifact(validation_metrics_path)
            # mlflow.log_artifact(shap_bar_path, artifact_path='evaluation_plots')
            # mlflow.log_artifact(shap_violin_path, artifact_path='evaluation_plots')

        # print(open(validation_metrics_path,'r').read())
        # return. Can put more info in here, but it should be in mlflow regardless
        return {'status': 'ok', 'loss': best_stats['best_epoch_val_loss'], 'true_loss':best_stats['best_epoch_val_loss'] }



In [20]:
import pickle
from hyperopt import Trials, fmin, tpe
if os.path.exists(TRIALS_FILE_LOCATION):
    trials = pickle.load(open(TRIALS_FILE_LOCATION,'rb'))
else:
    trials = Trials()


evals_done = len(trials.trials)
max_evals = evals_done + TRIALS_PER_RUN
best = fmin(pistachio_objective,
    space=hp.uniform('x', -10, 10),
    algo=tpe.suggest,
    max_evals=100,
    trials=trials)

with open(TRIALS_FILE_LOCATION,'wb') as outfile:
    pickle.dump(trials,outfile)





🏃 View run judicious-seal-716 at: http://pistachio_mlflow:5000/#/experiments/969440810327601672/runs/e4561c61ade74b2bad21fc3ce669c473

🧪 View experiment at: http://pistachio_mlflow:5000/#/experiments/969440810327601672

  0%|          | 0/100 [00:00<?, ?trial/s, best loss=?]

job exception: 'float' object has no attribute 'get'



  0%|          | 0/100 [00:00<?, ?trial/s, best loss=?]


AttributeError: 'float' object has no attribute 'get'

## Functional API
try this instead, dataset is weird

In [None]:
raise Exception('halting here')
train_ds.cardinality().numpy()


In [None]:
from typing import List, Dict
from tensorflow.keras.layers import Dense, Flatten, Conv2D, BatchNormalization, Normalization
from tensorflow.keras import Model
from tensorflow.keras.metrics import Accuracy, AUC, Recall, Precision

def get_pistachio_model(feature_columns: List[str], train_dataset: tf.data.Dataset, units: int=10):
    """build a pistachio model using functional api"""
    def _get_feature_normalizers():
        """initialise and adapt the feature normalisers"""
        print(f'preprocessing - initialising normalisers')
        normalizers = {}
        for feature in feature_columns:
            normaliser =  Normalization(axis=None, name=f'normalizer_{feature}')
            just_this_feature_ds = train_dataset.map(lambda x,y: x[feature])
            normaliser.adapt(just_this_feature_ds)
            normalizers[feature] = normaliser
        return normalizers
        
    def _build_model(normalizers: Dict):
        normalized_inputs = []
        raw_inputs = []
        for feature in feature_columns:
            feature_input = tf.keras.Input(shape=(1,), name=feature)
            raw_inputs.append(feature_input)
            normalized_input = normalizers[feature](feature_input)
            normalized_inputs.append(normalized_input)

        input_layer = tf.keras.layers.concatenate(normalized_inputs)

        # densely connected layers
        d1 = Dense(
            units,
            activation='relu',
            name='dense_1',
            kernel_regularizer=tf.keras.regularizers.L1L2(l1=LAYER_1_L1, l2=LAYER_1_L2))
        
        d2 = Dense(
            units,
            activation='relu',
            name='dense_2',
            kernel_regularizer=tf.keras.regularizers.L1L2(l1=LAYER_2_L1, l2=LAYER_2_L2))
        

        # output layer
        output_layer = Dense(1, activation='sigmoid', name='predicted_probability')
        # http://karpathy.github.io/2019/04/25/recipe/#2-set-up-the-end-to-end-trainingevaluation-skeleton--get-dumb-baselines

        # define graph
        x = d1(input_layer)
        x = d2(x)
        output_probability = output_layer(x)
        
        model = tf.keras.Model(raw_inputs, output_probability)
        return model
    normalizers = _get_feature_normalizers()
    model = _build_model(normalizers)
    return model
 

In [None]:
import os
import shutil



logdir = './pistachio_model_logs'
if os.path.exists(logdir):
    shutil.rmtree(logdir)
os.makedirs(logdir, exist_ok=True)
checkpoint_dir = './pistachio_model_checkpoints/'
os.makedirs(checkpoint_dir, exist_ok=True)

metrics = {
    'predicted_probability': [
        tf.keras.metrics.AUC(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
        tf.keras.metrics.BinaryAccuracy()]
}

callbacks = [
    tf.keras.callbacks.TensorBoard(logdir, update_freq='batch'),
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=LR_PLATEAU_FACTOR, patience=LR_PLATEAU_PATIENCE, min_lr=MIN_LEARNING_RATE),
    # tf.keras.callbacks.LearningRateScheduler(lambda epoch, lr: max(lr*LR_DECAY_RATE, MIN_LEARNING_RATE))
    tf.keras.callbacks.EarlyStopping(patience=EARLY_STOPPING_PATIENCE)
    # mlflow included later - based off mlflow runid
    # checkpointing included later - uses mlflow runid
]

optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)

# build the model
model = get_pistachio_model(feature_columns, train_ds, units=UNITS)

# compile the model
model.compile(
    optimizer=optimizer,
    loss={'predicted_probability': tf.keras.losses.BinaryCrossentropy(from_logits=False)},
    metrics=metrics)

In [None]:
params = {
    'BATCH_SIZE': BATCH_SIZE,
    'PREFETCH': PREFETCH,
    'SEED': SEED,
    'UNITS': UNITS,
    'EPOCHS': EPOCHS,
    'LEARNING_RATE': LEARNING_RATE,
    'LR_PLATEAU_FACTOR': LR_PLATEAU_FACTOR,
    'LR_DECAY_RATE': LR_DECAY_RATE,
    'MIN_LEARNING_RATE': MIN_LEARNING_RATE,
    'FEATURE_COLUMNS': feature_columns,
    'LAYER_1_L1':LAYER_1_L1,
    'LAYER_1_L2':LAYER_1_L2,
    'LAYER_2_L1':LAYER_2_L1,
    'LAYER_2_L2':LAYER_2_L2
}
for k,v in params.items():
    print(f'{k}: {v}')

In [None]:
import mlflow

mlflow.set_tracking_uri(uri=MLFLOW_URI)
mlflow.set_experiment(MLFLOW_EXPERIMENT)
# don't autolog
# mlflow.tensorflow.autolog()

# if passing an existing run_id to mlflow.start_run, it treats it as resuming that run (update/change parameters, metrics)



In [None]:
with mlflow.start_run(tags=MLFLOW_TAGS, description=MLFLOW_RUN_DESCRIPTION) as mlflow_run:
    # append mlflow callback to callbacks
    run_name = mlflow_run.info.run_name
    run_id = mlflow_run.info.run_id
    callbacks.append(mlflow.keras.MlflowCallback(mlflow_run))
    # append model checkpoint to callbacks
    checkpoint_path = os.path.join(checkpoint_dir,f'model_{mlflow_run.info.run_name}.model.keras')
    callbacks.append(tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path,
        monitor='val_loss',
        mode='min',
        initial_value_threshold=0.7,
        save_best_only=True))       
    
    history = model.fit(
        train_ds,
        epochs=EPOCHS,
        callbacks=callbacks,
        validation_data=valid_ds)

    mlflow.log_params(params)

In [None]:
print(f'run_name: {run_name}, run_id: {run_id}')

In [None]:
history_df = pd.DataFrame(history.history)
history_df['epoch'] = history_df.index
history_df.columns

In [None]:
history_df.tail()

In [None]:
from typing import List, Dict, Callable, Tuple

from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score, accuracy_score, roc_curve
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_curve, average_precision_score

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

def plot_metric(history_df: pd.DataFrame, metric_name:str):
    '''plot metric vs epoch'''
    fig = plt.figure()
    ax = fig.add_axes([0.1,0.1,0.8,0.8])
    # train data value 
    if metric_name == 'learning_rate':
        ax.plot(history_df.index, history_df[metric_name], color=sns.xkcd_rgb['merlot'], label=metric_name)
    else:    
        ax.plot(history_df.index, history_df[metric_name], color=sns.xkcd_rgb['merlot'], label=f'train_{metric_name}')
        ax.plot(history_df.index, history_df[f'val_{metric_name}'], color=sns.xkcd_rgb['blurple'], label=f'val_{metric_name}')
    ax.legend()
    ax.set_title(f'{metric_name} vs epoch')
    return fig, ax

sns.set()
    

In [None]:

plot_dir = f'./plots/{run_name}/'
os.makedirs(plot_dir, exist_ok=True)
metrics_to_plot = [
    'learning_rate',
    'auc',
    'loss',
    'binary_accuracy',
    'recall',
    'precision']

metric_plots = {}
for mm in metrics_to_plot:
    metric_plots[mm] = plot_metric(history_df, mm);
    fig_path = os.path.join(plot_dir, f'{mm}_vs_epoch.png');
    print(fig_path)
    metric_plots[mm][0].savefig(fig_path);
                                 

    

In [None]:
metric_plots[metrics_to_plot[0]][0]


In [None]:
metric_plots[metrics_to_plot[1]][0]

In [None]:
metric_plots[metrics_to_plot[2]][0]

In [None]:
metric_plots[metrics_to_plot[3]][0]

In [None]:
metric_plots[metrics_to_plot[4]][0]

In [None]:
metric_plots[metrics_to_plot[5]][0]

In [None]:
# add plots as artifacts
val_metrics = [k for k in history_df.columns if k.startswith('val')]
best_epoch = history_df.loc[history_df.val_loss == np.min(history_df.val_loss)][['epoch'] + val_metrics].copy()

# best_epoch
rename = {k:f'best_epoch_{k}' for k in val_metrics}
rename['epoch'] = 'best_epoch'

best_stats = best_epoch\
    .rename(columns=rename)\
    .to_dict(orient='records')[0]
# best_stats


with mlflow.start_run(run_id=run_id) as mlflow_run:
#     for mm in metrics_to_plot:
#         # fig_path = os.path.join(plot_dir, f'{mm}_vs_epoch.png');
    mlflow.log_artifacts(plot_dir, artifact_path='training_plots')
    mlflow.log_metrics(best_stats)
        



In [None]:
best_stats

In [None]:
print(model.metrics_names)
model.evaluate(valid_ds, return_dict=True)

## Model Evaluation



In [None]:
# load the best model from earlier
model = tf.keras.models.load_model(checkpoint_path)
valid_features = {k: valid_df[k].values for k in feature_columns}
valid_predictions = model.predict(valid_features)
valid_df['predicted_prob'] = valid_predictions
threshold = 0.5
valid_df['predicted_class'] = valid_df.predicted_prob.map(lambda x: 0 if x < threshold else 1)
# valid_df.head()
# valid_predictions[0:10]

In [None]:
model.evaluate(valid_ds, return_dict=True)


In [None]:
# model.predict(valid_ds)

In [None]:
from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score, accuracy_score, roc_curve
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_curve, average_precision_score
import scipy
import statsmodels
from statsmodels.stats.proportion import proportion_confint


import sys
def get_roc_results(predicted_probs: List[float], actual_classes: List[float]) -> Tuple[List[float],List[float],List[float]]:
    """get roc curve definition

    Args:
        predicted_probs (List[float]): predicted probabilities
        actual_classes (List[float]): actual binary labels

    Returns:
        Tuple[List,List,List]: fpr, tpr, thresholds
    """
    fpr, tpr, thresholds = roc_curve(actual_classes, predicted_probs)
    if thresholds[0] == float('inf'):
        thresholds[0] = sys.float_info.max

    return fpr, tpr, thresholds

#################################################################

def plot_roc_curve(fpr, tpr, thresholds, title: str="ROC curve", xlabel='False Positive Rate', ylabel: str='True Positive Rate') -> Tuple[mpl.figure.Figure, mpl.axes.Axes]:
    """_summary_

    Args:
        fpr (_type_): _description_
        tpr (_type_): _description_
        thresholds (_type_): _description_
        title (str, optional): _description_. Defaults to "ROC curve".
        xlabel (str, optional): _description_. Defaults to 'False Positive Rate'.
        ylabel (str, optional): _description_. Defaults to 'True Positive Rate'.

    Returns:
        Tuple[mpl.Figure, mpl.Axis]: _description_
    """
    fig = plt.figure()
    ax = fig.add_axes([0.1,0.1,0.8,0.8])
    ax.plot(fpr, tpr, color=sns.xkcd_rgb['blurple'], label='roc curve')
    ax.plot([0.0, 1.0],[0.0, 1.0], color=sns.xkcd_rgb['merlot'], linestyle='--', label='random')
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    # fig.show()
    return fig, ax
#################################################################
def get_confusion_matrix(predicted_classes, actual_classes, normalise=None):
    """get confusion matrix
    computes confusion matrix for binary classification

    Args:
        predicted_classes (_type_): _description_
        actual_classes (_type_): _description_
        normalise (bool, optional): _description_. Defaults to False.

    Returns:
        _type_: _description_
    """
    matrix = confusion_matrix(actual_classes,predicted_classes, normalize=normalise)
    return matrix
#################################################################

def make_confusion_matrix_plot(
    predicted_classes,
    actual_classes,
    title:str = 'confusion matrix',
    xlabel: str='predicted class',
    ylabel: str='actual class',
    class_names: List[str] = None,
    normalise:str=None
    ):
    """ generate confusion matrix plot"""
    fig = plt.figure()
    ax = fig.add_axes([0.1,0.1,0.8,0.8])
    ax.grid(False)
    # cmap = sns.color_palette("magma_r", as_cmap=True)
    cmap = sns.light_palette("indigo", as_cmap=True)

    # cmap = 'viridis'

    matrix = confusion_matrix(actual_classes,predicted_classes, normalize=normalise)
    ax.imshow(matrix, cmap=cmap)
    for i in range(matrix.shape[0]):
        for j in range(matrix.shape[1]):
            ax.text(i,j,f'{matrix[i,j]}')
    # ax.plot([0.0, 1.0],[0.0, 1.0], color=sns.xkcd_rgb['merlot'], linestyle='--', label='random')
    labels = class_names if class_names else ['0','1']

    ax.set_xlim([-0.5, matrix.shape[0]- 0.5])
    ax.set_ylim([matrix.shape[0]- 0.5, -0.5])
    ax.set_xticks(np.arange(matrix.shape[0]))
    ax.set_yticks(np.arange(matrix.shape[0]))
    ax.set_xticklabels(labels)
    ax.set_yticklabels(labels)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    # ax.legend()
    # fig.show()
    return fig, ax
#################################################################
def make_precision_recall_plot(predicted_probs, actual_classes, title: str="Precision-Recall Curve", xlabel='Recall',ylabel: str='Precision',
                              positive_rate:float=None):
    """make a roc curve"""
    precision, recall, _ = precision_recall_curve(actual_classes, predicted_probs)
    fig = plt.figure()
    ax = fig.add_axes([0.1,0.1,0.8,0.8])
    classifier_average_precision = average_precision_score(actual_classes, predicted_probs)
    ax.plot(recall, precision, color=sns.xkcd_rgb['blurple'], label=f'precision recall curve (average precision = {classifier_average_precision:0.3f}')
    if positive_rate:
        ax.plot([0.0, 1.0],[positive_rate, positive_rate], color=sns.xkcd_rgb['merlot'], linestyle='--', label=f'positive response rate = {positive_rate:0.3f}')

    ax.set_xlabel('Recall')
    ax.set_ylabel('Precision')
    ax.set_title(title)
    ax.legend()
    # fig.show()
    return fig, ax
###########################################################

def make_prob_calibration_plot(predicted_probs, actual_classes, n_bins: int=20, alpha: float = 0.05, title:str = 'probability calibration'):
    """bin records, check that proportion of labels in each bin matches mean probability of that bin"""
    bins = pd.qcut(predicted_probs, n_bins, labels=False)
    df = pd.DataFrame({'probability':predicted_probs, 'class':actual_classes, 'bin':bins})
    df = df.sort_values(by='probability',ascending=False).reset_index(drop=True)
    agged = df.groupby('bin').agg(
        pred_prob=pd.NamedAgg('probability','mean'),
        pred_std=pd.NamedAgg('probability','std'),
        class_prob=pd.NamedAgg('class','mean'),
        class_sum=pd.NamedAgg('class','sum'),
        bin_size=pd.NamedAgg('class','count')
    )
    act_err_low, act_err_high = proportion_confint(agged.class_sum, agged.bin_size, method='wilson', alpha = alpha)
    z_low = scipy.stats.norm.ppf(alpha/2)
    z_high = scipy.stats.norm.ppf(1.0 - alpha/2)
    agged['pred_low'] =  -z_low*agged['pred_std']/np.sqrt(agged['bin_size']) # agged['pred_prob'] +
    agged['pred_high'] = z_high*agged['pred_std']/np.sqrt(agged['bin_size']) #+ agged['pred_prob'] +
    agged['actual_error_high'] = np.maximum(act_err_high  - agged.class_prob,0)
    agged['actual_error_low'] =  np.maximum(agged.class_prob - act_err_low,0)
    agged.loc[np.abs(agged.actual_error_high) < 1e-10, 'actual_error_high'] = 0


    # print(agged)
    # print(agged)
    fig = plt.figure()
    ax = fig.add_axes([0.1,0.1, 0.8, 0.8])
    ax.errorbar(
        agged.pred_prob,
        agged.class_prob,
        yerr=[agged.actual_error_low, agged.actual_error_high],
        xerr=[agged.pred_low,agged.pred_high],
        fmt='.',
        color=sns.xkcd_rgb['blurple'])
    ax.plot([0.0,1.0],[0.0,1.0],'--',label='ideal', color=sns.xkcd_rgb['dark blue'])
    ax.set_title(title)
    ax.set_xlabel('predicted probability')
    ax.set_ylabel('observed probabiilty')
    return fig, ax

In [None]:
valid_df.head()


In [None]:
roc_results = get_roc_results(valid_df.predicted_prob, valid_df.Class)
valid_auc_score = roc_auc_score(valid_df.Class, valid_df.predicted_prob)
fig, ax = plot_roc_curve(*roc_results, title=f'validation data, auc_score = {valid_auc_score}');
roc_plot_path = os.path.join(plot_dir, 'roc_curve.png')
fig.savefig(roc_plot_path)



In [None]:
fig, ax = make_confusion_matrix_plot(valid_df.predicted_class, valid_df.Class)
confusion_plot_path = os.path.join(plot_dir, 'confusion_matrix.png')
fig.savefig(confusion_plot_path)

In [None]:
validation_metrics_path = './saved_model_validation_metrics.txt'
with open(validation_metrics_path,'w') as outfile:
    outfile.write(f'accuracy: {accuracy_score(valid_df.Class,valid_df.predicted_class)}\n')
    outfile.write(f'precision: {precision_score(valid_df.Class,valid_df.predicted_class)}\n')
    outfile.write(f'recall: {recall_score(valid_df.Class,valid_df.predicted_class)}\n')
    outfile.write(f'f1_score: {f1_score(valid_df.Class,valid_df.predicted_class)}\n')
    outfile.write(f'roc_auc_score: {valid_auc_score}\n')

print(open(validation_metrics_path,'r').read())


In [None]:
fig, ax = make_precision_recall_plot(valid_df.predicted_prob, valid_df.Class, title='precision-recall')
prec_rec_path = os.path.join(plot_dir,'precision_recall_curve.png')
fig.savefig(prec_rec_path)

In [None]:
fig, ax = make_prob_calibration_plot(valid_df.predicted_prob, valid_df.Class, title='pistachio classifier probability calibration')
prob_cal_path = os.path.join(plot_dir,'probability_calibration.png')
fig.savefig(prob_cal_path)


## SHAP

In [None]:
import shap
def shap_wrapper(X):
    feature_dict = {k:X[:,i] for i,k in enumerate(feature_columns)}
    return model.predict(feature_dict).flatten()

shap_n_samples = 50
shap_explainer_samples = 50

data_shap = train_df.loc[:,feature_columns]
explainer = shap.KernelExplainer(shap_wrapper, data_shap.iloc[:shap_explainer_samples,:])
shap_values = explainer.shap_values(data_shap.iloc[shap_explainer_samples:shap_explainer_samples+shap_n_samples, :], nsamples=200)
# shap.force_plot(explainer.expected_value, shap_values, data_shap[237,:])




In [None]:
shap_values.shape

In [None]:
# shap.plots.scatter(explainer)
# shap.plots.bar(shap_values[0])
shap_violin_path = os.path.join(plot_dir,'shap_violin.png')
shap_bar_path = os.path.join(plot_dir,'shap_bar.png')

shap.summary_plot(
    shap_values, features=data_shap.iloc[50:100, :], feature_names=feature_columns, plot_type="violin", max_display=30, show=False)
plt.savefig(shap_violin_path)




In [None]:
shap.summary_plot(
    shap_values, features=data_shap.iloc[50:100, :], feature_names=feature_columns, plot_type="bar", max_display=30, show=False)
plt.savefig(shap_bar_path)

In [None]:
with mlflow.start_run(run_id=run_id) as mlflow_run:
#     for mm in metrics_to_plot:
#         # fig_path = os.path.join(plot_dir, f'{mm}_vs_epoch.png');
    mlflow.log_artifact(roc_plot_path, artifact_path='evaluation_plots')
    mlflow.log_artifact(confusion_plot_path, artifact_path='evaluation_plots')
    mlflow.log_artifact(prob_cal_path, artifact_path='evaluation_plots')
    mlflow.log_artifact(prec_rec_path, artifact_path='evaluation_plots')

    mlflow.log_artifact(validation_metrics_path)
    mlflow.log_artifact(shap_bar_path, artifact_path='evaluation_plots')
    mlflow.log_artifact(shap_violin_path, artifact_path='evaluation_plots')
    

    


    
    # mlflow.log_metrics(best_stats)