In [1]:
%matplotlib widget
import warnings
warnings.filterwarnings('ignore')

import ipywidgets as widgets
from ipywidgets import Layout
from IPython.display import display
from functools import partial
import matplotlib.pyplot as plt
import src.FeatureExplorer.DataController as datacontrol
import src.FeatureExplorer.ViewController as vc

# Build the project data object from the RAVDESS metadata CSV. Later cells read
# `features_train`, `labels_train`, `features_test` and `labels_test` from it.
df = datacontrol.dataframe('datasets/RAVDESS/metadata/RAVDESS.csv')
# Last expression of the cell, so the returned widget box is rendered as the cell output.
# NOTE(review): presumably these controls update `df` in place — confirm in ViewController.
vc.view_feature_controls(df)

HBox(children=(VBox(children=(VBox(children=(Label(value='Emotions'),)), SelectMultiple(index=(0,), layout=Lay…

In [2]:
# Grid Search
from sklearn.svm import SVC
from sklearn.metrics import recall_score, balanced_accuracy_score
from sklearn.model_selection import GroupKFold, GridSearchCV
import pandas as pd
import numpy as np

class GridCV:
    """Grid search over RBF-kernel SVC hyper-parameters on a training split.

    The constructor only captures ``df.features_train`` and ``df.labels_train``;
    the search itself runs in :meth:`perform_gridsearch`.
    """

    # Search space used when the caller does not supply one.
    DEFAULT_PARAM_GRID = {
        "C": [0.1, 0.5, 0.8, 1.0, 1.2, 1.5, 2],
        "gamma": ["auto", "scale"],
        "class_weight": ["balanced", None],
    }

    # Columns of ``cv_results_`` that are returned, in the order they are returned.
    RESULT_COLUMNS = [
        'param_C', 'param_class_weight', 'param_gamma',
        'mean_test_score', 'mean_train_score', 'std_test_score',
        'rank_test_score',
    ]

    def __init__(self, df):
        """Store the training features and labels exposed by ``df``.

        Args:
            df: object providing ``features_train`` and ``labels_train`` attributes.
        """
        self.features_train = df.features_train
        self.label_train = df.labels_train

    def perform_gridsearch(self, param_grid=None, n_splits=5, n_groups=10,
                           seed=7, scoring='recall'):
        """Run the grid search and return its results ranked best-first.

        Every argument defaults to the value that used to be hard-coded, so
        calling the method without arguments behaves as before.

        Args:
            param_grid: mapping of SVC parameter name to candidate values;
                ``None`` selects :attr:`DEFAULT_PARAM_GRID`.
            n_splits: number of ``GroupKFold`` folds.
            n_groups: number of distinct pseudo-random group ids drawn for the samples.
            seed: seed of the generator that assigns the group ids.
            scoring: scorer name handed to ``GridSearchCV``.

        Returns:
            pandas.DataFrame holding :attr:`RESULT_COLUMNS`, sorted by
            ``rank_test_score`` (the original row index is kept).
        """
        from time import time

        if param_grid is None:
            param_grid = self.DEFAULT_PARAM_GRID

        cv = GroupKFold(n_splits)
        # Group ids are drawn at random from a seeded generator, so the fold
        # assignment is reproducible from `seed`.
        rng = np.random.RandomState(seed)
        groups = rng.randint(0, n_groups, size=len(self.label_train))

        grid_search = GridSearchCV(SVC(kernel='rbf'),
                                   param_grid=param_grid,
                                   scoring=scoring,
                                   cv=cv,
                                   return_train_score=True)
        start = time()
        grid_search.fit(self.features_train, self.label_train, groups=groups)

        print(
            "GridSearchCV took %.2f seconds for %d candidate parameter settings."
            % (time() - start, len(grid_search.cv_results_["params"]))
        )

        ranked = pd.DataFrame(grid_search.cv_results_)[self.RESULT_COLUMNS]
        # Non-mutating sort: same result as the former in-place sort on the column slice.
        return ranked.sort_values(by='rank_test_score')

In [3]:
# Record Visualizers, add to module when done
from sklearn.metrics import ConfusionMatrixDisplay,RocCurveDisplay,PrecisionRecallDisplay,DetCurveDisplay
class ViewGridSearchResults:
    """Draws the top grid-search records as a table on one reusable figure."""

    def __init__(self):
        """Create the 3x3 inch figure that every call redraws."""
        self.fig  = plt.figure(figsize=(3,3))

    def print_records_to_table(self, records):
        """Redraw the figure as a table of grid-search records.

        Args:
            records: indexable pair; ``records[0]`` holds the column labels and
                ``records[1]`` the rows of cell text.

        Returns:
            The figure owned by this view.
        """
        figure = self.fig
        figure.clf()

        header, rows = records[0], records[1]

        axes = figure.subplots()
        figure.suptitle("Top Grid Search Results")

        # Show only the table: no figure background and no axes frame.
        figure.patch.set_visible(False)
        for mode in ('off', 'tight'):
            axes.axis(mode)
        figure.tight_layout()
        figure.canvas.header_visible = False

        # One relative width per column.
        widths = [0.15,0.16,0.17,0.16,0.18,0.18]

        grid = axes.table(cellText=rows, colLabels=header, loc='center', colWidths=widths)
        grid.auto_set_font_size(False)
        grid.set_fontsize(8)
        grid.scale(1.5,1.5)

        return figure


class ViewPrecisionRecall:
    """Plots precision-recall curves on one reusable figure."""

    def __init__(self):
        """Create the 4x3 inch figure that every call redraws."""
        self.fig = plt.figure(figsize=(4,3))

    def show_precision_recall(self,train_model,features_train,features_test,labels_train,labels_test,test_model,test_mode):
        """Redraw the precision-recall plot.

        The training curve is always drawn from ``train_model``. When
        ``test_mode == 1`` a second curve is added from the decision-function
        scores of ``test_model`` on ``features_test``.

        Returns:
            The figure owned by this view.
        """
        figure = self.fig
        figure.clf()

        axes = figure.subplots()
        figure.canvas.header_visible = False
        axes.set_title("Precision Recall Curve (PRC)")

        PrecisionRecallDisplay.from_estimator(
            train_model, features_train, labels_train,
            ax=axes, name='train', plot_chance_level=True,
        )

        if test_mode == 1:
            test_scores = test_model.decision_function(features_test)
            PrecisionRecallDisplay.from_predictions(
                labels_test, test_scores,
                ax=axes, name="test", plot_chance_level=True,
            )

        axes.set_aspect('auto')
        axes.set_ylabel('Precision')
        axes.set_xlabel('Recall')
        # Anchor the legend's upper-left corner just below the axes.
        axes.legend(bbox_to_anchor=(0.1, -0.1), loc='upper left', borderaxespad=0)

        figure.tight_layout()
        return figure



class ViewConfusionMatrix:
    """Keeps one figure for the training confusion matrix and one for the test matrix."""

    def __init__(self):
        """Create the two 3x3 inch figures (train first, then test)."""
        self.train_fig = plt.figure(figsize=(3,3))
        self.test_fig = plt.figure(figsize=(3,3))

    def show_confusion_matrix_train(self,model,features_train,labels_train):
        """Redraw the training confusion matrix from ``model`` and return its figure."""
        figure = self.train_fig
        figure.clf()

        axes = figure.subplots()
        axes.set_title("Confusion Matrix Train")
        ConfusionMatrixDisplay.from_estimator(
            model, features_train, labels_train, ax=axes, colorbar=False
        )
        return figure

    def show_confusion_matrix_test(self,test_mode, labels_test,predictions):
        """Redraw the test confusion matrix and return its figure.

        The matrix is drawn only when ``test_mode == 1``; otherwise the figure
        contains just the titled, empty axes.
        """
        figure = self.test_fig
        figure.clf()

        axes = figure.subplots()
        axes.set_title("Confusion Matrix Test")
        if test_mode != 1:
            return figure

        ConfusionMatrixDisplay.from_predictions(
            labels_test, predictions, ax=axes, colorbar=False, cmap="magma"
        )
        return figure


class ViewModelMetrics:
    """Shows model metrics as a small table on one reusable figure."""

    def __init__(self):
        """Create the 2x1.25 inch figure that every call redraws."""
        self.fig = plt.figure(figsize=(2,1.25))

    def _draw_table(self, col_labels, row_labels, rows):
        """Lay out a "Model Performance" table on the (already cleared) figure."""
        figure = self.fig
        axes = figure.subplots()
        figure.suptitle("Model Performance")
        figure.canvas.header_visible = False
        # Show only the table: no figure background and no axes frame.
        figure.patch.set_visible(False)
        axes.axis('off')
        axes.axis('tight')

        grid = axes.table(colLabels=col_labels, rowLabels=row_labels, cellText=rows, loc='center')
        grid.auto_set_font_size(False)
        grid.set_fontsize(8)
        grid.scale(1.5,1.5)
        return figure

    def show_train_metrics(self,train_record):
        """Redraw the table with a single "Train Results" row.

        Args:
            train_record: indexable pair of (column labels, metric values).

        Returns:
            The figure owned by this view.
        """
        self.fig.clf()
        header, values = train_record[0], train_record[1]
        return self._draw_table(header, ["Train Results"], [values])

    def show_test_metrics(self,train_record, test_record):
        """Redraw the table with a "Training" row and a "Testing" row.

        Column labels come from ``train_record``; only the values of
        ``test_record`` are used.

        Returns:
            The figure owned by this view.
        """
        self.fig.clf()
        header = train_record[0]
        train_values = train_record[1]
        test_values = test_record[1]
        return self._draw_table(header, ["Training","Testing"], [train_values, test_values])

class ViewROC:
    """Plots ROC curves on one reusable figure."""

    def __init__(self):
        """Create the 4x4 inch figure that every call redraws."""
        self.fig = plt.figure(figsize=(4,4))

    def show_ROC(self,train_model,features_train,features_test,labels_train,labels_test,test_model,test_mode):
        """Redraw the ROC plot.

        The training curve is always drawn from ``train_model``. When
        ``test_mode == 1`` a second curve is added from the decision-function
        scores of ``test_model`` on ``features_test``.

        Returns:
            The figure owned by this view.
        """
        figure = self.fig
        figure.clf()

        axes = figure.subplots()
        axes.set_title("ROC Curve")
        RocCurveDisplay.from_estimator(
            train_model, features_train, labels_train, ax=axes, name="train"
        )

        if test_mode == 1:
            test_scores = test_model.decision_function(features_test)
            RocCurveDisplay.from_predictions(
                labels_test, test_scores, ax=axes, name="test"
            )

        figure.canvas.header_visible = False
        axes.set_ylabel('True Positive Rate')
        axes.set_xlabel('False Positive Rate')
        figure.tight_layout()
        return figure





class ViewDET:
    """Plots detection-error-tradeoff (DET) curves on one reusable figure."""

    def __init__(self):
        """Create the 4x3 inch figure that every call redraws.

        The figure is created without a layout engine. ``show_DET`` calls
        ``tight_layout()`` on every render, which replaces any engine set
        here, so requesting ``layout="constrained"`` had no lasting effect
        and only raised a "layout has changed to tight" warning.
        """
        self.fig = plt.figure(figsize=(4,3))

    def show_DET(self,train_model,features_train,features_test,labels_train,labels_test,test_model,test_mode):
        """Redraw the DET plot.

        The training curve is always drawn from ``train_model``. When
        ``test_mode == 1`` a second curve is added from the decision-function
        scores of ``test_model`` on ``features_test``.

        Returns:
            The figure owned by this view.
        """
        self.fig.clf()
        ax = self.fig.subplots()
        ax.set_title("DET Curve")
        self.fig.canvas.header_visible = False

        DetCurveDisplay.from_estimator(train_model, features_train, labels_train,ax=ax,name='train')
        if test_mode == 1:
            pred = test_model.decision_function(features_test)
            DetCurveDisplay.from_predictions(labels_test,pred,ax=ax,name='test')

        ax.set_ylabel('False Negative Rate')
        ax.set_xlabel('False Positive Rate')
        self.fig.tight_layout()
        return self.fig

    

In [4]:
# Results Viewer
# Set matplotlib's default font size to 8 for text drawn after this point.
plt.rcParams.update({'font.size': 8})

from sklearn.model_selection import cross_validate
from sklearn.metrics import recall_score,precision_score,accuracy_score,d2_absolute_error_score
import pandas as pd
from sklearn.svm import SVC
import matplotlib.pyplot as plt


class ResultsViewer:
    """Fits an RBF SVC from a ranked grid-search record and exposes its diagnostics.

    The viewer keeps the ranked grid-search table, the model fitted from the
    currently selected record, and one view object per diagnostic figure.
    Out-of-sample ("test") output is produced only after
    :meth:`set_testing_model` has been called for the current model.
    """

    # Labels for the values returned by get_train_metrics / get_test_metrics,
    # in the order the values are appended.
    _METRIC_COLUMNS = ('recall', 'precision', 'accuracy', 'd2_error')

    def __init__(self, df, features_train, labels_train,features_test, labels_test):
        """Store the data splits and create the views.

        Args:
            df: grid-search results as returned by ``GridCV.perform_gridsearch``
                (sorted best-first, including a ``rank_test_score`` column).
            features_train, labels_train: data used to fit and cross-validate.
            features_test, labels_test: held-out data used only in test mode.
        """
        self.df = self.arrange_columns(df)

        self.model = SVC()
        self.current_record_idx = 0

        # `features`/`labels` are what the model is fitted on; they alias the train split.
        self.features = features_train
        self.labels = labels_train

        self.features_train = features_train
        self.labels_train = labels_train

        # Visualizers
        self.vgr = ViewGridSearchResults()
        self.prc = ViewPrecisionRecall()
        self.vcm = ViewConfusionMatrix()
        self.vmm = ViewModelMetrics()
        self.vroc = ViewROC()
        self.vdet = ViewDET()

        # Testing only
        self.features_test = features_test
        self.labels_test = labels_test
        self._reset_testing_state()

    def _reset_testing_state(self):
        """Forget any out-of-sample model and predictions (leave test mode)."""
        self.test_model = None
        self.test_mode = 0
        self.predictions = []

    @staticmethod
    def _is_unweighted(weight):
        """True when a stored ``class_weight`` cell means "no weighting" (None or NaN)."""
        return weight is None or (isinstance(weight, float) and weight != weight)

    def arrange_columns(self, in_df):
        """Return a copy of the grid-search table with short names and a 1-based rank.

        ``rank`` follows the row order of ``in_df``. ``rank_test_score`` is
        dropped and the remaining columns are renamed by position, so
        ``in_df`` must carry its columns in the order produced by
        ``GridCV.perform_gridsearch``.
        """
        df = in_df.copy()
        df['rank'] = list(range(1, len(df) + 1))
        df = df.drop(columns=['rank_test_score'])
        df.columns = ['L2Mult','weight','gamma','test_recall','train_recall', 'test_stdev','rank']
        return df[['rank','L2Mult','weight','gamma','test_recall','train_recall', 'test_stdev']]

    def get_top_records(self, num_records=10):
        """Return the first ``num_records`` rows of the ranked table (fewer if not available)."""
        return self.df.iloc[0:num_records]

    def format_top_records_table(self, recs,num_records=10):
        """Turn ranked records into ``(column labels, rows of cell text)``.

        At most ``num_records`` rows are produced; when ``recs`` is shorter,
        only the available rows are formatted instead of indexing past the end.
        """
        cols = ['rank', 'weight','gamma','L2Mult','test recall', 'train recall']

        cell_text = []
        for i in range(min(num_records, len(recs))):
            weight = 'unbal' if self._is_unweighted(recs['weight'].iloc[i]) else 'bal'
            cell_text.append([recs['rank'].iloc[i],
                              weight,
                              recs['gamma'].iloc[i],
                              round(recs['L2Mult'].iloc[i],7),
                              round(recs['test_recall'].iloc[i],2),
                              round(recs['train_recall'].iloc[i],2)])
        return cols, cell_text

    def print_records_to_table(self):
        """Render the top records table and return its figure."""
        return self.vgr.print_records_to_table(self.format_top_records_table(self.get_top_records()))

  ####################################################################################

    def select_record(self,idx):
        """Make record ``idx`` (0-based row of the ranked table) current and refit the model.

        Any earlier out-of-sample state is dropped. It belongs to the
        previously selected model, and keeping it would plot that model's
        "test" curves next to the new model's "train" curves.
        """
        plt.close()  # closes pyplot's current figure
        self.current_record_idx = idx
        self._reset_testing_state()
        self.fit_model_with_record(idx)

    def apply_record_to_model(self, rec_index=0):
        """Build an unfitted RBF SVC from the hyper-parameters of row ``rec_index``."""
        weight = self.df['weight'].iloc[rec_index]
        if self._is_unweighted(weight):
            # A missing weight may be stored as NaN; SVC expects None for "no weighting".
            weight = None

        return SVC(C=self.df['L2Mult'].iloc[rec_index],
                   kernel='rbf',
                   gamma=self.df['gamma'].iloc[rec_index],
                   class_weight=weight
                   )

    def fit_model_with_record(self, rec_index=0):
        """Fit a fresh model for row ``rec_index`` on the training data and return it."""
        self.model = self.apply_record_to_model(rec_index=rec_index)
        self.model = self.model.fit(self.features,self.labels)
        return self.model

    def set_testing_model(self):
        """Enter test mode: freeze the current model and predict the held-out set."""
        self.test_model = self.model
        self.test_mode = 1
        self.predictions = self.test_model.predict(self.features_test)

    def show_confusion_matrix_train(self):
        """Render the training confusion matrix for the current model."""
        return self.vcm.show_confusion_matrix_train(self.model,
                                                    self.features_train,
                                                    self.labels_train)

    def show_confusion_matrix_test(self):
        """Render the test confusion matrix (empty axes unless in test mode)."""
        return self.vcm.show_confusion_matrix_test(self.test_mode,
                                                   self.labels_test,
                                                   self.predictions)

    def show_ROC(self):
        """Render the ROC curve(s) for the current model."""
        return self.vroc.show_ROC(self.model,
                                self.features_train,
                                self.features_test,
                                self.labels_train,
                                self.labels_test,
                                self.test_model,
                                self.test_mode)

    def show_DET(self):
        """Render the DET curve(s) for the current model."""
        return self.vdet.show_DET(self.model,
                                self.features_train,
                                self.features_test,
                                self.labels_train,
                                self.labels_test,
                                self.test_model,
                                self.test_mode)

    def show_precision_recall(self):
        """Render the precision-recall curve(s) for the current model."""
        return self.prc.show_precision_recall(self.model,
                                              self.features_train,
                                              self.features_test,
                                              self.labels_train,
                                              self.labels_test,
                                              self.test_model,
                                              self.test_mode)

    def get_train_metrics(self):
        """Cross-validate the current model on the training data.

        Returns:
            ``(column labels, values)`` where each value is the mean
            cross-validated score rounded to 2 decimals.
        """
        scoring = ['recall','precision','accuracy','d2_absolute_error_score']
        scores = cross_validate(self.model, self.features, self.labels,scoring=scoring)

        cells = [round(scores['test_' + name].mean(), 2) for name in scoring]
        return list(self._METRIC_COLUMNS), cells

    def get_test_metrics(self):
        """Score the stored out-of-sample predictions.

        Returns:
            ``(column labels, values)`` with recall, precision, accuracy and
            D2 absolute error, each rounded to 3 decimals. The labels now
            match the order of the values.

        Raises:
            RuntimeError: if :meth:`set_testing_model` has not been called for
                the current model.
        """
        if self.test_mode != 1:
            raise RuntimeError("set_testing_model() must be called before get_test_metrics()")

        metric_functions = (recall_score, precision_score, accuracy_score, d2_absolute_error_score)
        cells = [round(metric(self.labels_test, self.predictions), 3)
                 for metric in metric_functions]
        return list(self._METRIC_COLUMNS), cells

    def show_train_metrics(self):
        """Render the metrics table with the training row only."""
        return self.vmm.show_train_metrics(self.get_train_metrics())

    def show_test_metrics(self):
        """Render the metrics table with both training and testing rows."""
        return self.vmm.show_test_metrics(self.get_train_metrics(),self.get_test_metrics())


In [5]:
# eval view controller
def eval_controller(tr):
    """Build the interactive evaluation dashboard for a ``ResultsViewer``.

    The dashboard has three columns: the record selector with the top-records
    table, precision-recall plot and out-of-sample button; the metrics table
    with the confusion matrices; and the ROC and DET plots. Selecting a record
    refits the model and redraws the training views. The button evaluates the
    current model on the held-out data and adds the test views.

    Args:
        tr: the ``ResultsViewer`` that owns the model and the figures.

    Returns:
        ``widgets.HBox`` containing the whole dashboard.
    """
    center_align = widgets.Layout(display='flex',
                    flex_flow='column',
                    align_items='center',
                    width='90%')

    rec_sel_label = widgets.Label(value="Select Record to View")

    # Offer exactly the records shown in the table, so a choice can never
    # point past the end of the results.
    record_choices = [str(rank) for rank in range(1, len(tr.get_top_records()) + 1)]
    rec_sel_dropdown = widgets.Dropdown(
        options=record_choices,
        value=record_choices[0] if record_choices else None,
        description='',
        disabled=False
    )

    # Output areas, one per figure.
    record_out = widgets.Output()
    roc_out = widgets.Output()
    det_out = widgets.Output()
    stats_out =  widgets.Output()
    cm_out = widgets.Output(layout=center_align)
    cm2_out = widgets.Output(layout=center_align)
    prec_recall_out = widgets.Output(layout=center_align)

    run_oos_button  = widgets.Button(description='Test model on out of sample data',layout=center_align)

    record_box = widgets.VBox([rec_sel_label,rec_sel_dropdown,record_out,prec_recall_out,run_oos_button])
    stat_box = widgets.VBox([stats_out,cm_out,cm2_out])
    chart_box = widgets.VBox([roc_out, det_out])
    left_box = record_box

    def render(out, make_figure):
        """Replace the content of ``out`` with the figure returned by ``make_figure``."""
        with out:
            out.clear_output(wait=True)
            # Built inside the context so anything it emits lands in `out`.
            display(make_figure())

    def render_model_views(dfx, make_stats_figure):
        """Redraw every view that depends on the currently fitted model."""
        render(stats_out, make_stats_figure)
        render(cm_out, dfx.show_confusion_matrix_train)
        render(roc_out, dfx.show_ROC)
        render(det_out, dfx.show_DET)
        render(prec_recall_out, dfx.show_precision_recall)

    def initialize():
        """Fit the best-ranked record and draw the initial dashboard."""
        tr.fit_model_with_record(0)
        render(record_out, tr.print_records_to_table)
        render_model_views(tr, tr.show_train_metrics)

    def select_record_to_view(dfx,names):
        """Dropdown observer: refit on the chosen record and redraw the training views."""
        val = int(names.new) - 1
        dfx.select_record(val)

        # The test confusion matrix on screen belongs to the previous model.
        # Remove it now; wait=True would defer clearing until new output arrives.
        cm2_out.clear_output()

        render_model_views(dfx, dfx.show_train_metrics)

    def test_model_on_oos(dfx,val):
        """Button handler: evaluate the current model on the held-out data."""
        dfx.set_testing_model()
        render_model_views(dfx, dfx.show_test_metrics)
        render(cm2_out, dfx.show_confusion_matrix_test)

    initialize()

    rec_sel_dropdown.observe(partial(select_record_to_view,tr),names='value')
    run_oos_button.on_click(partial(test_model_on_oos, tr))

    return widgets.HBox([left_box, stat_box,chart_box])

In [6]:
#import src.ModelEvaluator.GridSearch as gs
#import src.ModelEvaluator.ViewResults as vr
#import src.ModelEvaluator.EvalViewController as evc

#gridsearch = gs.GridCV(df)
# Run the grid search with the notebook-local GridCV; `results` is consumed by the next cell.
gridsearch = GridCV(df)
results = gridsearch.perform_gridsearch()
# there should be a function in dataframe that spits these out
#tr = vr.ResultsViewer(results,vc.df.features_train,vc.df.labels_train,vc.df.features_test,vc.df.labels_test)
#evc.eval_controller(tr)


GridSearchCV took 10.88 seconds for 28 candidate parameter settings.


In [7]:
# Wrap the ranked grid-search results and the train/test splits of `df` in a ResultsViewer.
tr = ResultsViewer(results,df.features_train,df.labels_train,df.features_test,df.labels_test)
# Last expression of the cell, so the returned HBox is rendered as the cell output.
eval_controller(tr)

HBox(children=(VBox(children=(Label(value='Select Record to View'), Dropdown(options=('1', '2', '3', '4', '5',…