In [1]:
%matplotlib widget
import pandas as pd
import numpy as np
import ipywidgets as widgets
from ipywidgets import Layout, AppLayout
from IPython.display import display
from IPython.display import Audio
from functools import partial
import librosa.display
import matplotlib.pyplot as plt
import src.FeatureExtractors as fe
from matplotlib.colors import Normalize
import sklearn
from sklearn.model_selection import GroupKFold, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate
from sklearn.svm import SVC


# Read the RAVDESS metadata table, restricted to the listed columns.
df = pd.read_csv(
    'datasets/RAVDESS/metadata/RAVDESS.csv',
    usecols=['actor', 'actor_sex', 'emotion', 'label', 'filepath'],
)

# Choose the feature set(s) and add to dataframe

def choose_features(mfcc=40, mel=128):
    """Load the pre-computed feature matrix (or matrices) and standardise it.

    Parameters
    ----------
    mfcc : int, str or None
        Suffix of the MFCC file to load (``mfcc{mfcc}.npy``). Pass the string
        ``'None'`` (or Python ``None``) to leave the MFCC features out.
    mel : int, str or None
        Suffix of the mel file to load (``mel{mel}.npy``). Pass the string
        ``'None'`` (or Python ``None``) to leave the mel features out.

    Returns
    -------
    numpy.ndarray
        The result of ``StandardScaler().fit_transform`` on the loaded matrix.
        When both sets are requested they are stacked column-wise, MFCC
        columns first.

    Raises
    ------
    ValueError
        If both feature sets are disabled (previously this silently
        returned ``None``).
    """
    use_mfcc = mfcc is not None and mfcc != 'None'
    use_mel = mel is not None and mel != 'None'

    # Validate before touching the disk or building a scaler.
    if not (use_mfcc or use_mel):
        raise ValueError("At least one of 'mfcc' or 'mel' must be selected.")

    loaded = []
    if use_mfcc:
        loaded.append(np.load(f'datasets/RAVDESS/features/mfcc/mfcc{mfcc}.npy'))
    if use_mel:
        loaded.append(np.load(f'datasets/RAVDESS/features/mel/mel{mel}.npy'))

    feature_matrix = loaded[0] if len(loaded) == 1 else np.hstack(tuple(loaded))

    # NOTE(review): the scaler is fit on every row loaded here, and the notebook
    # splits into train/test only afterwards, so held-out rows influence the
    # scaling statistics. Confirm whether that is acceptable.
    return StandardScaler().fit_transform(feature_matrix)



# Store one standardised feature vector per metadata row (40 MFCCs + 128 mel bands).
df['features'] = list(choose_features(mfcc=40, mel=128))





# split data set functions
def random_split(df, test_size=0.2, random_state=None):
    """Randomly split the rows of ``df`` into training and test arrays.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``'features'`` column (one vector per row) and a
        ``'label'`` column.
    test_size : float or int
        Forwarded to ``train_test_split``.
    random_state : int, RandomState instance or None
        Forwarded to ``train_test_split``. The default ``None`` keeps the
        original behaviour (a different split on every call); pass an int to
        make the split reproducible.

    Returns
    -------
    list
        ``[features_train, features_test, labels_train, labels_test]`` as
        returned by ``train_test_split``.
    """
    feature_matrix = np.vstack(df['features'])
    label_vector = np.array(df['label'])

    return train_test_split(
        feature_matrix,
        label_vector,
        test_size=test_size,
        random_state=random_state,
    )


def split_by_actor_sex(df, train_set='male'):
    """Split ``df`` so that one actor sex trains and the other tests.

    ``train_set='male'`` trains on male actors and tests on female actors;
    ``train_set='female'`` does the opposite. For any other value the whole
    frame is used for both the training and the test side.

    Returns ``(features_train, features_test, labels_train, labels_test)``,
    with the feature columns stacked row-wise into 2-D arrays.
    """
    if train_set == 'male':
        sexes = ('male', 'female')
    elif train_set == 'female':
        sexes = ('female', 'male')
    else:
        sexes = None

    if sexes is None:
        # Unrecognised selector: both sides fall back to the full frame.
        train_rows = test_rows = df
    else:
        train_sex, test_sex = sexes
        actor_sex = df['actor_sex']
        train_rows = df[actor_sex == train_sex]
        test_rows = df[actor_sex == test_sex]

    features_train = np.vstack(train_rows['features'])
    features_test = np.vstack(test_rows['features'])
    labels_train = np.array(train_rows['label'])
    labels_test = np.array(test_rows['label'])
    return features_train, features_test, labels_train, labels_test


# Cross-validation scoring helper; it is not called in the visible cells and may
# turn out to be unnecessary.
# Metadata routing is switched on so that `groups` passed through `params`
# below can be routed to the GroupKFold splitter.
sklearn.set_config(enable_metadata_routing=True)
def get_model_cv_scores(model, features, labels, groups=None):
    """Cross-validate ``model`` with a 5-fold ``GroupKFold`` and return the scores.

    Parameters
    ----------
    model : estimator
        Estimator passed to ``cross_validate``.
    features, labels : array-like
        Training data and targets.
    groups : array-like or None
        Group label per sample (for example an actor id), so that samples of
        one group never land in both the fitting and the scoring fold. When
        omitted, the original behaviour is kept: pseudo-random group ids in
        ``[0, 10)`` drawn from a fixed seed.

    Returns
    -------
    pandas.DataFrame
        One row per fold, built from the dict returned by ``cross_validate``.
    """
    if groups is None:
        # Backward-compatible default; these ids carry no real grouping information.
        groups = np.random.RandomState(7).randint(0, 10, size=len(labels))

    scoring = ['recall_micro', 'precision_micro','f1_micro','balanced_accuracy','roc_auc']
    scores = cross_validate(
        model,
        features,
        labels,
        scoring=scoring,
        cv=GroupKFold(5),
        params={"groups": groups},
    )
    return pd.DataFrame(scores)

# Randomly hold out 40% of the rows as the test set; the rest is used for training.
feature_train, feature_test, label_train, label_test = random_split(df, test_size=0.4)

In [2]:
# Grid Search
from sklearn.svm import SVC
from sklearn.metrics import recall_score, balanced_accuracy_score
# Candidate SVC settings for the search: 7 C values x 2 gamma modes x 2 class weightings.
param_grid = dict(
    C=[0.1, 0.5, 0.8, 1.0, 1.2, 1.5, 2],
    gamma=["auto", "scale"],
    class_weight=["balanced", None],
)


from time import time
# 5-fold GroupKFold over the training rows.
# NOTE(review): the group ids are pseudo-random integers in [0, 10) from a fixed
# seed, not actor ids - confirm this is the intended grouping.
cv = GroupKFold(n_splits=5)
rng = np.random.RandomState(7)
groups = rng.randint(0, 10, size=len(label_train))

grid_search = GridSearchCV(
    estimator=SVC(kernel='rbf'),
    param_grid=param_grid,
    scoring='recall',
    cv=cv,
    return_train_score=True,
)

# Time the whole search, then report it with the number of candidates tried.
start = time()
grid_search.fit(feature_train, label_train, groups=groups)
elapsed = time() - start
n_candidates = len(grid_search.cv_results_["params"])

print(
    "GridSearchCV took %.2f seconds for %d candidate parameter settings."
    % (elapsed, n_candidates)
)

# Keep only the columns the results viewer needs, best-ranked candidate first.
use_cols = [
    'param_C',
    'param_class_weight',
    'param_gamma',
    'mean_test_score',
    'mean_train_score',
    'std_test_score',
    'rank_test_score',
]
gr = pd.DataFrame(grid_search.cv_results_)[use_cols].sort_values(by='rank_test_score')

GridSearchCV took 10.66 seconds for 28 candidate parameter settings.


In [3]:
from sklearn.metrics import ConfusionMatrixDisplay,RocCurveDisplay,PrecisionRecallDisplay,DetCurveDisplay
from sklearn.model_selection import cross_validate
from sklearn.metrics import recall_score,precision_score,f1_score,hinge_loss,accuracy_score,d2_absolute_error_score


class ResultsViewer:
    """Turn a ranked grid-search table into matplotlib figures for an RBF-kernel SVC.

    The viewer keeps the ranked candidate table, refits an ``SVC`` for whichever
    record is selected, and builds tables, confusion matrices and ROC / DET /
    precision-recall figures. Every ``show_*`` / ``print_*`` method closes the
    figure it creates and returns it, so the caller decides where to display it.
    """

    # Column headers shared by the training and testing metric rows; the order
    # matches the values produced by get_train_metrics / get_test_metrics.
    METRIC_COLUMNS = ['recall', 'precision', 'accuracy', 'd2_error']

    def __init__(self, df, features_train, labels_train,features_test, labels_test):
        """Store the data splits and the re-arranged grid-search table.

        Parameters
        ----------
        df : pandas.DataFrame
            Grid-search results, already sorted best-first, whose columns are
            (in order) C, class weight, gamma, mean test score, mean train
            score, test score std and ``rank_test_score``.
        features_train, labels_train : array-like
            Data used to fit and to draw the training diagnostics.
        features_test, labels_test : array-like
            Held-out data, only used after ``set_testing_model`` is called.
        """
        self.df = self.arrange_columns(df)

        self.model = SVC()
        self.current_record_idx = 0

        # `features` / `labels` are what the model is fitted on; they alias the
        # training split.
        self.features = features_train
        self.labels = labels_train

        self.features_train = features_train
        self.labels_train = labels_train

        # Testing only
        self.features_test = features_test
        self.labels_test = labels_test
        self.test_model = None
        self.test_mode = 0

        self.predictions = []

    @staticmethod
    def _is_missing(value):
        """Return True for ``None`` or a float NaN (a missing class weight may be stored either way)."""
        return value is None or (isinstance(value, float) and value != value)

    def arrange_columns(self, in_df):
        """Return a renamed, re-ordered copy of the grid-search table.

        A 1-based ``rank`` column is added from the current row order,
        ``rank_test_score`` is dropped, and the remaining columns are renamed
        positionally. The input frame is left untouched.
        """
        table = in_df.copy().drop(columns=['rank_test_score'])
        table['rank'] = list(range(1, len(table) + 1))
        # Positional rename: relies on the column order documented in __init__.
        table.columns = ['L2Mult','weight','gamma','test_recall','train_recall', 'test_stdev','rank']
        return table[['rank','L2Mult','weight','gamma','test_recall','train_recall', 'test_stdev']]

    def get_top_records(self, num_records=10):
        """Return the first ``num_records`` rows of the ranked table."""
        return self.df.iloc[0:num_records]

    def format_top_records_table(self, recs,num_records=10):
        """Build the header list and cell rows for the top-records table.

        At most ``num_records`` rows are emitted; if ``recs`` holds fewer rows,
        only those are used (previously this raised ``IndexError``).

        Returns
        -------
        tuple
            ``(cols, cell_text)`` where ``cell_text`` has one list per record.
        """
        cols = ['rank', 'weight','gamma','L2Mult','test recall', 'train recall']

        cell_text = []
        for i in range(min(num_records, len(recs))):
            # A missing class weight means the classes were not re-balanced.
            weight = 'unbal' if self._is_missing(recs['weight'].iloc[i]) else 'bal'

            cell_text.append([recs['rank'].iloc[i],
                              weight,
                              recs['gamma'].iloc[i],
                              round(recs['L2Mult'].iloc[i],7),
                              round(recs['test_recall'].iloc[i],2),
                              round(recs['train_recall'].iloc[i],2)])
        return cols, cell_text

    def print_records_to_table(self):
        """Return a figure showing the top grid-search records as a table."""
        cols, cell_text = self.format_top_records_table(self.get_top_records())

        fig,ax = plt.subplots(figsize=(3,3),layout="constrained")
        fig.suptitle("Top Grid Search Results")

        fig.patch.set_visible(False)
        ax.axis('off')
        ax.axis('tight')
        fig.canvas.header_visible = False

        col_widths = [0.15,0.16,0.17,0.16,0.18,0.18]

        table = ax.table(cellText=cell_text,colLabels=cols,loc='center',colWidths=col_widths)
        table.auto_set_font_size(False)
        table.set_fontsize(8)
        table.scale(1.5,1.5)
        plt.close(fig)
        return fig

    ####################################################################################

    def select_record(self,idx):
        """Make record ``idx`` (0-based row of the ranked table) current and refit the model.

        NOTE(review): ``test_model``, ``predictions`` and ``test_mode`` are not
        reset here, so after ``set_testing_model`` the test curves keep
        describing the previously tested model until it is called again.
        """
        plt.close()
        self.current_record_idx = idx
        self.fit_model_with_record(idx)

    def apply_record_to_model(self, rec_index=0):
        """Return an unfitted RBF ``SVC`` configured from row ``rec_index`` of the table."""
        class_weight = self.df['weight'].iloc[rec_index]
        if self._is_missing(class_weight):
            # Normalise a NaN-stored missing value back to None for SVC.
            class_weight = None

        return SVC(C=self.df['L2Mult'].iloc[rec_index],
                   kernel='rbf',
                   gamma=self.df['gamma'].iloc[rec_index],
                   class_weight=class_weight
                   )

    def fit_model_with_record(self, rec_index=0):
        """Configure the model from row ``rec_index``, fit it on the training data and return it."""
        self.model = self.apply_record_to_model(rec_index=rec_index)
        self.model = self.model.fit(self.features,self.labels)
        return self.model

    def set_testing_model(self):
        """Freeze the current model as the test model and predict the held-out set."""
        self.test_model = self.model
        self.test_mode = 1
        self.predictions = self.test_model.predict(self.features_test)

    def _test_scores(self):
        """Return the test model's decision-function scores on the held-out features."""
        return self.test_model.decision_function(self.features_test)

    def show_confusion_matrix_train(self):
        """Return a figure with the current model's confusion matrix on the training data."""
        fig,axs = plt.subplots(figsize=(3,3), layout="constrained")
        axs.set_title("Confusion Matrix Train")
        ConfusionMatrixDisplay.from_estimator(self.model, self.features_train, self.labels_train,ax=axs,colorbar=False)
        plt.close(fig)
        return fig

    def show_confusion_matrix_test(self):
        """Return a figure with the held-out confusion matrix (empty axes until test mode is on)."""
        fig,axs = plt.subplots(figsize=(3,3), layout="constrained")
        axs.set_title("Confusion Matrix Test")
        if self.test_mode == 1:
            ConfusionMatrixDisplay.from_predictions(self.labels_test,self.predictions,ax=axs,colorbar=False,cmap="magma")
        plt.close(fig)
        return fig

    def show_ROC(self):
        """Return the ROC figure: training curve always, test curve once test mode is on."""
        fig, ax = plt.subplots(figsize=(4,4),layout="constrained")
        fig.canvas.header_visible = False
        ax.set_title("ROC Curve")
        RocCurveDisplay.from_estimator(self.model, self.features_train, self.labels_train,ax=ax,name="ROC Training")

        if self.test_mode == 1:
            RocCurveDisplay.from_predictions(self.labels_test,self._test_scores(),ax=ax,name="ROC Test")

        ax.set_ylabel('True Positive Rate')
        ax.set_xlabel('False Positive Rate')
        plt.close(fig)
        return fig

    def show_DET(self):
        """Return the DET figure: training curve always, test curve once test mode is on."""
        fig, ax = plt.subplots(figsize=(4,3),layout="constrained")
        ax.set_title("DET Curve")
        fig.canvas.header_visible = False
        DetCurveDisplay.from_estimator(self.model, self.features_train, self.labels_train,ax=ax)
        if self.test_mode == 1:
            DetCurveDisplay.from_predictions(self.labels_test,self._test_scores(),ax=ax)
        ax.set_ylabel('False Negative Rate')
        ax.set_xlabel('False Positive Rate')
        plt.close(fig)
        return fig

    def show_precision_recall(self):
        """Return the precision-recall figure: training curve always, test curve once test mode is on."""
        fig, ax = plt.subplots(figsize=(4,3),layout="constrained")
        fig.canvas.header_visible = False
        ax.set_title("Precision Recall Curve (PRC)")
        PrecisionRecallDisplay.from_estimator(self.model, self.features_train, self.labels_train,ax=ax,name="PRC Train",plot_chance_level=True)

        if self.test_mode == 1:
            PrecisionRecallDisplay.from_predictions(self.labels_test,self._test_scores(),ax=ax,name="PRC Test",plot_chance_level=True)

        # A precision-recall curve plots precision (y) against recall (x);
        # the labels used to say TPR / FPR, which belong to the ROC plot.
        ax.set_ylabel('Precision')
        ax.set_xlabel('Recall')
        plt.close(fig)
        return fig

    def get_train_metrics(self):
        """Return ``(cols, cells)`` with cross-validated scores on the training data.

        The current model is passed to ``cross_validate`` with its default
        splitting; each cell is the mean test-fold score rounded to 2 decimals,
        in the order given by ``METRIC_COLUMNS``.
        """
        scoring = ['recall','precision','accuracy','d2_absolute_error_score']
        scores = cross_validate(self.model, self.features, self.labels,scoring=scoring)

        cells = [round(scores[f'test_{name}'].mean(),2) for name in scoring]
        return list(self.METRIC_COLUMNS), cells

    def get_test_metrics(self):
        """Return ``(cols, cells)`` with the held-out scores of the test model.

        Cells are recall, precision, accuracy and D2 absolute error of the
        stored predictions, rounded to 3 decimals; ``cols`` names them in that
        same order.

        Raises
        ------
        RuntimeError
            If ``set_testing_model`` has not been called yet.
        """
        if self.test_model is None:
            raise RuntimeError("set_testing_model() must be called before computing test metrics")

        metric_functions = (recall_score, precision_score, accuracy_score, d2_absolute_error_score)
        cells = [round(metric(self.labels_test, self.predictions),3) for metric in metric_functions]
        return list(self.METRIC_COLUMNS), cells

    def _metrics_table_figure(self, cols, rows, cell_rows, figsize):
        """Build, close and return the 'Model Performance' table figure."""
        fig,axs = plt.subplots(figsize=figsize)
        fig.suptitle("Model Performance")
        fig.canvas.header_visible = False
        fig.patch.set_visible(False)
        axs.axis('off')
        axs.axis('tight')

        table = axs.table( colLabels=cols,rowLabels=rows,cellText=cell_rows,loc='center')
        table.auto_set_font_size(False)
        table.set_fontsize(8)
        table.scale(1.5,1.5)
        plt.close(fig)
        return fig

    def show_train_metrics(self):
        """Return a one-row table figure with the training metrics."""
        cols, cell_text = self.get_train_metrics()
        return self._metrics_table_figure(cols, ["Train Results"], [cell_text], (2,2))

    def show_test_metrics(self):
        """Return a two-row table figure comparing training and testing metrics."""
        cols , train_row = self.get_train_metrics()
        _, test_row = self.get_test_metrics()
        return self._metrics_table_figure(cols, ["Training","Testing"], [train_row, test_row], (3,2))



# Build the viewer from the ranked grid-search table and fit the top-ranked record.
tr = ResultsViewer(gr,feature_train, label_train,feature_test,label_test)
tr.fit_model_with_record(0)
# Prevent plots from displaying automatically; the trailing ';' hides the
# context-manager repr that plt.ioff() would otherwise leave as cell output.
plt.ioff();


<contextlib.ExitStack at 0x2a41c41ad50>

In [4]:
rec_sel_label = widgets.Label(value="Select Record to View")

# Record picker: ranks 1 through 10, offered as strings.
rec_sel_dropdown = widgets.Dropdown(
    options=[str(rank) for rank in range(1, 11)],
    value='1',
    description='',
    disabled=False,
)


# One Output widget per panel; each is cleared and redrawn by the callbacks.
(
    params_out,
    record_out,
    roc_out,
    det_out,
    stats_out,
    cm_out,
    cm2_out,
    prec_recall_out,
    test_metrics_out,
) = (widgets.Output() for _ in range(9))

run_oos_button = widgets.Button(description='Test model on out of sample data')

# Left column: record picker, records table, precision-recall plot and test button.
record_box = widgets.VBox([
    rec_sel_label,
    rec_sel_dropdown,
    record_out,
    prec_recall_out,
    run_oos_button,
])

model_box = widgets.VBox([test_metrics_out, prec_recall_out])
# Middle column: metric table above the two confusion matrices.
stat_box = widgets.VBox([stats_out, cm_out, cm2_out])
# Right column: ROC above DET.
chart_box = widgets.VBox([roc_out, det_out])
left_box = record_box


def initialize():
    """Draw the initial content of every panel from the module-level viewer ``tr``.

    Each output widget is cleared and then filled with the matching figure;
    the test confusion-matrix panel is only cleared.
    """
    # (output widget, figure factory) in drawing order; None means "clear only".
    panels = (
        (record_out, tr.print_records_to_table),
        (stats_out, tr.show_train_metrics),
        (cm2_out, None),
        (cm_out, tr.show_confusion_matrix_train),
        (roc_out, tr.show_ROC),
        (det_out, tr.show_DET),
        (prec_recall_out, tr.show_precision_recall),
    )

    for panel, make_figure in panels:
        # The figure is built inside the context so anything it emits lands in the panel.
        with panel:
            panel.clear_output()
            if make_figure is not None:
                display(make_figure())


No such comm: 2ebb2d91640c4900857dbfe240eb022b
No such comm: 2ebb2d91640c4900857dbfe240eb022b
No such comm: 2ebb2d91640c4900857dbfe240eb022b


No such comm: dff09d9e143d4d7eaa3dccf258683dbf
No such comm: dff09d9e143d4d7eaa3dccf258683dbf
No such comm: dff09d9e143d4d7eaa3dccf258683dbf


No such comm: d719e0a6fb1f41c59f47f5f469ac0aee
No such comm: d719e0a6fb1f41c59f47f5f469ac0aee
No such comm: d719e0a6fb1f41c59f47f5f469ac0aee


In [5]:
def select_record_to_view(dfx,names):
    """Dropdown callback: refit the viewer on the chosen record and redraw the training panels.

    Parameters
    ----------
    dfx : ResultsViewer
        Viewer whose model is refitted and whose figures are displayed.
    names : change object
        Change notification from the dropdown; ``names.new`` is the selected
        1-based rank as a string.
    """
    # Dropdown ranks are 1-based strings; the viewer indexes records from 0.
    record_idx = int(names.new) - 1
    dfx.select_record(record_idx)

    # (output widget, figure factory) in drawing order.
    panels = (
        (stats_out, dfx.show_train_metrics),
        (cm_out, dfx.show_confusion_matrix_train),
        (roc_out, dfx.show_ROC),
        (det_out, dfx.show_DET),
        (prec_recall_out, dfx.show_precision_recall),
    )
    for panel, make_figure in panels:
        with panel:
            panel.clear_output()
            display(make_figure())

    plt.close('all')
    

def test_model_on_oos(dfx, val):
    """Button callback: evaluate the current model on the held-out data and redraw every panel.

    ``dfx`` is the viewer to evaluate; ``val`` is the object passed by the
    button click and is not used.
    """
    dfx.set_testing_model()

    # (output widget, figure factory) in drawing order; None means "clear only".
    refresh_plan = (
        (stats_out, dfx.show_test_metrics),
        (cm_out, dfx.show_confusion_matrix_train),
        (roc_out, dfx.show_ROC),
        (det_out, dfx.show_DET),
        (prec_recall_out, dfx.show_precision_recall),
        (cm2_out, dfx.show_confusion_matrix_test),
        (test_metrics_out, None),
    )

    for panel, make_figure in refresh_plan:
        with panel:
            panel.clear_output()
            if make_figure is not None:
                display(make_figure())

    plt.close('all')


# Draw the initial state of every panel.
initialize()

# Bind the shared viewer as the first argument of each callback, then register them.
on_record_change = partial(select_record_to_view, tr)
on_oos_click = partial(test_model_on_oos, tr)
rec_sel_dropdown.observe(on_record_change, names='value')
run_oos_button.on_click(on_oos_click)

# Three columns side by side: record picker | metrics and confusion matrices | curves.
gridsearch_panel = widgets.HBox([left_box, stat_box, chart_box])

gridsearch_panel

HBox(children=(VBox(children=(Label(value='Select Record to View'), Dropdown(options=('1', '2', '3', '4', '5',…