In [1]:
from scipy.stats import mannwhitneyu

# helper function to compute Cohen's d value
def cohen_d(group1, group2):
    """Return Cohen's d effect size for the mean difference of two samples.

    The statistic is ``(mean(group1) - mean(group2)) / s_pooled`` where the
    pooled variance weights each group's sample variance (``ddof=1``) by its
    degrees of freedom::

        s_pooled**2 = ((n1 - 1) * s1**2 + (n2 - 1) * s2**2) / (n1 + n2 - 2)

    For equal-sized groups this is the plain average of the two variances;
    for unequal sizes the weighting stops the smaller group from counting
    as much as the larger one.

    Args:
        group1: Sized collection of numeric observations (list, array,
            Series, ...).
        group2: Sized collection of numeric observations for the other group.

    Returns:
        The effect size; positive when ``group1`` has the larger mean.
        NaN when either group has fewer than two observations, because the
        sample variance is undefined there. When both groups have zero
        variance the division follows numpy semantics (inf or NaN).
    """
    n1, n2 = len(group1), len(group2)
    if n1 < 2 or n2 < 2:
        # Sample variance with ddof=1 needs at least two observations.
        return float('nan')

    mean_diff = np.mean(group1) - np.mean(group2)
    var1 = np.std(group1, ddof=1) ** 2
    var2 = np.std(group2, ddof=1) ** 2
    pooled_std = np.sqrt(((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2))

    return mean_diff / pooled_std

# helper function to compute p-value and effect size of the features among 2 groups
def performStat(group1,group2,selected_feats,alternative='two-sided',effectSize=0.1):
    """Compare two groups feature by feature and report the results.

    For every feature a Mann-Whitney U test and Cohen's d are computed. A
    table with the test statistic, the p-value (rounded to 5 decimals) and
    the effect size (rounded to 4 decimals) is displayed, followed by two
    summary lines.

    Args:
        group1: Mapping/DataFrame giving the observations of each feature
            for the first group (accessed as ``group1[feat]``).
        group2: Same for the second group.
        selected_feats: Iterable of feature names to compare.
        alternative: Passed through to ``scipy.stats.mannwhitneyu``.
        effectSize: Threshold on ``abs(Cohen's d)`` above which a feature is
            reported as having a large effect.

    Returns:
        List of the features whose absolute effect size exceeds
        ``effectSize``.
    """
    not_significant = []
    largeEffect = []
    test_stat_col = []
    pvalue_col = []
    effect_size_col = []
    for feat in selected_feats:
        res = mannwhitneyu(group1[feat], group2[feat], alternative=alternative)
        raw_pvalue = res[1]
        raw_effect = cohen_d(group1[feat], group2[feat])

        # Rounded values are for the displayed table only.
        test_stat_col.append(res[0])
        pvalue_col.append(round(raw_pvalue, 5))
        effect_size_col.append(round(raw_effect, 4))

        # Threshold on the unrounded values: rounding first would move
        # values lying just above a cutoff (e.g. p = 0.050004) onto it and
        # misclassify them.
        if raw_pvalue > 0.05:
            not_significant.append(feat)
        if abs(raw_effect) > effectSize:
            largeEffect.append(feat)

    analysisResult = pd.DataFrame([test_stat_col,pvalue_col,effect_size_col]).T
    analysisResult.columns = ['Test Statistic', 'Test p-value','Effect Size']
    analysisResult.index = selected_feats
    display(analysisResult)
    print(f'{len(not_significant)} features have p-value greater than 0.05, which is not significant different between 2 groups.')
    print(f'{len(largeEffect)} features have effect size greater than {effectSize}.')
    return largeEffect

def errorAnalysis(model,model_training_feature_set,X,y,effectSize=0.3):
    """Break a binary classifier's predictions into TP/TN/FP/FN and compare them.

    The model is applied to ``X[model_training_feature_set]``; each record is
    labelled as correctly or incorrectly predicted, the four confusion-matrix
    groups are counted, and ``performStat`` is run on three pairs of groups
    (TP vs TN, TP vs FP, TN vs FN). Everything is reported through
    ``display``/``print``; nothing is returned.

    Args:
        model: Fitted classifier exposing ``predict`` and ``predict_proba``.
            Column 0 of ``predict_proba`` is reported as 'Predict_NoAKI' and
            column 1 as 'Predict_AKI'.
        model_training_feature_set: Names of the feature columns to use.
        X: DataFrame holding (at least) those feature columns.
        y: True labels, one per row of ``X`` in the same order. Labels are
            expected to be 0 (negative) and 1 (positive).
        effectSize: Effect-size threshold forwarded to ``performStat``.
    """
    separator = '###########################################################################################################'

    features = X[model_training_feature_set]
    ypred_train = model.predict(features)
    ypred_prob_train = model.predict_proba(features)
    neg_pred = [prob[0] for prob in ypred_prob_train]
    pos_pred = [prob[1] for prob in ypred_prob_train]

    # Labels and predictions are positional with respect to the rows of X,
    # so they are placed on X's index explicitly; this keeps the concat
    # below row-aligned whatever index y carries (or lacks).
    analysis = pd.DataFrame({'True':list(y),
                             'Predicted':ypred_train,
                             'Predict_NoAKI':neg_pred,
                             'Predict_AKI':pos_pred
                            }, index=features.index)

    # dataframe with only the selected features plus the true value,
    # the predicted value and the prediction probabilities
    analysis_df = pd.concat([features,analysis], axis=1)

    # view the first 5 rows of the analysis_df dataframe
    display(analysis_df.head())

    # boolean mask of the incorrect predictions (works for any index)
    is_incorrect = analysis_df['True'] != analysis_df['Predicted']

    # check the total number of incorrect prediction
    print('{} records are incorrectly predicted.'.format(int(is_incorrect.sum())))

    # split the dataframe into correctly and incorrectly predicted records
    correct_prediction = analysis_df[~is_incorrect]
    display(correct_prediction.head())
    print('Correct prediction:')
    print('Number of correct prediction: ', correct_prediction.shape[0])

    incorrect_prediction = analysis_df[is_incorrect]
    display(incorrect_prediction.head())
    print('Incorrect prediction:')
    print('Number of incorrect prediction: ', incorrect_prediction.shape[0])
    print()
    print(separator)

    # split into true positive, true negative, false positive & false negative;
    # masks (rather than groupby/get_group) yield an empty frame instead of
    # raising KeyError when a group has no records
    true_positive = correct_prediction[correct_prediction['True'] == 1]
    true_negative = correct_prediction[correct_prediction['True'] == 0]
    false_positive = incorrect_prediction[incorrect_prediction['True'] == 0]
    false_negative = incorrect_prediction[incorrect_prediction['True'] == 1]

    # check the number of true positive, true negative, false positive, false negative
    print('\nNumber of True Positive: ',true_positive.shape[0])
    print('Number of True Negative: ',true_negative.shape[0])
    print('Number of False Positive: ',false_positive.shape[0])
    print('Number of False Negative: ',false_negative.shape[0])
    print()
    print(separator)

    comparisons = [
        ('True Positive', 'True Negative', true_positive, true_negative),
        ('True Positive', 'False Positive', true_positive, false_positive),
        ('True Negative', 'False Negative', true_negative, false_negative),
    ]
    for position, (first_name, second_name, first, second) in enumerate(comparisons):
        if position:
            print()
            print(separator)
        print(f'\nComparison of the effect size of selected features in {first_name} & {second_name} Classes:')
        if first.empty or second.empty:
            # A two-sample test is meaningless without data on both sides.
            print('Skipped: at least one of the two groups has no records.')
            continue
        performStat(first, second, model_training_feature_set, alternative='two-sided',effectSize=effectSize)