# Performance Metrics Implementation

1. **Accuracy Score**
   - Measures the ratio of correctly predicted instances out of the total instances.

2. **Precision**
   - Indicates the ratio of correctly predicted positive observations to the total predicted positives.

3. **Recall (Sensitivity or True Positive Rate - TPR)**
   - Represents the ratio of correctly predicted positive observations to all actual positives.

4. **False Positive Rate (FPR)**
   - Indicates the ratio of incorrectly predicted positive observations to all actual negatives.

5. **F1 Score**
   - The harmonic mean of precision and recall, providing a balance between the two.

6. **ROC (Receiver Operating Characteristic) Curve**
   - A graphical representation showing the trade-off between the TPR and FPR at various threshold settings.

7. **ROC Plot**
   - The visual plot of the ROC curve to illustrate the performance of the classification model.

8. **AUC-ROC (Area Under the ROC Curve)**
   - Quantifies the overall performance of the model by calculating the area under the ROC curve.

9. **Optimal Threshold**
   - The threshold value that provides the best trade-off between TPR and FPR; here it is the threshold that maximizes the difference TPR − FPR.


In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import plotly.graph_objects as go

In [2]:
def compute_Performace_matrix(df,thres=0.5):
    """Build the 2x2 confusion matrix for binary labels at a score threshold.

    Rows whose ``proba`` is below ``thres`` are predicted 0.0; every other
    row is predicted 1.0.

    Args:
        df: DataFrame with a ``y`` column of true labels (0 or 1) and a
            ``proba`` column of scores. A ``y_hat`` column holding the
            thresholded predictions is added to (or overwritten in) ``df``.
        thres: Decision threshold applied to ``proba``.

    Returns:
        A numpy array laid out as ``[[TN, FN], [FP, TP]]``.
    """
    # Writing the predictions back to ``df`` is a side effect of the original
    # implementation; it is kept so existing callers still find ``y_hat``.
    df['y_hat'] = np.where(df['proba'] < thres, 0.0, 1.0)

    actual_neg = df['y'] == 0
    actual_pos = df['y'] == 1
    pred_neg = df['y_hat'] == 0
    pred_pos = df['y_hat'] == 1

    # Summing a boolean mask counts its True entries without a Python loop.
    TN = int((actual_neg & pred_neg).sum())
    FN = int((actual_pos & pred_neg).sum())
    FP = int((actual_neg & pred_pos).sum())
    TP = int((actual_pos & pred_pos).sum())

    return np.array([[TN, FN], [FP, TP]])

In [3]:
def compute_TPR_and_FPR(confu):
    """Return ``(TPR, FPR)`` for a confusion matrix.

    ``confu`` is indexed as ``[[TN, FN], [FP, TP]]``. TPR is TP / (TP + FN)
    and FPR is FP / (TN + FP). Zero denominators are not guarded here.
    """
    true_neg, false_neg = confu[0][0], confu[0][1]
    false_pos, true_pos = confu[1][0], confu[1][1]

    actual_positives = true_pos + false_neg
    actual_negatives = true_neg + false_pos

    true_positive_rate = true_pos / actual_positives
    false_positive_rate = false_pos / actual_negatives
    return true_positive_rate, false_positive_rate

In [4]:
# Compute precision and recall
def cal_pre_rec(confu):
    """Return ``(precision, recall)`` for a confusion matrix.

    Args:
        confu: 2x2 matrix indexed as ``[[TN, FN], [FP, TP]]``.

    Returns:
        A ``(precision, recall)`` tuple, where precision is TP / (TP + FP)
        and recall is TP / (TP + FN). A metric whose denominator is zero
        (no predicted positives, or no actual positives) is reported as 0.0
        instead of producing NaN or raising ZeroDivisionError.
    """
    FN = confu[0][1]
    FP = confu[1][0]
    TP = confu[1][1]

    predicted_positives = TP + FP
    actual_positives = TP + FN

    # Guard the 0/0 case, e.g. a threshold so high that nothing is predicted
    # positive.
    pre = TP / predicted_positives if predicted_positives else 0.0
    rec = TP / actual_positives if actual_positives else 0.0

    return (pre, rec)

In [5]:
# Compute F1 score
def cal_F1_score(confu):
    """Return the F1 score (harmonic mean of precision and recall).

    Args:
        confu: 2x2 matrix indexed as ``[[TN, FN], [FP, TP]]``; it is passed
            straight to ``cal_pre_rec``.

    Returns:
        ``2 * precision * recall / (precision + recall)``, or 0.0 when
        precision and recall are both zero (the harmonic mean would
        otherwise be 0/0).
    """
    pre, rec = cal_pre_rec(confu)

    denominator = pre + rec
    if denominator == 0:
        # No true positives at all: define F1 as 0 rather than dividing by 0.
        return 0.0

    return (2 * pre * rec) / denominator

In [6]:
# Accuracy: share of all instances that were classified correctly
def cal_accuracy_score(confu):
    """Return (TN + TP) / (TN + TP + FN + FP) for a confusion matrix.

    ``confu`` is indexed as ``[[TN, FN], [FP, TP]]``.
    """
    true_neg, false_neg = confu[0][0], confu[0][1]
    false_pos, true_pos = confu[1][0], confu[1][1]

    correct = true_neg + true_pos
    total = correct + false_neg + false_pos
    return correct / total

In [7]:
# ROC curve points
def ROC(df):
    """Compute ROC points using every score in ``df['proba']`` as a threshold.

    The rows are ordered by ``proba`` from highest to lowest, and for each
    score the confusion matrix and its (TPR, FPR) pair are computed with that
    score as the threshold.

    Returns:
        Three numpy arrays ``(thresholds, tpr, fpr)`` in that descending
        score order, one entry per row of ``df``.
    """
    # sort_values returns a new DataFrame; ``df`` itself is left unsorted.
    ranked = df.sort_values(by='proba', ascending=False)

    cutoffs, tpr_points, fpr_points = [], [], []

    for cutoff in tqdm(ranked['proba']):
        matrix = compute_Performace_matrix(df=ranked, thres=cutoff)
        rate_tp, rate_fp = compute_TPR_and_FPR(matrix)
        cutoffs.append(cutoff)
        tpr_points.append(rate_tp)
        fpr_points.append(rate_fp)

    return np.array(cutoffs), np.array(tpr_points), np.array(fpr_points)
    
    

In [8]:
def plot_roc_curv(thresholds, tpr, fpr, label_every=1000):
    """Plot the ROC curve with Plotly and display it.

    Args:
        thresholds: Threshold used for each ROC point.
        tpr: True positive rate at each threshold.
        fpr: False positive rate at each threshold.
        label_every: Annotate every ``label_every``-th point with its
            threshold so the plot is not cluttered. Defaults to 1000, the
            value that was previously hard-coded.

    Raises:
        ValueError: If ``label_every`` is smaller than 1.
    """
    if label_every < 1:
        raise ValueError("label_every must be a positive integer")

    # Boolean-mask indexing below needs arrays; accept plain sequences too.
    thresholds = np.asarray(thresholds)
    tpr = np.asarray(tpr)
    fpr = np.asarray(fpr)

    # Generate a trace for ROC curve
    trace0 = go.Scatter(
        x=fpr,
        y=tpr,
        mode='lines',
        name='ROC curve'
    )

    # Only label every nth point to avoid cluttering:
    # select the positions whose index is a multiple of label_every.
    indices = np.arange(len(thresholds)) % label_every == 0

    trace1 = go.Scatter(
        x=fpr[indices],
        y=tpr[indices],
        mode='markers+text',
        name='Threshold points',
        text=[f"Thr={thr:.2f}" for thr in thresholds[indices]],
        textposition='top center'
    )

    # Diagonal reference line
    trace2 = go.Scatter(
        x=[0, 1],
        y=[0, 1],
        mode='lines',
        name='Random (Area = 0.5)',
        line=dict(dash='dash')
    )

    data = [trace0, trace1, trace2]

    # Define layout with square aspect ratio
    layout = go.Layout(
        title='Receiver Operating Characteristic',
        xaxis=dict(title='False Positive Rate'),
        yaxis=dict(title='True Positive Rate'),
        autosize=False,
        width=800,
        height=800,
        showlegend=False
    )

    # Define figure and add data
    fig = go.Figure(data=data, layout=layout)

    # Show figure
    fig.show()


In [11]:
# Area under the ROC curve
def auc_roc(tpr, fpr):
    """Return the area under the ROC curve by the trapezoidal rule.

    Args:
        tpr: True positive rates (the y values of the curve).
        fpr: False positive rates (the x values of the curve), in the same
            order as ``tpr``.

    Returns:
        The integral of ``tpr`` over ``fpr``.
    """
    # np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid;
    # use the new name when it exists and fall back on older NumPy.
    integrate = getattr(np, "trapezoid", None)
    if integrate is None:
        integrate = np.trapz
    return integrate(tpr, fpr)

In [12]:
def optimal_threshold(tpr, fpr, thresholds=None):
    """Return the threshold at which ``tpr - fpr`` is largest.

    Args:
        tpr: True positive rate at each threshold.
        fpr: False positive rate at each threshold.
        thresholds: Threshold for each (tpr, fpr) pair, in the same order.
            When omitted, a module-level variable named ``thresholds`` is
            used, which is what this function previously relied on
            implicitly.

    Returns:
        The element of ``thresholds`` at the index that maximizes
        ``tpr - fpr``.

    Raises:
        NameError: If ``thresholds`` is not passed and no module-level
            ``thresholds`` exists.
    """
    if thresholds is None:
        # Backward compatibility with two-argument calls that depended on a
        # global ``thresholds`` being defined before the call.
        try:
            thresholds = globals()['thresholds']
        except KeyError:
            raise NameError(
                "thresholds was not passed and no global 'thresholds' is defined"
            ) from None

    # np.asarray lets plain lists be subtracted element-wise.
    best_idx = np.argmax(np.asarray(tpr) - np.asarray(fpr))
    return thresholds[best_idx]

# implementation