In [4]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix

In [5]:
# Read the dataset from the CSV file into a DataFrame.
df = pd.read_csv('5_a.csv')

In [6]:
# Derive hard class labels from the scores: below 0.5 -> 0, otherwise -> 1.
df['y_pred'] = np.where(df['proba'] < 0.5, 0, 1)

# Cast the true-label column to integers.
df['y'] = df['y'].astype(int)

#### 1. Confusion matrix

In [7]:
def my_confusion_matrix(x, y):
    """Build the 2x2 confusion matrix for binary (0/1) labels.

    Labels are paired by position: element ``i`` of ``x`` is compared with
    element ``i`` of ``y``, whatever index a pandas Series may carry.

    Parameters
    ----------
    x : array-like
        True labels.
    y : array-like
        Predicted labels.

    Returns
    -------
    numpy.ndarray or None
        ``[[TN, FP], [FN, TP]]``. Pairs in which either label is neither 0
        nor 1 are not counted. When ``x`` and ``y`` differ in length a
        message is printed and ``None`` is returned.
    """
    # inputs of different length cannot be paired up; report it and stop
    if len(x) != len(y):
        print('x and y has different length, please provide the series with same length')
        return None

    # np.asarray drops any pandas index, so a filtered or re-indexed Series
    # is still read in row order instead of being looked up by label
    actual = np.asarray(x)
    predicted = np.asarray(y)

    # count each of the four outcomes with one vectorised comparison
    TP = int(np.sum((actual == 1) & (predicted == 1)))
    TN = int(np.sum((actual == 0) & (predicted == 0)))
    FP = int(np.sum((actual == 0) & (predicted == 1)))
    FN = int(np.sum((actual == 1) & (predicted == 0)))

    # rows are the true class (0, 1), columns the predicted class (0, 1)
    return np.array([[TN, FP], [FN, TP]])

#### 2. F1 Score

In [8]:
def f1_score(x, y):
    """Compute the F1 score of the positive class (label 1).

    Labels are paired by position: element ``i`` of ``x`` is compared with
    element ``i`` of ``y``, whatever index a pandas Series may carry.

    Parameters
    ----------
    x : array-like
        True labels (0/1).
    y : array-like
        Predicted labels (0/1).

    Returns
    -------
    float or None
        ``2 * precision * recall / (precision + recall)``. Returns ``0.0``
        when there are no true positives, because precision and/or recall
        are then zero or undefined. When ``x`` and ``y`` differ in length a
        message is printed and ``None`` is returned.
    """
    # inputs of different length cannot be paired up; report it and stop
    if len(x) != len(y):
        print('x and y has different length, please provide the series with same length')
        return None

    # np.asarray drops any pandas index, so a filtered or re-indexed Series
    # is still read in row order instead of being looked up by label
    actual = np.asarray(x)
    predicted = np.asarray(y)

    # true negatives do not enter the F1 score, so only three counts are needed
    TP = int(np.sum((actual == 1) & (predicted == 1)))
    FP = int(np.sum((actual == 0) & (predicted == 1)))
    FN = int(np.sum((actual == 1) & (predicted == 0)))

    # with no true positives every ratio below would be 0 or 0/0
    if TP == 0:
        return 0.0

    # TP > 0 guarantees that both denominators are positive
    precision = TP / (TP + FP)
    recall = TP / (TP + FN)

    # harmonic mean of precision and recall
    return 2 * precision * recall / (precision + recall)

#### 3. Accuracy Score

In [11]:
def accuracy_score(x, y):
    """Compute the accuracy of binary (0/1) predictions.

    Labels are paired by position: element ``i`` of ``x`` is compared with
    element ``i`` of ``y``, whatever index a pandas Series may carry.

    Parameters
    ----------
    x : array-like
        True labels (0/1).
    y : array-like
        Predicted labels (0/1).

    Returns
    -------
    float or None
        ``(TP + TN) / (TP + TN + FP + FN)``. Pairs in which either label is
        neither 0 nor 1 are not counted. Returns ``nan`` when no pair is
        counted (for example on empty input). When ``x`` and ``y`` differ in
        length a message is printed and ``None`` is returned.
    """
    # inputs of different length cannot be paired up; report it and stop
    if len(x) != len(y):
        print('x and y has different length, please provide the series with same length')
        return None

    # np.asarray drops any pandas index, so a filtered or re-indexed Series
    # is still read in row order instead of being looked up by label
    actual = np.asarray(x)
    predicted = np.asarray(y)

    # count each of the four outcomes with one vectorised comparison
    TP = int(np.sum((actual == 1) & (predicted == 1)))
    TN = int(np.sum((actual == 0) & (predicted == 0)))
    FP = int(np.sum((actual == 0) & (predicted == 1)))
    FN = int(np.sum((actual == 1) & (predicted == 0)))

    counted = TP + TN + FP + FN

    # accuracy is undefined when there is nothing to score
    if counted == 0:
        return float('nan')

    return (TP + TN) / counted

#### 4. Mean squared error

In [14]:
def mean_squared_eror(x, y):
    """Compute the mean squared error between true and predicted values.

    Values are paired by position: element ``i`` of ``x`` is compared with
    element ``i`` of ``y``, whatever index a pandas Series may carry.

    Parameters
    ----------
    x : array-like
        True values.
    y : array-like
        Predicted values.

    Returns
    -------
    float
        Mean of ``(x - y) ** 2``; ``nan`` for empty input.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not have the same shape.
    """
    # np.asarray drops any pandas index, so duplicate or re-ordered labels
    # cannot change which values are compared
    actual = np.asarray(x)
    predicted = np.asarray(y)

    # refuse mismatched inputs rather than letting NumPy broadcast them
    if actual.shape != predicted.shape:
        raise ValueError('x and y must have the same shape')

    # the mean of no values is undefined
    if actual.size == 0:
        return float('nan')

    return np.mean((actual - predicted) ** 2)

#### 5. Mean absolute percentage error

In [15]:
def mape(x, y):
    """Compute the total absolute error relative to the total of true values.

    The result is ``sum(|x - y|) / sum(x)``. Values are paired by position:
    element ``i`` of ``x`` is compared with element ``i`` of ``y``, whatever
    index a pandas Series may carry.

    Parameters
    ----------
    x : array-like
        True values.
    y : array-like
        Predicted values.

    Returns
    -------
    float
        The ratio described above; ``nan`` when the true values sum to zero
        (which includes empty input).

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not have the same shape.
    """
    # np.asarray drops any pandas index, so duplicate or re-ordered labels
    # cannot change which values are compared
    actual = np.asarray(x)
    predicted = np.asarray(y)

    # refuse mismatched inputs rather than letting NumPy broadcast them
    if actual.shape != predicted.shape:
        raise ValueError('x and y must have the same shape')

    total_error = np.abs(actual - predicted).sum()

    # normalise by the true values, not by the predictions
    total_actual = actual.sum()

    # the ratio is undefined when there is nothing to normalise by
    if total_actual == 0:
        return float('nan')

    return total_error / total_actual

#### 6. R^2 Value

In [17]:
def rss(x, y):
    """Compute the residual sum of squares, ``sum((x - y) ** 2)``.

    Values are paired by position: element ``i`` of ``x`` is compared with
    element ``i`` of ``y``, whatever index a pandas Series may carry.

    Parameters
    ----------
    x : array-like
        True values.
    y : array-like
        Predicted values.

    Returns
    -------
    number
        Sum of the squared differences; zero for empty input.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not have the same shape.
    """
    # np.asarray drops any pandas index, so duplicate or re-ordered labels
    # cannot change which values are compared
    actual = np.asarray(x)
    predicted = np.asarray(y)

    # refuse mismatched inputs rather than letting NumPy broadcast them
    if actual.shape != predicted.shape:
        raise ValueError('x and y must have the same shape')

    return ((actual - predicted) ** 2).sum()

def tss(x, y):
    """Compute the total sum of squares of the true values.

    The result is ``sum((x - mean(x)) ** 2)``: the squared error of a
    baseline that always predicts the mean of the true values.

    Parameters
    ----------
    x : array-like
        True values.
    y : array-like
        Accepted so existing ``tss(x, y)`` calls keep working; not used,
        because the baseline depends only on the true values.

    Returns
    -------
    number
        Sum of squared deviations of ``x`` from its own mean; zero for
        empty input.
    """
    actual = np.asarray(x)

    # nothing to sum; also avoids taking the mean of an empty array
    if actual.size == 0:
        return 0

    # the baseline is computed once, from the true values
    baseline = actual.mean()

    return ((actual - baseline) ** 2).sum()

In [18]:
def r2_value(x, y):
    """Compute the coefficient of determination, ``1 - RSS / TSS``.

    Parameters
    ----------
    x : array-like
        True values.
    y : array-like
        Predicted values.

    Returns
    -------
    float
        ``1 - rss(x, y) / tss``, where ``tss`` is taken around the mean of
        the true values. The ratio is undefined when all true values are
        equal, because the total sum of squares is then zero.
    """
    # squared error of the predictions
    residual = rss(x, y)

    # squared error of the mean-of-true-values baseline; the true values are
    # passed for both arguments so the predictions cannot shift the baseline
    total = tss(x, x)

    return 1 - residual / total