# Compute performance metrics for the given Y and Y_score without sklearn

In [1]:
import numpy as np
import pandas as pd
# other than these two you should not import any other packages

In [2]:
def threshold(x,n):
    """
    Creates 0 or 1 class labels for probability values based on threshold
    :param n: threshold value 0.5
    :param x: Probability value
    :return: 0 if value is less than 0.5 , else 1
    """
    x = np.where(x < n, 0, 1)
    return x

### Confusion Matrix

-> Confusion Matrix provides insight about which classes are predicted correctly and which are predicted incorrectly and what      type of error is occured.

-> It categorizes prediction into positive predictions and negative predictions,Hence it is possible to focus on one class through precision and recall and that is why it is suitable for imbalanced classification.


In [3]:
 # https://stackoverflow.com/questions/36921951/truth-value-of-a-series-is-ambiguous-use-a-empty-a-bool-a-item-a-any-o
def confusion_matrix(true, pred):  
    """
    Calculates values of confusion matrix
    :param true: true values from true value column
    :param pred: predicted values from proba column
    :return: True negative, True Positive, False Positive and False negative
    """
    tp = np.sum(np.logical_and(true == 1, pred == 1))
    tn = np.sum(np.logical_and(true == 0, pred == 0))
    fp = np.sum(np.logical_and(true == 0, pred == 1))
    fn = np.sum(np.logical_and(true == 1, pred == 0))
    
    return tp, tn, fp, fn

### Precision

-> Precision is number of positive predictions that are actually from the positive class.

-> Precision doesn't consider false negatives, so we can not figure out how many actual positive class predictions are part of negative class.

In [4]:
def precision(tp,fp):
    """
    Calculates Precision
    :param tp: true positive values from confusion matrix
    :param fp: false positive values from confusion matrix
    :return: precision in percentage
    """
    return tp/float(tp+fp)

### Recall

-> Recall is about how well the positive class has been predicted.

-> Recall considers false negatives and so it provides information about predictions that could have been positive.

In [5]:
def recall(TP,FN):
    """
    Calculates Recall
    :param TP: True positive from performance matrix
    :param FN: False negative from performance matrix
    :return: percentage of recall values
    """
    return TP/(TP+FN)

### Accuracy Score

-> Accuracy score is not suitable for imbalanced classification, As it doesn't consider correctly classified points  and incorrectly classified points from different classes, rather it considers only true positiva nad true negatives.

In [6]:
def accuracy_score(tp,tn,fp,fn):
    """
    Calculates accuracy score
    :param Tp: True Positive values from performance matrix
    :param Tn: True Negative values from performance matrix
    :param Fp: False Positive values from performance matrix
    :param Fn: False negative values from performance matrix
    :return: Accuracy score as a float value
    """
    
    res = (tp+tn)/(tp+fp+fn+tn)
    return res

### True Positive Rate

-> TPR is same as recall and it tells number all correct positive predictions made out of all correct predicitons that could have been made.

In [7]:
def TPR(TP,FN):
    """
    Calculates True positive rate
    :param TP: True Positive from confusion matrix
    :param FN: False negative from confusion matrix
    :return: True Positive Rate as a float value
    """
    return (TP)/(TP+FN)

### False Positive Rate

-> False positive rate is number of actual negative predicitons predicted as positive from all negative predictions.


In [8]:
def FPR(FP, TN):
    """
    Calculates false positive rate
    :param FP: False Positive from performance matrix
    :param TN: True negative from performance matrix
    :return: false positive rate as a float value
    """
    return (FP)/(FP+TN)

### F1 Score

-> Precision gives more weightage to false positve, Where as recall gives more weightage to false negatives.

-> F1 score is geomatric mean of precision and recall and hence it balances both the precision and the recall.


In [9]:
def f1_score(precision,recall):
    """
     Calculates F1 score
    :param precision: precision value in percentage
    :param recall: Recall value in percentage
    :return: F1 score as a float value
    """
    ans = float(2*precision*recall)/float(precision+recall)
    return ans

In [10]:
def thre_desc(x):
    """
    Round up threshold values to one decimal point and arrange them in descending order
    :param x: Probability values
    :return: Thresholds in descending order
    """  
    prob_threshold = x.to_numpy()
    y = prob_threshold.round(decimals=1)
    z = np.unique(y)
    thre_asce = np.sort(z)
    thre_desc = np.flip(thre_asce)
    return thre_desc

In [11]:
def tpr_fpr(y_true, y_pred):
    """
    Calculates TPR and FPR for all thresholds
    :param y_true: Actual class labels
    :param y_pred: Predicted probability values
    :return: list of TPR and FPR values across various thresholds
    """
    tpr_array = []
    fpr_array = []
    thre_list = thre_desc(y_pred)
    for i in thre_list:
        m = threshold(y_pred, i)
        tp, tn, fp, fn = confusion_matrix(y_true, m)
        tpr = TPR(tp, fn)
        tpr_array.append(tpr)
        fpr = FPR(fp, tn)
        fpr_array.append(fpr)
    return tpr_array, fpr_array


# Problem A

<pre>
<font color='red'><b>A.</b></font> Compute performance metrics for the given data <strong>5_a.csv</strong>
   <b>Note 1:</b> in this data you can see number of positive points >> number of negatives points
   <b>Note 2:</b> use pandas or numpy to read the data from <b>5_a.csv</b>
   <b>Note 3:</b> you need to derive the class labels from given score</pre> $y^{pred}= \text{[0 if y_score < 0.5 else 1]}$

<pre>
<ol>
<li> Compute Confusion Matrix </li>
<li> Compute F1 Score </li>
<li> Compute AUC Score, you need to compute different thresholds and for each threshold compute tpr,fpr and then use               numpy.trapz(tpr_array, fpr_array) <a href='https://stackoverflow.com/q/53603376/4084039'>https://stackoverflow.com/q/53603376/4084039</a>, <a href='https://stackoverflow.com/a/39678975/4084039'>https://stackoverflow.com/a/39678975/4084039</a> Note: it should be numpy.trapz(tpr_array, fpr_array) not numpy.trapz(fpr_array, tpr_array)</li>
<li> Compute Accuracy Score </li>
</ol>
</pre>

In [12]:
df = pd.read_csv('5_a.csv')

In [13]:
df.head()

Unnamed: 0,y,proba
0,1.0,0.637387
1,1.0,0.635165
2,1.0,0.766586
3,1.0,0.724564
4,1.0,0.889199


In [14]:
df['prob_val'] = df['proba'][:]

In [15]:
df['proba'] = threshold(df['proba'],0.5)

### Compute Confusion Matrix 

In [16]:
# Confusion matrix values True Positive, True Negative, False Positive, False Negative calculations
tp, tn, fp, fn = confusion_matrix(df['y'], df['proba'])
print('TP when no. of +ve points >> no. of -ve points: ', tp)
print('TN when no. of +ve points >> no. of -ve points:', tn)
print('FP when no. of +ve points >> no. of -ve points:', fp)
print('FN when no. of +ve points >> no. of -ve points:', fn)


TP when no. of +ve points >> no. of -ve points:  10000
TN when no. of +ve points >> no. of -ve points: 0
FP when no. of +ve points >> no. of -ve points: 100
FN when no. of +ve points >> no. of -ve points: 0


### Compute F1 Score 

In [17]:
# Computes F1 score using precison and recall

Precision_a = precision(tp, fp)
print('Precision when no. of +ve points >> no. of -ve points:', Precision_a)

Recall_a = recall(tp, fn)
print('Recall when no. of +ve points >> no. of -ve points:', Recall_a)

print('F1 Score when no. of +ve points >> no. of -ve points:', f1_score(Precision_a, Recall_a))

Precision when no. of +ve points >> no. of -ve points: 0.9900990099009901
Recall when no. of +ve points >> no. of -ve points: 1.0
F1 Score when no. of +ve points >> no. of -ve points: 0.9950248756218906


Here precision and recall both are high so it results in high F1 score.

### Compute AUC Score

-> To compute AUC score different thresholds are used, the model which has a good score across range of threshold has high score and it separates the classes better.

-> It can be used for imbalanced classification, however in case of severly imbalanced classes it may give optimistic score and not a suitable metric, for severly imbalanced classification AUC based precision and recall is better choice than AUC based on TPR-FPR.

In [18]:
# computes tpr and fpr values and AUC socre based on it
tpr_array, fpr_array = tpr_fpr(df['y'], df['prob_val'])
auc = np.trapz(tpr_array, fpr_array)
print('AUC Score :', auc)

AUC Score : 0.48897750000000006


Here classes are severly skewed so AUC score is low for imbalanced classification, it is worse than the dumb model prediction. Here Precision and Recall based AUC score can be a better alternative.

### Compute Accuracy Score 

In [19]:
accuracy = accuracy_score(tp,tn,fp,fn)
print('Accuracy Score when no. of +ve points >> no. of -ve points : ', accuracy)

Accuracy Score when no. of +ve points >> no. of -ve points :  0.9900990099009901


Here high accuracy score is due to majority of true positives, hence accuracy score should not be used in imbalanced classification

# Problem B

<pre>
<font color='red'><b>B.</b></font> Compute performance metrics for the given data <strong>5_b.csv</strong>
   <b>Note 1:</b> in this data you can see number of positive points << number of negatives points
   <b>Note 2:</b> use pandas or numpy to read the data from <b>5_b.csv</b>
   <b>Note 3:</b> you need to derive the class labels from given score</pre> $y^{pred}= \text{[0 if y_score < 0.5 else 1]}$

<pre>
<ol>
<li> Compute Confusion Matrix </li>
<li> Compute F1 Score </li>
<li> Compute AUC Score, you need to compute different thresholds and for each threshold compute tpr,fpr and then use               numpy.trapz(tpr_array, fpr_array) <a href='https://stackoverflow.com/q/53603376/4084039'>https://stackoverflow.com/q/53603376/4084039</a>, <a href='https://stackoverflow.com/a/39678975/4084039'>https://stackoverflow.com/a/39678975/4084039</a></li>
<li> Compute Accuracy Score </li>
</ol>
</pre>

In [20]:
df_b = pd.read_csv('5_b.csv')

In [21]:
df_b.head()

Unnamed: 0,y,proba
0,0.0,0.281035
1,0.0,0.465152
2,0.0,0.352793
3,0.0,0.157818
4,0.0,0.276648


In [22]:
df_b['prob_val'] = df_b['proba'][:]

In [23]:
df_b['proba'] = threshold(df_b['proba'],0.5)

### Compute Confusion Matrix

In [24]:
# Confusion matrix values True Positive, True Negative, False Positive, False Negative calculations
tp, tn, fp, fn = confusion_matrix(df_b['y'], df_b['proba'])
print('TP when  no. of +ve points << no. of -ve points: ', tp)
print('TN when  no. of +ve points << no. of -ve points:', tn)
print('FP when  no. of +ve points << no. of -ve points:', fp)
print('FN when  no. of +ve points << no. of -ve points:', fn)


TP when  no. of +ve points << no. of -ve points:  55
TN when  no. of +ve points << no. of -ve points: 9761
FP when  no. of +ve points << no. of -ve points: 239
FN when  no. of +ve points << no. of -ve points: 45


### Compute F1 Score

In [25]:
# Computes F1 score using precison and recall

Precision_b = precision(tp, fp)
print('Precision when no. of +ve points << no. of -ve points:', Precision_b)

Recall_b = recall(tp, fn)
print('Recall when no. of +ve points << no. of -ve points:', Recall_b)

print('F1 Score when no. of +ve points << no. of -ve points:', f1_score(Precision_b, Recall_b))

Precision when no. of +ve points << no. of -ve points: 0.1870748299319728
Recall when no. of +ve points << no. of -ve points: 0.55
F1 Score when no. of +ve points << no. of -ve points: 0.2791878172588833


Here precision is very low and recall is low, so balancing them results in low f1 score

### Compute AUC Score

In [26]:
# computes tpr and fpr values and AUC socre based on it
tpr_array, fpr_array = tpr_fpr(df_b['y'], df_b['prob_val'])
auc = np.trapz(tpr_array, fpr_array)
print('AUC Score when  no. of +ve points << no. of -ve points:', auc)

AUC Score when  no. of +ve points << no. of -ve points: 0.9276825


Here for imbalanced classes there is a high AUC.When no. of examples in minority class is small it may give optimistic result.

### Compute Accuracy Score

In [27]:
accuracy = accuracy_score(tp,tn,fp,fn)
print('Accuracy Score when no. of +ve points << no. of -ve points : ', accuracy)

Accuracy Score when no. of +ve points << no. of -ve points :  0.9718811881188119


Here high accuracy is due to majority class and it shouldn't be considered to measure performance.

# Problem C

<font color='red'><b>C.</b></font> Compute the best threshold (similarly to ROC curve computation) of probability which gives lowest values of metric <b>A</b> for the given data <strong>5_c.csv</strong>
<br>

you will be predicting label of a data points like this: $y^{pred}= \text{[0 if y_score < threshold  else 1]}$

$ A = 500 \times \text{number of false negative} + 100 \times \text{numebr of false positive}$

<pre>
   <b>Note 1:</b> in this data you can see number of negative points > number of positive points
   <b>Note 2:</b> use pandas or numpy to read the data from <b>5_c.csv</b>
</pre>

In [28]:
df_c = pd.read_csv('5_c.csv')

In [29]:
df_c.head()

Unnamed: 0,y,prob
0,0,0.458521
1,0,0.505037
2,0,0.418652
3,0,0.412057
4,0,0.375579


In [30]:
def best_threshold(y_true, y_pred):
    """
    Compute the best threshold (similarly to ROC curve computation) of probability
    which gives lowest values of metric A for the given data
    :param y_true: True class labels
    :param y_pred: Predicted probabilities
    :return: Returns minimum value of A for best threshold
    """

    # store values of a with thresholds
    a_v = []

    # arrange threshold in descending order
    thre_list = thre_desc(y_pred)
    for i in thre_list:
        # apply threshold values and convert prob to either 0 or 1
        m = threshold(y_pred, i)
        tp, tn, fp, fn = confusion_matrix(y_true, m)
        a = (500 * fn) + (100 * fp)
        a_v.append((a, i))
    return min(a_v)

best_threshold(df_c['y'], df_c['prob']) 

(147200, 0.2)

Here the best threshold is 0.2 and minimum value of A is 147200.

# Problem D

<pre>
<font color='red'><b>D.</b></font> Compute performance metrics(for regression) for the given data <strong>5_d.csv</strong>
    <b>Note 2:</b> use pandas or numpy to read the data from <b>5_d.csv</b>
    <b>Note 1:</b> <b>5_d.csv</b> will having two columns Y and predicted_Y both are real valued features
<ol>
<li> Compute Mean Square Error </li>
<li> Compute MAPE: https://www.youtube.com/watch?v=ly6ztgIkUxk</li>
<li> Compute R^2 error: https://en.wikipedia.org/wiki/Coefficient_of_determination#Definitions </li>
</ol>
</pre>

In [31]:
df_d = pd.read_csv('5_d.csv')

In [32]:
df_d.head()

Unnamed: 0,y,pred
0,101.0,100.0
1,120.0,100.0
2,131.0,113.0
3,164.0,125.0
4,154.0,152.0


### Mean Squared Error

In [33]:
def mean_square_error(y_true, y_pred):
    """
    Calculates mean square error
    :param y_true: True class labels
    :param y_pred: Predicted class labels
    :return: value of mean square error
    """
    tot_error = 0
    for i in range(len(y_true)):
        pred_error = y_pred[i] - y_true[i]
        tot_error += (pred_error ** 2)
    mean_sqr_error = tot_error / (len(y_true))
    return mean_sqr_error


In [34]:
mse = mean_square_error(df_d['y'], df_d['pred'])
print('Mean Square Error:', mse)

Mean Square Error: 177.16569974554707


Mean square error is affected by outliers.As errors are squared the outliers impact is more, hence it is not suitable for data with outliers.

### Mean Absolute Percentage Error

In [35]:

def mape_metric(y_true, y_pred):
    """
    Calculates Mean Absolute Percentage Error
    :param y_true: True class Lables
    :param y_pred: Predicted class labels
    :return: Mean Absolute Percentage Error
    """
    tot_error = 0
    tot_true = 0
    for i in range(len(y_true)): 
        pred_error = abs(y_pred[i] - y_true[i])
        tot_error += pred_error
        tot_true += y_true[i]
    mape = tot_error / (tot_true) 
    return mape



In [36]:
mape = mape_metric(df_d['y'], df_d['pred'])
print('Mean Absolute Percentage Error:', mape)

Mean Absolute Percentage Error: 0.1291202994009687


MAPE considers error as percentage of error so it does provide insight about importance of error.

### R^2 Error

In [37]:
def mean(x):
    """
    Calculates Mean
    :param x: Values for which mean is to be computed
    :return: Mean of values
    """
    return sum(x) / len(x)


In [38]:
def variance(values, mean):
    """
    Calculates variance
    :param values: List of values
    :param mean: mean of values
    :return: variance
    """

    return sum([(x-mean)**2 for x in values])


In [39]:
def covariance(x,x_mean,y,y_mean): 
    
    x_mean = mean(x)
    y_mean = mean(y)
    covar = sum((x[i] - x_mean) * (y[i] - y_mean) for i in range(len(x))) 
    return covar

In [40]:
def coefficients(true, pred): 
    x = [i for i in true] 
    y = [j for j in pred] 
    x_mean, y_mean = mean(x), mean(y) 
    b1 = covariance(x,x_mean,y,y_mean) / variance(x,x_mean) 
   
    return b1

In [41]:
r_square = coefficients(df_d['y'], df_d['pred'])
print(r_square)

0.9568483147034468


R square value provides insight about how much variance is explained by the model, however it is highly subjective measure and should not be consider without residuals.