# This code demonstrates how to evaluate performance metrics - Rubab Karim

In [1]:
import sklearn
import numpy as np

from sklearn import datasets
from sklearn.model_selection import train_test_split

from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score, accuracy_score, f1_score

import pandas as pd
from collections import Counter

from sklearn.preprocessing import LabelEncoder

In [2]:
# Number of nearest neighbours that predict() keeps (it slices the
# distance-sorted indices with [:k]) before taking the majority vote.
k = 5

def euclidean_distance(v1, v2):
    """Return the Euclidean (L2) distance between two vectors.

    Args:
        v1: first vector; any array-like of numbers (list, tuple, ndarray).
        v2: second vector; must be broadcast-compatible with ``v1``.

    Returns:
        The square root of the summed squared element-wise differences,
        as a NumPy float.
    """
    # Coerce to float arrays so plain lists/tuples are accepted and so that
    # unsigned-integer inputs cannot wrap around when subtracted.
    a = np.asarray(v1, dtype=float)
    b = np.asarray(v2, dtype=float)
    return np.sqrt(np.sum((a - b) ** 2))

def predict(test_x):
    """Classify one sample by majority vote among its nearest training rows.

    Reads the module-level ``X_train``, ``y_train`` and ``k``.

    Returns the value of ``Counter.most_common(1)``: a list holding a single
    ``(label, vote_count)`` tuple, so callers read the winning label as
    ``result[0][0]``.
    """
    # Distance from the query sample to every training row, in row order.
    dists = []
    for train_row in X_train:
        dists.append(euclidean_distance(test_x, train_row))

    # argsort orders the row indices by ascending distance; keep the first k.
    nearest = np.argsort(dists)[:k]

    # Tally the training labels of those neighbours, closest first.
    votes = Counter(y_train[idx] for idx in nearest)

    return votes.most_common(1)

In [3]:
# header=None makes pandas number the columns 0..n-1 and keep the file's first
# line as row 0; that row (presumably the column-name line) is skipped below by
# starting every selection at row 1.
df = pd.read_csv('data/winequality-red.csv', header=None)

# Features: the first four columns (0-3) of every remaining row, as floats.
X = df.iloc[1:, :4].values.astype(float)

# Target: column 4, with each distinct value mapped to an integer class id.
# NOTE(review): column 4 is treated as a class label here - confirm this is the
# intended target column.
le = LabelEncoder()
y = le.fit_transform(df.iloc[1:, 4].values)

In [4]:
# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Show each part of the split: test labels, train labels, train rows, test rows.
for split_part in (y_test, y_train, X_train, X_test):
    print(split_part)

[ 77  45  70  41  40  46  52  56  21  29  70  41  27  40  45  30  52  55
  43  53  49  43  11  27  45  33  29  52  51  67  41  49  39  17  37  35
  34  29  34  22  26  47  22  46 100  37  38  22  10  41  80 148  63   7
  42  58  37  46  34  44  47  48  51  37  47  46  38  57  15  43  29  50
  43  41  34  40   7  51   7  29  77  37  26  45  50  29  47  34  38  23
  52  31  12  38  37  37  36  21 106  39  29  46  79  23  27  37  47  65
  14 131  23  55  24  85  27  37  18  65  45  31  30  59  49  40  32  39
  23  44  56  40  43  86  40  48  41  43  34  43  44  49  38  41  19  39
 100  52  26  80  60  44  40  25  56  45  43  34  46  22  41  47  18  29
  56  74  32  39  28  32 108  56  41  41  25  42  13  45  42  46  74   3
  21  20  58  45  13  83  23  35   4  42  78  52  55  45  53  20  37  36
  48  37  30  99  25  49  37  34  39  23  19  56   9 109  40  33  10  56
  34  34  59  54  60  23  21  48  24  37  36  24  36  44  68  47  61  49
  58  37  39  50 124  29  33  47  28  47 145  46  3

In [5]:
def accuracy(y_pred, y_test):
    """Return the fraction of predictions that equal the true labels.

    Args:
        y_pred: predicted labels (array-like).
        y_test: true labels (array-like) with the same shape as ``y_pred``.

    Returns:
        The number of positions where both inputs agree, divided by
        ``len(y_test)``.

    Raises:
        ValueError: if the two inputs do not have the same shape.
    """
    # Convert first: comparing two plain lists with == yields one bool for the
    # whole list instead of an element-wise comparison.
    y_pred = np.asarray(y_pred)
    y_test = np.asarray(y_test)

    # Refuse mismatched inputs rather than letting NumPy broadcast them.
    if y_pred.shape != y_test.shape:
        raise ValueError(
            "y_pred and y_test must have the same shape, got %s and %s"
            % (y_pred.shape, y_test.shape)
        )

    accuracy_value = np.sum(y_pred == y_test) / len(y_test)
    return accuracy_value

In [6]:
def print_stats_percentage_train_test(algorithm_name, y_test, y_pred):
    """Print a classification report for one algorithm.

    Prints, in order: two separator lines, the algorithm name, the accuracy,
    the confusion matrix, and the weighted-average precision, recall and F1.

    Args:
        algorithm_name: label shown in the report header.
        y_test: true labels.
        y_pred: predicted labels.

    Returns:
        None; the report is only written to standard output.
    """
    separator = "------------------------------------------------------"
    print(separator)
    print(separator)

    print("algorithm is: ", algorithm_name)

    print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))

    # Build the matrix before printing its caption, as the original did.
    confmat = confusion_matrix(y_true=y_test, y_pred=y_pred)
    print("confusion matrix")
    print(confmat)

    # The remaining scores share the same call shape: weighted average over classes.
    weighted_metrics = (
        ('Precision: %.3f', precision_score),
        ('Recall: %.3f', recall_score),
        ('F1-measure: %.3f', f1_score),
    )
    for template, metric in weighted_metrics:
        print(template % metric(y_true=y_test, y_pred=y_pred, average='weighted'))

In [7]:
# Predict a label for every test row with the nearest-neighbour predict().
list_of_pred_labels = []

for test_x in X_test:
    temp_pred = predict(test_x)
    # predict() returns [(label, vote_count)]; keep only the label.
    top_label = temp_pred[0][0]
    list_of_pred_labels.append(top_label)

# Show the predictions, a caption, then the true labels, one per line.
for shown in (list_of_pred_labels, "true labels below", y_test):
    print(shown)

[31, 69, 49, 18, 40, 25, 25, 77, 44, 50, 31, 43, 39, 41, 49, 30, 41, 31, 42, 35, 31, 43, 13, 42, 42, 33, 48, 48, 34, 34, 41, 35, 39, 43, 60, 23, 58, 13, 38, 37, 27, 18, 31, 48, 29, 39, 29, 52, 75, 33, 43, 73, 49, 17, 39, 40, 37, 87, 48, 44, 40, 31, 52, 31, 92, 15, 38, 41, 51, 66, 102, 33, 56, 32, 46, 53, 52, 51, 38, 36, 77, 29, 26, 45, 47, 62, 47, 34, 21, 28, 45, 85, 52, 38, 26, 30, 36, 27, 39, 40, 25, 42, 52, 87, 63, 37, 30, 46, 85, 41, 5, 54, 37, 81, 23, 84, 30, 40, 34, 39, 0, 56, 89, 42, 63, 42, 28, 46, 56, 13, 56, 43, 40, 86, 33, 44, 42, 27, 55, 40, 38, 26, 25, 32, 29, 35, 20, 87, 59, 54, 40, 23, 60, 45, 56, 58, 39, 59, 34, 48, 40, 17, 34, 34, 28, 39, 47, 39, 25, 57, 40, 60, 25, 53, 61, 35, 66, 34, 74, 4, 21, 17, 40, 73, 43, 48, 43, 51, 56, 53, 40, 48, 26, 36, 38, 20, 29, 36, 33, 37, 44, 59, 43, 33, 43, 13, 36, 41, 17, 91, 8, 57, 40, 46, 96, 63, 119, 33, 60, 60, 60, 23, 27, 55, 30, 114, 44, 42, 102, 35, 43, 31, 39, 34, 58, 37, 39, 47, 124, 40, 39, 81, 41, 47, 55, 46, 40, 81, 31, 43

In [8]:
# Turn the list of predicted labels into a NumPy array so it can be compared
# element-wise with y_test.
y_pred = np.asarray(list_of_pred_labels)

print(y_pred, "true labels below", y_test, sep="\n")

[ 31  69  49  18  40  25  25  77  44  50  31  43  39  41  49  30  41  31
  42  35  31  43  13  42  42  33  48  48  34  34  41  35  39  43  60  23
  58  13  38  37  27  18  31  48  29  39  29  52  75  33  43  73  49  17
  39  40  37  87  48  44  40  31  52  31  92  15  38  41  51  66 102  33
  56  32  46  53  52  51  38  36  77  29  26  45  47  62  47  34  21  28
  45  85  52  38  26  30  36  27  39  40  25  42  52  87  63  37  30  46
  85  41   5  54  37  81  23  84  30  40  34  39   0  56  89  42  63  42
  28  46  56  13  56  43  40  86  33  44  42  27  55  40  38  26  25  32
  29  35  20  87  59  54  40  23  60  45  56  58  39  59  34  48  40  17
  34  34  28  39  47  39  25  57  40  60  25  53  61  35  66  34  74   4
  21  17  40  73  43  48  43  51  56  53  40  48  26  36  38  20  29  36
  33  37  44  59  43  33  43  13  36  41  17  91   8  57  40  46  96  63
 119  33  60  60  60  23  27  55  30 114  44  42 102  35  43  31  39  34
  58  37  39  47 124  40  39  81  41  47  55  46  4

### Here, we define the regression models that are used in this analysis.
### a) Linear Regression Model

In [9]:
def model_fn_linear(in_features=1, out_features=1):
    """Build a linear regression model: one fully connected layer.

    Args:
        in_features: size of each input sample (default 1).
        out_features: size of each output sample (default 1).

    Returns:
        A newly created ``torch.nn.Linear`` module.
    """
    # Imported here because the file's import cell does not import torch.
    from torch import nn

    return nn.Linear(in_features, out_features)

### b) Multi-Layer Perceptron

In [10]:
def model_fn_nn_1hidden(in_features=1599, hidden_features=599, out_features=1):
    """Build a multi-layer perceptron with one ReLU hidden layer.

    Args:
        in_features: size of each input sample (default 1599).
        hidden_features: width of the hidden layer (default 599).
        out_features: size of each output sample (default 1).

    Returns:
        A ``torch.nn.Sequential`` of Linear -> ReLU -> Linear.
    """
    # Imported here because the file's import cell does not import torch.
    from torch import nn

    seq_model = nn.Sequential(
        nn.Linear(in_features, hidden_features),
        nn.ReLU(),
        nn.Linear(hidden_features, out_features),
    )

    return seq_model

### c) Deep Neural Network with 2 hidden layers

In [11]:
def model_deep_learning_2hidden(in_features=1599, hidden1_features=1099,
                                hidden2_features=599, out_features=1):
    """Build a neural network with two hidden layers (ReLU, then Tanh).

    Args:
        in_features: size of each input sample (default 1599).
        hidden1_features: width of the first hidden layer (default 1099).
        hidden2_features: width of the second hidden layer (default 599).
        out_features: size of each output sample (default 1).

    Returns:
        A ``torch.nn.Sequential`` of Linear -> ReLU -> Linear -> Tanh -> Linear.
    """
    # Imported here because the file's import cell does not import torch.
    from torch import nn

    seq_model = nn.Sequential(
        nn.Linear(in_features, hidden1_features),
        nn.ReLU(),
        nn.Linear(hidden1_features, hidden2_features),
        nn.Tanh(),
        nn.Linear(hidden2_features, out_features),
    )

    return seq_model

### d) Deep Neural Network with 4 hidden layers

In [12]:
def model_deep_learning_4hidden(in_features=1599,
                                hidden_sizes=(1099, 599, 199, 99),
                                out_features=1):
    """Build a neural network with four hidden layers.

    The first hidden layer is followed by ReLU and the other three by Tanh;
    a final Linear layer produces the output.

    Args:
        in_features: size of each input sample (default 1599).
        hidden_sizes: widths of the four hidden layers, in order
            (default ``(1099, 599, 199, 99)``).
        out_features: size of each output sample (default 1).

    Returns:
        A ``torch.nn.Sequential`` holding the nine layers.

    Raises:
        ValueError: if ``hidden_sizes`` does not contain exactly four widths.
    """
    # Imported here because the file's import cell does not import torch.
    from torch import nn

    if len(hidden_sizes) != 4:
        raise ValueError(
            "hidden_sizes must contain exactly 4 widths, got %d" % len(hidden_sizes)
        )

    activations = (nn.ReLU, nn.Tanh, nn.Tanh, nn.Tanh)

    # Chain the layers: each hidden layer's width is the next layer's input size.
    layers = []
    width = in_features
    for size, activation in zip(hidden_sizes, activations):
        layers.append(nn.Linear(width, size))
        layers.append(activation())
        width = size
    layers.append(nn.Linear(width, out_features))

    seq_model = nn.Sequential(*layers)

    return seq_model

In [13]:
# y_pred holds the k-nearest-neighbour predictions produced by predict(), so
# the report is labelled 'knn' rather than after one of the neural-network
# builders. The helper prints its report and returns None, so its result is
# neither stored nor printed.
print_stats_percentage_train_test('knn', y_test, y_pred)

------------------------------------------------------
------------------------------------------------------
algorithm is:  model_deep_learning_4hidden
Accuracy: 0.15
confusion matrix
[[0 0 0 ... 0 0 0]
 [0 0 1 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Precision: 0.162
Recall: 0.150
F1-measure: 0.147
None


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
