In [38]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [39]:
data = load_breast_cancer()
x = data.data
y = data.target
features = data.feature_names

In [40]:
# print (f'feature:{features}\nx:{x}\ny:{y}')
# df = pd.DataFrame(x, columns=features)
# df.describe

In [41]:
def y_changer(y):
    return np.where(y==0, -1, 1) # change all elements of y equal to 0 to new value -1 else 1

In [42]:
class perceptron:
    def __init__(self, epoch=1000, learning_rate=0.01):
        self.epoch = epoch
        self.learning_rate = learning_rate
        self.w = None
        self.misclass =[]
        self.accuracy = 0

        self.TP = None
        self.TN = None
        self.FP = None
        self.FN = None

    def xw_mapper (self, x):
        return np.where(x>=0 , 1 , -1)
    
    def fit(self, x, y):
        n, m = x.shape #samples, features
        self.w = np.zeros(m+1)
        x = np.c_[np.ones(n), x]
        for ep in range(self.epoch):
            num_misclassified = 0
            for i in range(n):
                xw =  x[i] @ self.w
                y_hat = self.xw_mapper(xw)
                if y_hat != y[i]:                    
                    self.w += self.learning_rate * y[i] * x[i]
                    num_misclassified += 1
            self.misclass.append(num_misclassified)
            if num_misclassified ==0:
                break
        self.accuracy = 1-num_misclassified/n
    def pred(self, x):
        x = np.c_[np.ones(x.shape[0]), x]
        return self.xw_mapper(x @ self.w)
    def metric(self, y_hat, y):
        self.TP = np.sum((y == 1) & (y_hat == 1))  # True Positive
        self.TN = np.sum((y == -1) & (y_hat == -1))  # True Negative
        self.FP = np.sum((y == -1) & (y_hat == 1))  # False Positive
        self.FN = np.sum((y == 1) & (y_hat == -1))  # False Negative
    def Accuracy_t (self):
        return (self.TP + self.TN) / (self.TP + self.TN + self.FP + self.FN)
    def Precision (self):
        return self.TP / (self.TP + self.FP) if (self.TP + self.FP) > 0 else 0
    def Recall (self):
        return self.TP / (self.TP + self.FN) if (self.TP + self.FN) > 0 else 0
    def F1_score(self):
        return  2 * (self.Precision() * self.Recall()) / (self.Precision() + self.Recall()) if (self.Precision() + self.Recall()) > 0 else 0
    def Specificity (self):
        return self.TN / (self.TN + self.FP) if (self.TN + self.FP) > 0 else 0
    def NPV (self):
        return self.TN / (self.TN + self.FN) if (self.TN + self.FN) > 0 else 0

In [43]:
y = y_changer(y)
x_train, x_test, y_train, y_test = train_test_split (x, y, test_size = 0.2, random_state=42)
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [44]:
from IPython.display import Math
formulas = r'''
\text{Accuracy} = \frac{\text{TP} + \text{TN}}{\text{TP} + \text{TN} + \text{FP} + \text{FN}} \qquad \text{when train data is symetric} 
\\ \text{ }
\\ \text{ }
\\ \text{> when we know the model's prediction (positive) or (negative), NPV and precision are used}
\\ \text{NPV (negative predictive value)} = P(\text{case is healthy} \mid \text{model predict healthy})  = \frac{\text{TN}}{\text{TN} + \text{FN}} 
\\ \text{Precision} = P(\text{case is ill} \mid \text{model predict ill}) = \frac{\text{TP}}{\text{TP} + \text{FP}} 
\\      1-\text{Precision} = P(\text{case is healthy} \mid \text{model predict ill})
\\      \text{when cost of False positive is high like false detecting a case as malignant (idle financial and stress costs for treatment)} \qquad \text{normal range: 0.90-0.95} \\
\\ \text{ }
\\ \text{ }
\\ \text{> when we know the case condition ill or healthy, senstivity (recall) and (specificity) are used}
\\ \text{Sensitivity (Recall)} = P(\text{model predict ill} \mid \text{case is ill})= \frac{\text{TP}}{\text{TP} + \text{FN}} 
\\      1-\text{Recall} = P(\text{model predict healthy} \mid \text{case is ill})
\\      \text{when cost of False negative is high like false detecting a case as benign (high chance of death) 1-senstivity should be low} \qquad \text{normal range: 0.90-0.95}
\\ \text{Specificity} = P(\text{model predict healthy} \mid \text{case is healthy}) =\frac{\text{TN}}{\text{TN} + \text{FP}} \qquad \text{the power of model in detecting real negatives}
\\ \text{ }
\\ \text{ }
\\ \text{F1-Score} = 2 \times \frac{\text{Precision} \times \text{Recall}}{\text{Precision} + \text{Recall}} \qquad \text{normal range: 0.80-0.90}
\\ \text{ }
\\ \text{Balanced Accuracy} = \frac{\text{Recall} + \text{Specificity}}{2} \qquad \text{when train data is dissymmetric}
\\ \text{ }
\\ \text{ }
\\ \text{Log Loss} = -\frac{1}{N} \sum_{i=1}^{N} \left[ y_i \log(p_i) + (1-y_i) \log(1-p_i) \right]
\\ \text{Expected Accuracy} = \frac{(\text{TP} + \text{FP}) \times (\text{TP} + \text{FN}) + (\text{TN} + \text{FP}) \times (\text{TN} + \text{FN})}{(\text{TP} + \text{TN} + \text{FP} + \text{FN})^2}
\\ \text{Cohen's Kappa} = \frac{\text{Accuracy} - \text{Expected Accuracy}}{1 - \text{Expected Accuracy}} \qquad \text{the remove effect of chance}
\\ \text{TPR} = \frac{\text{TP}}{\text{TP} + \text{FN}}, \quad \text{FPR} = \frac{\text{FP}}{\text{FP} + \text{TN}}
\\ \text{AUC} = \int_{0}^{1} \text{TPR}(FPR) \, d(FPR)
\\ \text{MCC} = \frac{\text{TP} \times \text{TN} - \text{FP} \times \text{FN}}{\sqrt{(\text{TP} + \text{FP})(\text{TP} + \text{FN})(\text{TN} + \text{FP})(\text{TN} + \text{FN})}}
'''
Math(formulas)

<IPython.core.display.Math object>

In [45]:
mymodel = perceptron()
mymodel.fit(x_train_scaled, y_train)

In [46]:
print(f'train accuracy: {mymodel.accuracy}')
y_hat_test= mymodel.pred(x_test_scaled)
mymodel.metric(y_hat_test,y_test)

train accuracy: 0.9868131868131869


In [47]:
print (f'accuracy: {mymodel.Accuracy_t()}\t Precision: {mymodel.Precision()}\t Recall: {mymodel.Recall()}\t F1_score: {mymodel.F1_score()}\t specificity:{mymodel.Specificity()}')

accuracy: 0.9298245614035088	 Precision: 0.9846153846153847	 Recall: 0.9014084507042254	 F1_score: 0.9411764705882353	 specificity:0.9767441860465116


In [48]:
Precision_treshold = 0.92
NPV_treshold = 0.95
F1_treshold = 0.80
print('Based on metric results')
if mymodel.Precision() < Precision_treshold or mymodel.Recall() < Recall_treshold :
    if mymodel.Precision() < Precision_treshold  :
        print ('When the model detect a case as malignant')
    elif mymodel.NPV() < NPV_treshold:
        print ('When the model detect a case as benign')
    print('It is recommended to redo examnations')
if mymodel.F1_score() < F1_treshold:
    print('Redesign your model')

Based on metric results
When the model detect a case as benign
It is recommended to redo examnations


np.float64(0.8571428571428571)

In [64]:
num_healthies_train =len(y_train[y_train==-1])
num_ills_train = len(y_train[y_train==1])
ratio =num_ills_train/ num_healthies_train
print (f'healthy: {num_healthies_train}, ill: {num_ills_train}, ill2healthy: {ratio}')

healthy: 169, ill: 286, ill2healthy: 1.6923076923076923
