In [67]:
from sklearn import datasets
import pandas as pd
import numpy as np

In [68]:
# import data
iris = datasets.load_iris()
df = pd.DataFrame(iris.data, columns = iris.feature_names)
df['target'] = iris.target


In [69]:
from sklearn.model_selection import train_test_split

X = df.iloc[:, [0,1,2, 3]].values
y = df.iloc[:, 4].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

In [71]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# from sklearn.preprocessing import OneHotEncoder
# oh = OneHotEncoder()
# yTrain = oh.fit_transform(yTrain).toarray()


In [72]:
# Fitting Logistic Regression to the Training set
from sklearn.linear_model import LogisticRegression

classifier = LogisticRegression(random_state = 0, solver='lbfgs', multi_class='auto')
classifier.fit(X_train, y_train)

LogisticRegression(random_state=0)

In [73]:
# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Predict probabilities
probs_y=classifier.predict_proba(X_test)

In [74]:
### Print results 
probs_y = np.round(probs_y, 2)
res = "{:<10} | {:<10} | {:<10} | {:<13} | {:<5}".format("y_test", "y_pred", "Setosa(%)", "versicolor(%)", "virginica(%)\n")
res += "-"*65+"\n"
res += "\n".join("{:<10} | {:<10} | {:<10} | {:<13} | {:<10}".format(x, y, a, b, c) for x, y, a, b, c in zip(y_test, y_pred, probs_y[:,0], probs_y[:,1], probs_y[:,2]))
res += "\n"+"-"*65+"\n"
print(res)


y_test     | y_pred     | Setosa(%)  | versicolor(%) | virginica(%)
-----------------------------------------------------------------
2          | 2          | 0.0        | 0.03          | 0.97      
1          | 1          | 0.01       | 0.95          | 0.04      
0          | 0          | 1.0        | 0.0           | 0.0       
2          | 2          | 0.0        | 0.08          | 0.92      
0          | 0          | 0.98       | 0.02          | 0.0       
2          | 2          | 0.0        | 0.01          | 0.99      
0          | 0          | 0.98       | 0.02          | 0.0       
1          | 1          | 0.01       | 0.71          | 0.28      
1          | 1          | 0.0        | 0.73          | 0.27      
1          | 1          | 0.02       | 0.89          | 0.08      
2          | 2          | 0.0        | 0.44          | 0.56      
1          | 1          | 0.02       | 0.76          | 0.22      
1          | 1          | 0.01       | 0.85          | 0.13      
1       

In [134]:
from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score
from sklearn.metrics import classification_report
#metrics
# print('Accuracy Score : ' + str(accuracy_score(y_test,y_pred)))
# print('Precision Score : ' + str(precision_score(y_test,y_pred, average='macro')))
# print('Recall Score : ' + str(recall_score(y_test,y_pred, average='macro')))
# print('F1 Score : ' + str(f1_score(y_test,y_pred, average='macro')))

labels = np.unique(y_pred)
print(classification_report(y_test, y_pred, labels=labels))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       1.00      0.94      0.97        16
           2       0.90      1.00      0.95         9

    accuracy                           0.97        38
   macro avg       0.97      0.98      0.97        38
weighted avg       0.98      0.97      0.97        38



In [113]:
# Accuracy
from decimal import *
def accuracy(yTest, yPred):
    """
    Classification accuracy is a ratio of the number of correct predictions out of all predictions that were made.
    accuracy = TP+TN / FP+FN+TP+TN
    """

    correct = 0
    for i in range(len(yTest)):
        if yTest[i] == yPred[i]:
            correct += 1
    return correct / float(len(yTest)) 

# Precision
def precision(yTest, yPred):
    """
    Precision is the ratio between the true positives and all the points that are classified as positives.
    precision = TP/(TP + FP)
    """

    tp2 = 0
    fp2 = 0
    for i in range(len(yTest)):
        if yTest[i] == yPred[i]:
            tp2 += 1
            
        if yTest[i] != yPred[i]:
            fp2 += 1
    
    return float(tp2 / (tp2 + fp2))

# Recall
def recall(yTest, yPred):
    """
    Recall is the measure of the model correctly identifying true positives. 
    recall = TP/(TP + FN)
    """

    tp2 = 0
    fn2 = 0
    for i in range(len(yTest)):
        if yTest[i] == yPred[i]:
            tp2 += 1
            
        if yTest[i] != yPred[i]:
            fn2 += 1
    
    return float(tp2 / (tp2 + fn2))

# F1 score
def F1score(yTest, yPred):
    """
    F1 score is the combination of precision and recall. 
    F1 score = (2 * precision * recall) / (precision + recall)
    """

    Precision = precision(yTest, yPred)
    Recall = recall(yTest, yPred)

    return (2 * Precision * Recall) / (Precision + Recall)

print('Accuracy Score : ' + str(accuracy(y_test,y_pred)))
print('Precision Score : ' + str(precision(y_test,y_pred)))
print('Recall Score : ' + str(recall(y_test,y_pred)))
print('F1 Score : ' + str(F1score(y_test,y_pred)))

Accuracy Score : 0.9736842105263158
Precision Score : 0.9736842105263158
Recall Score : 0.9736842105263158
F1 Score : 0.9736842105263158


In [140]:
from sklearn.metrics import confusion_matrix
cnf_matrix = confusion_matrix(y_test, y_pred)

FP = cnf_matrix.sum(axis=0) - np.diag(cnf_matrix) 
FN = cnf_matrix.sum(axis=1) - np.diag(cnf_matrix)
TP = np.diag(cnf_matrix)
TN = cnf_matrix.sum() - (FP + FN + TP)
FP = FP.astype(float)
FN = FN.astype(float)
TP = TP.astype(float)
TN = TN.astype(float)
# Sensitivity, hit rate, recall, or true positive rate
TPR = TP/(TP+FN)
# Specificity or true negative rate
TNR = TN/(TN+FP) 
# Precision or positive predictive value
PPV = TP/(TP+FP)
# Negative predictive value
NPV = TN/(TN+FN)
# Fall out or false positive rate
FPR = FP/(FP+TN)
# False negative rate
FNR = FN/(TP+FN)
# False discovery rate
FDR = FP/(TP+FP)
# Overall accuracy for each class
ACC = (TP+TN)/(TP+FP+FN+TN)

#accuracy
accuracy = (TP + TN)/(FP + FN + TP + TN)
precision = TP / (TP + FP)
recall = TP / (TP + FN)
f1score = (2 * precision * recall) / (precision + recall)

print("TPR", TPR)
print("TNR", TNR)
print("PPV", PPV)
print("NPV", NPV)
print("FNR", FNR)
print("ACC", ACC)
print(" ")
print(" ")

print("accuracy", accuracy)
print("precision", precision)
print("recall", recall)
print("f1score", f1score)

TPR [1.     0.9375 1.    ]
TNR [1.         1.         0.96551724]
PPV [1.  1.  0.9]
NPV [1.         0.95652174 1.        ]
FNR [0.     0.0625 0.    ]
ACC [1.         0.97368421 0.97368421]
 
 
accuracy [1.         0.97368421 0.97368421]
precision [1.  1.  0.9]
recall [1.     0.9375 1.    ]
f1score [1.         0.96774194 0.94736842]


In [None]:
# from sklearn.preprocessing import StandardScaler
# from sklearn.linear_model import LogisticRegression
# from sklearn.metrics import classification_report

# sc = StandardScaler()
# X_train = sc.fit_transform(xTrain)
# X_test = sc.transform(xTest)


# classifier = LogisticRegression(random_state = 0, solver='lbfgs', multi_class='auto')
# classifier.fit(X_train, yTrain)

# y_pred = classifier.predict(X_test)

# # labels = np.unique(y_pred)
# # print(accuracy(yTest, y_pred))
