# T2 Binary Classification using Logistic regression (Iris data)
## Multivariate

## 1. Setup

In [1]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)


## 2. Fetch, Load and Visualize data

### 2.1 Fetch data from sklearn datasets

In [2]:
iris = pd.read_csv("C:\\Users\\Rove Labs\\Desktop\\iris.csv")

NameError: name 'pd' is not defined

### 2.2 Exploratory Data Analysis (EDA)

In [None]:
iris.head()

In [None]:
iris.info()

In [None]:
iris.describe()

### 2.2 Visualize Dataset

In [None]:
import pandas as pd

positive = iris[iris['target'].isin([2])]
negative = iris[iris['target'].isin([0,1])]

In [None]:
%matplotlib inline
fig, ax = plt.subplots(figsize=(6, 6))
ax.scatter(positive['PW'], positive['PL'], 
           s=50, c='g', marker='o', label='Virginica', alpha = 0.6)
ax.scatter(negative['PW'], negative['PL'], 
           s=50, c='r', marker='x', label='Not Virginika')
ax.legend()
ax.set_xlabel('Petal Width (cm)')
ax.set_ylabel('Petal Length (cm)')
ax.grid()
plt.show()

###  2.3 Prepare dataset

In [None]:
X = iris.iloc[:, [2, 3]].values # petal length, petal width
y = (iris.iloc[:, 4]== 2).astype(np.int)

## 3. Training Model : Logistic Regression 

### 3.1 Logistic Classifier

In [None]:
from sklearn.linear_model import LogisticRegression
log_reg = LogisticRegression(solver="lbfgs", C=10**10, random_state=42)
log_reg.fit(X, y)

###  3.2 Visualize Decision Boundary (Multivariate)

In [None]:
x0, x1 = np.meshgrid(
        np.linspace(2.9, 7, 500).reshape(-1, 1),
        np.linspace(0.8, 2.7, 200).reshape(-1, 1),
    )
X_new = np.c_[x0.ravel(), x1.ravel()]

y_proba = log_reg.predict_proba(X_new)

plt.figure(figsize=(10, 4))
plt.plot(X[y==0, 0], X[y==0, 1], "bs")
plt.plot(X[y==1, 0], X[y==1, 1], "g^")

zz = y_proba[:, 1].reshape(x0.shape)
contour = plt.contour(x0, x1, zz, cmap=plt.cm.brg)


left_right = np.array([2.9, 7])
boundary = -(log_reg.coef_[0][0] * left_right + log_reg.intercept_[0]) / log_reg.coef_[0][1]

plt.clabel(contour, inline=1, fontsize=12)
plt.plot(left_right, boundary, "k--", linewidth=3)
plt.text(3.5, 1.5, "Not Iris-Virginica", fontsize=14, color="b", ha="center")
plt.text(6.5, 2.3, "Iris-Virginica", fontsize=14, color="g", ha="center")
plt.xlabel("Petal length", fontsize=14)
plt.ylabel("Petal width", fontsize=14)
plt.axis([2.9, 7, 0.8, 2.7])
plt.show()

###  3.3 Predicition with model

In [None]:
boundary

In [None]:
log_reg.predict([[4.8, 1.8]])

In [None]:
log_reg.predict_proba([[4.8, 1.8]])

### 4. Model Evalution

#### 4.1 Confusion Matrix

In [None]:
y_pred = log_reg.predict(X)

In [None]:
from sklearn.metrics import confusion_matrix
cnf_matrix = confusion_matrix(y, y_pred)
cnf_matrix

In [None]:
result = confusion_matrix(y, y_pred)

print("True Negative : " + str(result[0,0]))
print("False Negative: " + str(result[1,0]))
print("False Positive: " + str(result[0,1]))
print("True Positive : " + str(result[1,1]))

#### 4.2 Performance Measure  

In [None]:
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import accuracy_score, f1_score

print("Accuracy after CV :", accuracy_score(y, y_pred))
print("Pricision after CV:", precision_score(y, y_pred))
print("Recall after CV   :", recall_score(y, y_pred))
print("f1_score after CV :", f1_score(y, y_pred))

#### 4.3  ROC Curve

In [None]:
from sklearn.metrics import roc_curve

y_scores = log_reg.predict_proba(X)[::,1]

fpr, tpr, thresholds = roc_curve(y, y_scores)

In [None]:
def plot_roc_curve(fpr, tpr, label=None):
    plt.plot(fpr, tpr, linewidth=2, label=label)
    plt.plot([0, 1], [0, 1], 'k--') # dashed diagonal
    plt.axis([-0, 1, 0, 1.1])                                    
    plt.xlabel('False Positive Rate (Fall-Out)', fontsize=16) 
    plt.ylabel('True Positive Rate (Recall)', fontsize=16)    
    plt.grid(True)                                            

plt.figure(figsize=(6, 6))                         
plot_roc_curve(fpr, tpr)                      
plt.show()

#### 4.4  AUC Score

In [None]:
from sklearn.metrics import roc_auc_score

roc_auc_score(y, y_scores)

#### 4.5  Classification Report

In [None]:
from sklearn.metrics import classification_report
print(classification_report(y, y_pred))