In [1]:
import sklearn
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [2]:
# Record which scikit-learn release produced the outputs in this notebook.
sklearn.__version__

'0.23.1'

In [3]:
## Signature: sklearn.datasets.load_iris(*, return_X_y=False, as_frame=False)  # the as_frame parameter was added in scikit-learn 0.23

In [4]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [5]:
# Load the Iris dataset that ships with scikit-learn, using the default arguments.
# The attributes used below are .data, .target, .feature_names, .target_names and .DESCR.
iris = load_iris()

In [8]:
# Print the dataset's built-in description text (attributes, summary statistics, provenance).
print(iris.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [None]:
# Original names of the measurement columns in iris.data.
iris.feature_names

In [None]:
# Names of the target classes.
iris.target_names

In [None]:
# Build the feature frame with snake_case column names in one step, so the
# columns can be referenced by simple names in the plotting calls further down.
X = pd.DataFrame(
    iris.data,
    columns=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
)

In [None]:
# (rows, columns) of the feature frame.
X.shape

In [None]:
# Target vector used alongside X for the split and for colouring the scatter plot.
y = iris.target

In [None]:
# Class names, shown here for reference next to the target vector.
iris.target_names

In [None]:
# Preview five random rows. The fixed seed pins the selection so the displayed
# rows are the same on every Restart & Run All.
X.sample(5, random_state=100)

In [None]:
# Show the raw target labels.
y

In [None]:
# Colour the points by species name instead of the raw integer class code.
# A numeric hue is treated as a continuous quantity (sequential palette, numeric
# legend), which misrepresents three unordered classes; string labels give one
# distinct colour and a readable legend entry per species.
species = pd.Series(iris.target_names[y], name='species')
sns.relplot(x='petal_length', y='petal_width', data=X, hue=species)

In [None]:
# Hold-out split with a fixed seed for reproducibility.
# NOTE(review): test_size=0.8 evaluates on 80% of the rows and trains on only
# the remaining 20% (120 test / 30 train for the 150-row dataset). Confirm this
# is intended; 0.2 is the more common choice.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.8,
    random_state=100,
)

In [None]:
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training rows only, so no information from the test
# rows leaks into the scaling statistics.
ss = StandardScaler().fit(X_train)
ss

In [None]:
# Apply the scaler fitted on the training rows to both partitions.
X_train_std, X_test_std = ss.transform(X_train), ss.transform(X_test)

In [None]:
from sklearn.linear_model import LogisticRegression
# Fit a logistic-regression classifier with default settings on the standardized training features.
model = LogisticRegression().fit(X_train_std, y_train)
model

In [None]:
# Predict class labels for the held-out rows, using the standardized features
# (the same representation the model was fitted on).
y_pred = model.predict(X_test_std)

In [None]:
# Show the predicted labels for the test rows.
y_pred

In [None]:
from sklearn.metrics import confusion_matrix
# Rows correspond to the actual classes, columns to the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm

In [None]:
# Draw the confusion matrix as an annotated heatmap and label the axes
# through the Axes object that seaborn returns.
heat_ax = sns.heatmap(cm, annot=True, cbar=False)
heat_ax.set_xlabel("Predicted")
heat_ax.set_ylabel("Actual")

In [None]:
from sklearn.metrics import plot_confusion_matrix

In [None]:
# The model was fitted on standardized features, so it has to be scored on
# X_test_std. Passing the raw X_test would feed unscaled measurements into the
# model and produce a confusion matrix that does not reflect its real predictions.
# normalize='true' shows each row as fractions of the actual class.
plot_confusion_matrix(model, X_test_std, y_test, display_labels=iris.target_names, normalize='true')

In [None]:
from sklearn.metrics import classification_report
# Text summary of per-class precision / recall / F1 and the averaged rows.
report_text = classification_report(y_test, y_pred)
print(report_text)

In [None]:
from sklearn.metrics import precision_recall_fscore_support

# Macro average = plain (unweighted) mean of the per-class precision values.
# The per-class figures are computed from y_test / y_pred instead of being typed
# in from a previous report, so the result stays correct if the split, the
# scaler or the model changes.
per_class_precision = precision_recall_fscore_support(y_test, y_pred)[0]
print(f'Macro Avg Precision : {per_class_precision.sum() / len(per_class_precision):.2f}')

In [None]:
from sklearn.metrics import precision_recall_fscore_support

# Weighted average = per-class recall weighted by each class's support
# (number of true rows of that class in the test set).
# Recall and support are computed from y_test / y_pred rather than typed in, so
# the figure cannot go stale or be copied from the wrong column of a report.
class_metrics = precision_recall_fscore_support(y_test, y_pred)
per_class_recall, class_support = class_metrics[1], class_metrics[3]
print(f'Weighted Avg Recall :  {(per_class_recall * class_support).sum() / class_support.sum():.2f}')

In [None]:
# Demonstrates the ".2f" format spec: fixed-point with two decimal places.
format(10.4433, ".2f")

In [None]:
# Predicted class probabilities for every test row (standardized features).
y_prob_pred = model.predict_proba(X_test_std)

In [None]:
# Show floats in printed NumPy arrays with two decimals. This is a global
# setting and affects every array displayed after this cell.
np.set_printoptions(formatter={'float': "{:0.2f}".format})

In [None]:
# First ten rows of the predicted probabilities.
y_prob_pred[:10]

In [None]:
# Fitted coefficients; indexed below as cf[class, feature].
cf = model.coef_

In [None]:
# Show the coefficient matrix.
cf

In [None]:
# Fitted intercepts; indexed below by class.
intercept = model.intercept_

In [None]:
# Show the intercept vector.
intercept

In [None]:
# Standardized feature values of the first test row.
row1 = X_test_std[0]

In [None]:
# Linear score for class 0 on that row: the class-0 intercept plus each feature
# value times its class-0 coefficient, accumulated left to right.
y_value = sum(
    (feature * weight for feature, weight in zip(row1, cf[0])),
    intercept[0],
)

In [None]:
# Reproduce predict_proba for the first test row by hand.
# With its default settings LogisticRegression fits one multinomial model for a
# three-class target, so the probabilities come from a softmax over all class
# scores, not from a per-class sigmoid. (A sigmoid would also need a negated
# exponent, 1 / (1 + exp(-z)); with exp(+z) the expression yields 1 - sigmoid(z).)
logits = intercept + np.dot(cf, row1)        # one linear score per class
exp_scores = np.exp(logits - logits.max())   # subtracting the max avoids overflow in exp()
class_probs = exp_scores / exp_scores.sum()  # softmax: non-negative, sums to 1
class_probs  # compare with y_prob_pred[0]