In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report

import aquire
import prepare
import split_scale

import warnings
warnings.filterwarnings("ignore")

#### Fit the logistic regression classifier to your training sample, then use it to make predictions on that same training sample

In [2]:
# Acquire the raw iris data, then pass it through prepare.prep_iris, which
# hands back the prepared frame plus an encoder object
# (presumably the encoder used for the species column — confirm in prepare).
iris_raw = aquire.get_iris_data()
df, encoder = prepare.prep_iris(iris_raw)
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [3]:
# Partition the prepared frame into train and test sets. 0.8 is the fraction
# argument given to split_my_data (presumably the training share — confirm in split_scale).
train, test = split_scale.split_my_data(df, 0.8)

In [4]:
# Separate the predictors from the target for each partition.
# The targets are kept as single-column DataFrames (double-bracket selection).
target_col = "species"
X_train, y_train = train.drop(columns=target_col), train[[target_col]]
X_test, y_test = test.drop(columns=target_col), test[[target_col]]

In [5]:
# Create the classifier: inverse regularisation strength C=1, class 1 weighted
# twice as heavily as the remaining classes, and a fixed seed for the 'saga' solver.
# NOTE(review): warnings are silenced globally in the import cell, so a
# ConvergenceWarning at the default max_iter would be hidden — confirm convergence.
logit = LogisticRegression(C=1, class_weight={1:2}, random_state = 123, solver='saga')

# Fit on the training sample. scikit-learn expects a 1-D target, but y_train is
# a single-column DataFrame; flatten it explicitly instead of relying on the
# implicit conversion (which emits a DataConversionWarning).
logit.fit(X_train, np.ravel(y_train))

LogisticRegression(C=1, class_weight={1: 2}, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=123, solver='saga', tol=0.0001, verbose=0,
                   warm_start=False)

In [6]:
# Show the fitted parameters: one coefficient row and one intercept per class.
fitted_params = (
    ('Coefficient: \n', logit.coef_),
    ('Intercept: \n', logit.intercept_),
)
for heading, values in fitted_params:
    print(heading, values)

Coefficient: 
 [[ 0.31190115  1.44915589 -2.33359923 -1.02353246]
 [ 0.44119976 -1.70023706  0.60904436 -1.4215582 ]
 [-1.64209783 -1.73568864  2.69199299  2.48328471]]
Intercept: 
 [ 0.58799989  1.95748209 -2.58666863]


#### Evaluate your in-sample results using the model score, confusion matrix, and classification report.

In [7]:
# In-sample evaluation: hard class predictions, per-class probabilities,
# and the mean accuracy reported by the estimator's own score method.
y_pred = logit.predict(X_train)
y_pred_proba = logit.predict_proba(X_train)

train_accuracy = logit.score(X_train, y_train)
accuracy_message = 'Accuracy of Logistic Regression classifier on training set: {:.2f}'
print(accuracy_message.format(train_accuracy))

Accuracy of Logistic Regression classifier on training set: 0.94


In [8]:
# Confusion matrix on the training sample: rows are the actual classes,
# columns are the predicted classes.
train_confusion = confusion_matrix(y_train, y_pred)
print(train_confusion)

[[37  0  0]
 [ 0 43  1]
 [ 0  6 33]]


In [9]:
# Per-class precision, recall, f1-score and support for the training predictions.
train_report = classification_report(y_train, y_pred)
print(train_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        37
           1       0.88      0.98      0.92        44
           2       0.97      0.85      0.90        39

    accuracy                           0.94       120
   macro avg       0.95      0.94      0.94       120
weighted avg       0.95      0.94      0.94       120



#### Print and clearly label the following: Accuracy, true positive rate, false positive rate, true negative rate, false negative rate, precision, recall, f1-score, and support.

#### Run through the same steps using another solver (liblinear)

In [19]:
# Re-create the classifier with the same settings as before (C=1, class 1
# weighted twice as heavily, seed 123) but with the 'liblinear' solver.
# Note that this rebinds `logit`, so the previously fitted model is replaced.
logit = LogisticRegression(C=1, class_weight={1:2}, random_state = 123, solver='liblinear')

# Fit on the training sample. scikit-learn expects a 1-D target, but y_train is
# a single-column DataFrame; flatten it explicitly instead of relying on the
# implicit conversion (which emits a DataConversionWarning).
logit.fit(X_train, np.ravel(y_train))

LogisticRegression(C=1, class_weight={1: 2}, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=123, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False)

In [20]:
# Show the parameters learned by the liblinear fit, coefficients first.
coef_heading, intercept_heading = 'Coefficient: \n', 'Intercept: \n'
print(coef_heading, logit.coef_)
print(intercept_heading, logit.intercept_)

Coefficient: 
 [[ 0.37830241  1.39373078 -2.16975423 -0.95877223]
 [ 0.53325407 -1.61393357  0.61682878 -1.48608802]
 [-1.58600296 -1.57640704  2.41071353  2.2050944 ]]
Intercept: 
 [ 0.24819862  1.20458773 -1.05526796]


In [21]:
# In-sample evaluation of the liblinear model: class predictions,
# class probabilities, and the estimator's mean accuracy on the training set.
y_pred = logit.predict(X_train)
y_pred_proba = logit.predict_proba(X_train)

train_accuracy = logit.score(X_train, y_train)
accuracy_message = 'Accuracy of Logistic Regression classifier on training set: {:.2f}'
print(accuracy_message.format(train_accuracy))

Accuracy of Logistic Regression classifier on training set: 0.94


In [22]:
# Confusion matrix for the liblinear predictions on the training sample
# (rows: actual classes, columns: predicted classes).
train_confusion = confusion_matrix(y_train, y_pred)
print(train_confusion)

[[37  0  0]
 [ 0 41  3]
 [ 0  4 35]]


In [23]:
# Precision, recall, f1-score and support per class for the liblinear predictions.
train_report = classification_report(y_train, y_pred)
print(train_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        37
           1       0.91      0.93      0.92        44
           2       0.92      0.90      0.91        39

    accuracy                           0.94       120
   macro avg       0.94      0.94      0.94       120
weighted avg       0.94      0.94      0.94       120

