In [1]:
import pandas as pd
import numpy as np

# Import the Dataset

In [2]:
from sklearn.datasets import load_iris

iris = load_iris()

x = iris.data
y = iris.target



# Normalization

In [3]:
x = (x-np.min(x)) / (np.max(x) - np.min(x))

# Train - Test Split

In [4]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

# KNN

In [5]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=3)

# Cross Validation

In [6]:
from sklearn.model_selection import cross_val_score

accuracies = cross_val_score(estimator=knn, X = x_train, y = y_train, cv = 10)
accuracies

array([1.        , 1.        , 0.90909091, 0.90909091, 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ])

In [7]:
print("Average of accuracy: {}".format(np.mean(accuracies)))

Average of accuracy: 0.9818181818181818


In [8]:
print("STD of accuracy: {}".format(np.std(accuracies)))

STD of accuracy: 0.036363636363636376


# Model Test

In [9]:
knn.fit(x_train, y_train)
print("test accuracy",knn.score(x_test, y_test))

test accuracy 0.9555555555555556


# Grid Search Cross Validition

## KNN

In [10]:
from sklearn.model_selection import GridSearchCV

grid = {"n_neighbors":np.arange(1,50)}
knn = KNeighborsClassifier()

knn_cv = GridSearchCV(knn, grid, cv=10)
knn_cv.fit(x, y)

In [11]:
print("tuned hyperparamater k:", knn_cv.best_params_)
print("tuned best score:", knn_cv.best_score_)

tuned hyperparamater k: {'n_neighbors': 13}
tuned best score: 0.9800000000000001


## Logistic Regression

In [12]:
x = x[:100, :]
y = y[:100]

from sklearn.linear_model import LogisticRegression

grid = {"C":np.logspace(-3, 3, 7), "penalty":["l1", "l2"]}
#l1 = lasso l2 = rigde
logreg = LogisticRegression()
logreg_cv = GridSearchCV(logreg, grid, cv = 10)
logreg_cv.fit(x, y)

print("tuned hyperparamaters:", logreg_cv.best_params_)
print("tuned best accuracy: ", logreg_cv.best_score_)
 

tuned hyperparamaters: {'C': 0.001, 'penalty': 'l2'}
tuned best accuracy:  1.0


70 fits failed out of a total of 140.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "T:\Anaconda\lib\site-packages\sklearn\model_selection\_validation.py", line 732, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "T:\Anaconda\lib\site-packages\sklearn\base.py", line 1151, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "T:\Anaconda\lib\site-packages\sklearn\linear_model\_logistic.py", line 1168, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "T:\Anaconda\lib\site-packages\sklearn\linear_model\_logistic.py", line 56, in _check_solver
    raise ValueError(
ValueError: Solver lb

## Try with train - test data

### Normalization

In [15]:
x = (x - np.min(x)) / (np.max(x) - np.min(x))

### Train - Test Split 

In [17]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test =train_test_split(x, y, test_size = 0.2, random_state = 31)

In [18]:
grid = {"C": np.logspace(-3, 3, 7), "penalty":["l1", "l2"]}
logreg_ = LogisticRegression()
logreg_cv_ = GridSearchCV(logreg_, grid, cv = 10)
logreg_cv_.fit(x_train, y_train)

70 fits failed out of a total of 140.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
70 fits failed with the following error:
Traceback (most recent call last):
  File "T:\Anaconda\lib\site-packages\sklearn\model_selection\_validation.py", line 732, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "T:\Anaconda\lib\site-packages\sklearn\base.py", line 1151, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "T:\Anaconda\lib\site-packages\sklearn\linear_model\_logistic.py", line 1168, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "T:\Anaconda\lib\site-packages\sklearn\linear_model\_logistic.py", line 56, in _check_solver
    raise ValueError(
ValueError: Solver lb

In [19]:
print("tuned best parameters: ", logreg_cv_.best_params_)
print("tunes best accuracy: ", logreg_cv_.best_score_)

tuned best parameters:  {'C': 0.1, 'penalty': 'l2'}
tunes best accuracy:  1.0


- Let's try these parameters

In [24]:
realLogReg = LogisticRegression(C = 0.1, penalty = "l2")
realLogReg.fit(x_train, y_train)
print("accuracy: ",realLogReg.score(x_test,y_test))

accuracy:  1.0
