In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline


In [2]:
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [3]:
from sklearn.datasets import load_iris

In [4]:
iris = load_iris()

In [5]:
x = iris.data
y = iris.target


In [6]:
iris.DESCR



In [7]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=222)

In [8]:
model = LogisticRegression()

In [9]:
model.fit(X_train, y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

In [10]:
y_pred = model.predict(X_test)

In [11]:
print(accuracy_score(y_test, y_pred))

0.9111111111111111


In [12]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       0.77      0.91      0.83        11
           2       0.92      0.80      0.86        15

    accuracy                           0.91        45
   macro avg       0.90      0.90      0.90        45
weighted avg       0.92      0.91      0.91        45



In [13]:
print(confusion_matrix(y_test, y_pred))

[[19  0  0]
 [ 0 10  1]
 [ 0  3 12]]


### KNN

In [14]:
from sklearn.neighbors import KNeighborsClassifier

In [15]:
knn = KNeighborsClassifier()

In [16]:
knn.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

In [17]:
pred = knn.predict(X_test)

In [18]:
print(accuracy_score(y_test, pred))

0.9111111111111111


In [19]:
# K = 1 to 30

In [20]:
kvalue_range = list(range(1, 30))
scores = []


In [21]:
for kvalue in kvalue_range:
    knn = KNeighborsClassifier(n_neighbors = kvalue)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    scores.append(accuracy_score(y_test, y_pred))
    
print(scores)

[0.9111111111111111, 0.9111111111111111, 0.9111111111111111, 0.9111111111111111, 0.9111111111111111, 0.9111111111111111, 0.9111111111111111, 0.9333333333333333, 0.9333333333333333, 0.9111111111111111, 0.9111111111111111, 0.9333333333333333, 0.9555555555555556, 0.9333333333333333, 0.9777777777777777, 0.9111111111111111, 0.9777777777777777, 0.9111111111111111, 0.9111111111111111, 0.9111111111111111, 0.9111111111111111, 0.9111111111111111, 0.9111111111111111, 0.9111111111111111, 0.9111111111111111, 0.9111111111111111, 0.9111111111111111, 0.9111111111111111, 0.9111111111111111]


### Grid search cv

In [22]:
from sklearn.model_selection import GridSearchCV


In [23]:
kvalue_range = list(range(1,25))

In [24]:
parameter_grid = dict(neighbors = kvalue_range)

In [25]:
grid = {'kernel':('linear','rbf'), 'c' : [1,10,100]}

In [26]:
print(parameter_grid)

{'neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]}


In [27]:
grid = GridSearchCV(knn, parameter_grid, cv = 8, scoring = 'accuracy')

In [28]:
grid.fit(X_train, y_train)

ValueError: Invalid parameter neighbors for estimator KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=29, p=2,
                     weights='uniform'). Check the list of available parameters with `estimator.get_params().keys()`.

In [None]:
grid.fit(x, y)

In [None]:
grid.grid_scores

## Example 2

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

In [None]:
commerce = pd.read_csv('/root/Projects/Ecommerce-Customers.csv')

In [None]:
commerce.head()

In [None]:
commerce.info()

In [None]:
from sklearn.datasets import load_breast_cancer

In [None]:
cancer = load_breast_cancer()

In [None]:
cancer_df = pd.DataFrame(cancer.data, columns=cancer.feature_names)

In [None]:
cancer_df.head()

In [None]:
cancer.DESCR

In [None]:
cancer.target_names

In [None]:
cancer.feature_names

In [None]:
cancer_df['Type'] = cancer.target

In [None]:
cancer_df.head()

In [None]:
cancer_df.shape

In [None]:
X= cancer_df.iloc[:,:30]

In [None]:
X.head()

In [None]:
y = cancer_df.iloc[:,30:]

In [None]:
y

In [None]:
X_train, X_test , y_train, y_test = train_test_split(X,y,test_size=0.25)

In [None]:
clf = SVC()

In [None]:
clf.fit(X,y)

In [None]:
pred = clf.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [None]:
accuracy_score(y_test, pred)

In [None]:
print(classification_report(y_test, pred))

In [None]:
confusion_matrix(y_test, pred)

##