# Sklearn - KNN example

In [4]:
import pandas as pd
import numpy as np

df = pd.read_csv('car.data', names=['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class'])
print(df.shape)
print(df.info())
df.head()

(1728, 7)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1728 entries, 0 to 1727
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   buying    1728 non-null   object
 1   maint     1728 non-null   object
 2   doors     1728 non-null   object
 3   persons   1728 non-null   object
 4   lug_boot  1728 non-null   object
 5   safety    1728 non-null   object
 6   class     1728 non-null   object
dtypes: object(7)
memory usage: 94.6+ KB
None


Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [54]:
X = df[['buying', 'maint', 'safety']]
y = df[['class']]
print(X.shape)
print(y.shape)

(1728, 3)
(1728, 1)


In [7]:
from sklearn import neighbors, metrics
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [41]:
# X preprocessing
X_values = X.values
Le = LabelEncoder()
for i in range(len(X_values[0])):
  X_values[:, i] = Le.fit_transform(X_values[:, i])
  print('Class {}: '.format(X.columns[i]), Le.classes_)
print('\nData: \n', X_values[:2])

Class buying:  ['high' 'low' 'med' 'vhigh']
Class maint:  ['high' 'low' 'med' 'vhigh']
Class safety:  ['high' 'low' 'med']

Data: 
 [[3 3 1]
 [3 3 2]]


In [56]:
label_mapping = {
    'unacc': 0,
    'acc': 1,
    'good': 2,
    'vgood': 3
}
y_values = y['class'].map(label_mapping)
print(type(y_values))

<class 'pandas.core.series.Series'>


In [72]:
X_train, X_test, y_train, y_test = train_test_split(X_values, y_values, test_size=0.2)

In [73]:
# create model
knn = neighbors.KNeighborsClassifier(n_neighbors=5, weights='uniform')
knn.fit(X_train, y_train)
pred = knn.predict(X_test)

acc = metrics.accuracy_score(pred, y_test)
acc

0.7658959537572254

# Hand Written Recognizer

In [3]:
!pip install mnist

Collecting mnist
  Downloading https://files.pythonhosted.org/packages/c6/c4/5db3bfe009f8d71f1d532bbadbd0ec203764bba3a469e4703a889db8e5e0/mnist-0.2.2-py2.py3-none-any.whl
Installing collected packages: mnist
Successfully installed mnist-0.2.2


In [4]:
import pandas as pd
import mnist
from PIL import Image
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix

In [5]:
# training
X_train = mnist.train_images()
y_train = mnist.train_labels()

# test
X_test = mnist.test_images()
y_test = mnist.test_labels()

In [6]:
print(X_train.shape)
print(X_test.shape)

X_train = X_train.reshape((-1, 28*28))
X_test = X_test.reshape((-1, 28*28))

print(X_train.shape)
print(X_test.shape)

(60000, 28, 28)
(10000, 28, 28)
(60000, 784)
(10000, 784)


In [7]:
print(X_train[0])

[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   3  18  18  18 126 136 175  26 166 255
 247 127   0   0   0   0   0   0   0   0   0   0   0   0  30  36  94 154
 170 253 253 253 253 253 225 172 253 242 195  64   0   0   0   0   0   0
   0   0   0   0   0  49 238 253 253 253 253 253 253 253 253 251  93  82
  82  56  39   0   0   0   0   0   0   0   0   0   0   0   0  18 219 253
 253 253 253 253 198 182 247 241   0   0   0   0   

In [8]:
# to minimize CPU need, let's divide our pixel by 256
X_train = X_train/256
X_test = X_test/256

print(X_train[0])

[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         

In [9]:
clf = MLPClassifier(solver='adam', activation='relu', hidden_layer_sizes=(64,64))
clf.fit(X_train, y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(64, 64), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [10]:
pred = clf.predict(X_test)

In [11]:
acc = confusion_matrix(y_test, pred)
acc

array([[ 970,    1,    3,    0,    1,    0,    2,    0,    1,    2],
       [   0, 1122,    3,    1,    0,    0,    2,    1,    5,    1],
       [   2,    1, 1003,    9,    1,    0,    1,    5,   10,    0],
       [   1,    0,    3,  989,    2,    6,    0,    4,    4,    1],
       [   2,    0,    3,    0,  967,    0,    2,    1,    0,    7],
       [   4,    0,    0,    7,    2,  870,    4,    0,    3,    2],
       [   7,    3,    3,    1,    3,    4,  936,    0,    1,    0],
       [   1,    2,    8,    3,    2,    0,    0, 1005,    1,    6],
       [   6,    0,    4,    3,    2,    4,    1,    4,  947,    3],
       [   1,    3,    0,    6,    8,    5,    2,    6,    1,  977]])

In [14]:
def accuracy(cm):
  diagonal = cm.trace()
  elements = cm.sum()
  return diagonal/elements

In [15]:
accuracy(acc)

0.9786