## ML Assignment 1 - MNIST, DT, SVM
prof. Eunwoo Kim

CAU SW 20184286 Donghwa Lee

2023.04.21

In [1]:
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
import numpy as np
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from scipy.stats import randint
from sklearn.model_selection import GridSearchCV
import pandas as pd

In [2]:
# MNIST
mnist_train_transform = transforms.Compose([transforms.ToTensor()])
mnist_test_transform = transforms.Compose([transforms.ToTensor()])

trainset_mnist = datasets.MNIST(root='./data', train=True, download=True, transform=mnist_train_transform)
testset_mnist = datasets.MNIST(root='./data', train=False, download=True, transform=mnist_test_transform)

# The loaders are only used to materialise each split as NumPy arrays for
# scikit-learn, so shuffling buys nothing. An unseeded shuffle=True gave the
# training rows a different order on every run, which in turn changes any
# order-dependent cross-validation split and makes the accuracies reported
# in this notebook irreproducible. Keep the dataset order instead.
MNIST_train = DataLoader(trainset_mnist, batch_size=32, shuffle=False, num_workers=2)
MNIST_test = DataLoader(testset_mnist, batch_size=32, shuffle=False, num_workers=2)


def _loader_to_numpy(loader):
    """Drain ``loader`` and return ``(images, labels)`` as NumPy arrays.

    Every image batch is flattened to one row per sample before being
    stacked, so the first array is 2-D (samples x features). The label
    batches are concatenated in the same order, keeping rows and labels
    aligned.
    """
    image_batches = []
    label_batches = []
    for images, labels in loader:
        image_batches.append(images.view(images.shape[0], -1).numpy())
        label_batches.append(labels.numpy())
    return np.vstack(image_batches), np.concatenate(label_batches)


# X = images, y = labels
MNIST_train_images, MNIST_train_labels = _loader_to_numpy(MNIST_train)
MNIST_test_images, MNIST_test_labels = _loader_to_numpy(MNIST_test)

In [3]:
def show_accuracy(y_true, y_pred, model_name):
    """Return the accuracy of ``y_pred`` against ``y_true`` as a named Series.

    The result is a one-element ``pd.Series`` indexed by ``'accuracy'`` whose
    ``name`` is ``model_name``, so several of them can be concatenated
    column-wise into a comparison table.
    """
    score = accuracy_score(y_true, y_pred)
    return pd.Series({'accuracy': score}, name=model_name)

### Decision Tree

In [4]:
# Hyper-parameter candidates searched for every decision tree
# (3 x 3 x 3 = 27 combinations per grid search).
params_grid = dict(
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    max_leaf_nodes=[5, 10, None],
)

#### depth : 3

In [5]:
# Depth-3 tree. Fixing random_state makes the tie-breaking between equally
# good splits deterministic, so re-running the notebook rebuilds the same tree.
DT_d3 = DecisionTreeClassifier(max_depth=3, random_state=0)

# 5-fold grid search over params_grid. The candidate/fold fits are independent,
# so n_jobs=-1 runs them on all CPU cores without affecting the result.
grid_DT = GridSearchCV(DT_d3,
                       param_grid=params_grid,
                       cv=5,
                       n_jobs=-1)

In [6]:
# Run the grid search on the training split.
grid_DT.fit(MNIST_train_images, MNIST_train_labels)

# Predict on the training split first, then on the held-out test split.
MNIST_train_labels_pred, MNIST_test_labels_pred = (
    grid_DT.predict(split) for split in (MNIST_train_images, MNIST_test_images)
)

# One column per split, one 'accuracy' row.
MNIST_DT_d3_accuracy = pd.concat(
    [
        show_accuracy(MNIST_train_labels, MNIST_train_labels_pred, 'MNIST_DT_d3_train'),
        show_accuracy(MNIST_test_labels, MNIST_test_labels_pred, 'MNIST_DT_d3_test'),
    ],
    axis='columns',
)

MNIST_DT_d3_accuracy

Unnamed: 0,MNIST_DT_d3_train,MNIST_DT_d3_test
accuracy,0.491517,0.4953


#### depth : 6

In [7]:
# Depth-6 tree. Fixing random_state makes the tie-breaking between equally
# good splits deterministic, so re-running the notebook rebuilds the same tree.
DT_d6 = DecisionTreeClassifier(max_depth=6, random_state=0)

# 5-fold grid search over params_grid. The candidate/fold fits are independent,
# so n_jobs=-1 runs them on all CPU cores without affecting the result.
grid_DT = GridSearchCV(DT_d6,
                       param_grid=params_grid,
                       cv=5,
                       n_jobs=-1)

In [8]:
# Run the grid search on the training split.
grid_DT.fit(MNIST_train_images, MNIST_train_labels)

# Predict on the training split first, then on the held-out test split.
MNIST_train_labels_pred, MNIST_test_labels_pred = (
    grid_DT.predict(split) for split in (MNIST_train_images, MNIST_test_images)
)

# One column per split, one 'accuracy' row.
MNIST_DT_d6_accuracy = pd.concat(
    [
        show_accuracy(MNIST_train_labels, MNIST_train_labels_pred, 'MNIST_DT_d6_train'),
        show_accuracy(MNIST_test_labels, MNIST_test_labels_pred, 'MNIST_DT_d6_test'),
    ],
    axis='columns',
)

MNIST_DT_d6_accuracy

Unnamed: 0,MNIST_DT_d6_train,MNIST_DT_d6_test
accuracy,0.73825,0.7415


#### depth : 9

In [9]:
# Depth-9 tree. Fixing random_state makes the tie-breaking between equally
# good splits deterministic, so re-running the notebook rebuilds the same tree.
DT_d9 = DecisionTreeClassifier(max_depth=9, random_state=0)

# 5-fold grid search over params_grid. The candidate/fold fits are independent,
# so n_jobs=-1 runs them on all CPU cores without affecting the result.
grid_DT = GridSearchCV(DT_d9,
                       param_grid=params_grid,
                       cv=5,
                       n_jobs=-1)

In [10]:
# Run the grid search on the training split.
grid_DT.fit(MNIST_train_images, MNIST_train_labels)

# Predict on the training split first, then on the held-out test split.
MNIST_train_labels_pred, MNIST_test_labels_pred = (
    grid_DT.predict(split) for split in (MNIST_train_images, MNIST_test_images)
)

# One column per split, one 'accuracy' row.
MNIST_DT_d9_accuracy = pd.concat(
    [
        show_accuracy(MNIST_train_labels, MNIST_train_labels_pred, 'MNIST_DT_d9_train'),
        show_accuracy(MNIST_test_labels, MNIST_test_labels_pred, 'MNIST_DT_d9_test'),
    ],
    axis='columns',
)

MNIST_DT_d9_accuracy

Unnamed: 0,MNIST_DT_d9_train,MNIST_DT_d9_test
accuracy,0.8661,0.8505


#### depth : 12

In [11]:
# Depth-12 tree. Fixing random_state makes the tie-breaking between equally
# good splits deterministic, so re-running the notebook rebuilds the same tree.
DT_d12 = DecisionTreeClassifier(max_depth=12, random_state=0)

# 5-fold grid search over params_grid. The candidate/fold fits are independent,
# so n_jobs=-1 runs them on all CPU cores without affecting the result.
grid_DT = GridSearchCV(DT_d12,
                       param_grid=params_grid,
                       cv=5,
                       n_jobs=-1)

In [12]:
# Run the grid search on the training split.
grid_DT.fit(MNIST_train_images, MNIST_train_labels)

# Predict on the training split first, then on the held-out test split.
MNIST_train_labels_pred, MNIST_test_labels_pred = (
    grid_DT.predict(split) for split in (MNIST_train_images, MNIST_test_images)
)

# One column per split, one 'accuracy' row.
MNIST_DT_d12_accuracy = pd.concat(
    [
        show_accuracy(MNIST_train_labels, MNIST_train_labels_pred, 'MNIST_DT_d12_train'),
        show_accuracy(MNIST_test_labels, MNIST_test_labels_pred, 'MNIST_DT_d12_test'),
    ],
    axis='columns',
)

MNIST_DT_d12_accuracy

Unnamed: 0,MNIST_DT_d12_train,MNIST_DT_d12_test
accuracy,0.93465,0.8774


### SVM

#### linear

In [17]:
# Support-vector classifier with a linear kernel; no other hyper-parameters
# are passed, so everything else stays at the library defaults.
SVM_linear = svm.SVC(kernel='linear')

In [18]:
# Train the linear-kernel SVM on the training split.
SVM_linear.fit(MNIST_train_images, MNIST_train_labels)

# Predict on the training split first, then on the held-out test split.
MNIST_train_labels_pred, MNIST_test_labels_pred = (
    SVM_linear.predict(split) for split in (MNIST_train_images, MNIST_test_images)
)

# One column per split, one 'accuracy' row.
MNIST_SVM_linear_accuracy = pd.concat(
    [
        show_accuracy(MNIST_train_labels, MNIST_train_labels_pred, 'MNIST_SVM_linear_train'),
        show_accuracy(MNIST_test_labels, MNIST_test_labels_pred, 'MNIST_SVM_linear_test'),
    ],
    axis='columns',
)

MNIST_SVM_linear_accuracy

Unnamed: 0,MNIST_SVM_linear_train,MNIST_SVM_linear_test
accuracy,0.970733,0.9404


#### rbf

In [19]:
# Support-vector classifier with an RBF kernel; no other hyper-parameters
# are passed, so everything else stays at the library defaults.
SVM_rbf = svm.SVC(kernel='rbf')

In [20]:
# Train the RBF-kernel SVM on the training split.
SVM_rbf.fit(MNIST_train_images, MNIST_train_labels)

# Predict on the training split first, then on the held-out test split.
MNIST_train_labels_pred, MNIST_test_labels_pred = (
    SVM_rbf.predict(split) for split in (MNIST_train_images, MNIST_test_images)
)

# One column per split, one 'accuracy' row.
MNIST_SVM_rbf_accuracy = pd.concat(
    [
        show_accuracy(MNIST_train_labels, MNIST_train_labels_pred, 'MNIST_SVM_rbf_train'),
        show_accuracy(MNIST_test_labels, MNIST_test_labels_pred, 'MNIST_SVM_rbf_test'),
    ],
    axis='columns',
)

MNIST_SVM_rbf_accuracy

Unnamed: 0,MNIST_SVM_rbf_train,MNIST_SVM_rbf_test
accuracy,0.989917,0.9792
