In [13]:
import numpy as np
import os
from PIL import Image
import csv
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.utils.validation import check_is_fitted
from sklearn.utils import check_array, check_X_y
from sklearn import manifold
from torchvision import transforms
import matplotlib.pyplot as plt

# File path

In [14]:
# Dataset locations, relative to the notebook's working directory.
# Image folders for the two splits.
train_img_path, test_img_path = "./train_images", "./test_images"
# CSV label files for the two splits.
train_label_file, test_label_file = "train.csv", "test.csv"

In [19]:
# Start with empty containers for the training images and their labels.
train_img, train_label = [], []

# Data Augmentation

In [20]:
# Augmentation switch: when True, the loading cells add two extra samples per
# image (produced by torchvision's horizontal / vertical flip transforms) and
# repeat each label three times to match.
# NOTE(review): despite the name, no rotation is applied anywhere in view.
Is_Rotate = True

# LOAD IMAGE

In [21]:
# Load every training image as a flattened pixel vector; when Is_Rotate is set,
# also add a horizontally and a vertically flipped copy of each image.
# The list is built locally and bound to train_img only at the end, so the cell
# can be re-run (appending to an already-converted ndarray would fail).

# p=1.0 forces the flip. With the default p=0.5 roughly half of the "augmented"
# samples would be unflipped duplicates, and which ones would change per run.
# The transforms are loop-invariant, so they are created once.
hori_img = transforms.RandomHorizontalFlip(p=1.0)
vert_img = transforms.RandomVerticalFlip(p=1.0)

flat_images = []
# os.listdir returns names in arbitrary order; sort for a reproducible order.
# NOTE(review): labels are read from the CSV in row order - confirm the CSV
# rows follow this same (sorted filename) order.
for file in sorted(os.listdir(train_img_path)):
    # The context manager releases the file handle once the pixels are copied.
    with Image.open(os.path.join(train_img_path, file)) as img:
        flat_images.append(np.array(img).reshape(-1))
        if Is_Rotate:
            flat_images.append(np.array(hori_img(img)).reshape(-1))
            flat_images.append(np.array(vert_img(img)).reshape(-1))
train_img = np.array(flat_images)

In [22]:
# Sanity check: one row per loaded sample, one column per flattened pixel.
train_img.shape

(7584, 262144)

In [23]:
# Read the training labels (integer in column 1 of the CSV, header row skipped).
# Each label is repeated once per sample generated for its image so that the
# label array has one entry per image sample: 3 with augmentation, otherwise 1.
copies_per_image = 3 if Is_Rotate else 1

# Built locally so that re-running the cell never appends to an ndarray.
labels = []
with open(train_label_file, newline='') as csvfile:
    rows = csv.reader(csvfile)
    next(rows, None)  # skip the header row (no-op on an empty file)
    for row in rows:
        labels.extend([int(row[1])] * copies_per_image)
train_label = np.array(labels)
train_label.shape

(7584,)

# Prediction accuracy

In [26]:
def predict_accuracy(y, y_hat):
    """Return the fraction of predictions that equal the true labels.

    Parameters
    ----------
    y : array-like
        True labels.
    y_hat : array-like
        Predicted labels; must have the same shape as ``y``.

    Returns
    -------
    numpy.float64
        For 1-D input, the share of positions where ``y == y_hat`` (0.0-1.0).
        For higher-dimensional input the matches are counted along the first
        axis, giving one ratio per remaining position.

    Raises
    ------
    ValueError
        If the two inputs differ in shape or contain no elements.
    """
    y = np.atleast_1d(np.asarray(y))
    y_hat = np.atleast_1d(np.asarray(y_hat))
    if y.shape != y_hat.shape:
        # Without this check NumPy would broadcast, e.g. (n,) against (n, 1)
        # becomes an (n, n) comparison and the score is meaningless.
        raise ValueError(
            "y and y_hat must have the same shape, got %s and %s"
            % (y.shape, y_hat.shape)
        )
    if y.size == 0:
        raise ValueError("cannot compute accuracy of empty label arrays")
    # Vectorised count of matches along the sample axis.
    return (y == y_hat).sum(axis=0) / y.shape[0]

# PCA

In [28]:
def PCA_decomposition(n_components, train_img, img_shape=None):
    """Project images onto principal components and reconstruct them.

    Parameters
    ----------
    n_components
        Forwarded unchanged to ``sklearn.decomposition.PCA``.
    train_img : array-like of shape (n_samples, n_pixels)
        One flattened image per row.
    img_shape : tuple of int, optional
        ``(height, width)`` used to rebuild the preview image. When omitted,
        a square image is inferred from the number of pixels per row.

    Returns
    -------
    pca_img : ndarray
        Samples projected onto the principal components.
    pca_inv_img : ndarray
        Samples mapped back to pixel space from ``pca_img``.
    img_pca : PIL.Image.Image
        8-bit grayscale ('L') preview of the last reconstructed sample.

    Raises
    ------
    ValueError
        If ``img_shape`` is omitted and the pixel count is not a perfect square.
    """
    pca = PCA(n_components=n_components)
    pca_img = pca.fit_transform(train_img)
    pca_inv_img = pca.inverse_transform(pca_img)

    n_pixels = pca_inv_img.shape[1]
    if img_shape is None:
        side = int(round(np.sqrt(n_pixels)))
        if side * side != n_pixels:
            raise ValueError(
                "cannot infer a square image from %d pixels; pass img_shape"
                % n_pixels
            )
        img_shape = (side, side)

    # Only the last sample is rendered. Reconstructed intensities can leave the
    # 0..255 range, so clip before the cast to keep uint8 from wrapping around.
    last_sample = np.clip(pca_inv_img[-1], 0, 255).astype(np.uint8)
    # Reshape to one image's 2-D shape (not the whole dataset's shape).
    img_pca = Image.fromarray(last_sample.reshape(img_shape), 'L')
    return pca_img, pca_inv_img, img_pca

# LDA

In [29]:
def inverse_transform(lda, x):
    """Map LDA-projected samples back to the original feature space.

    The reconstruction is a least-squares one, obtained from the pseudo-inverse
    of the projection matrix.

    Parameters
    ----------
    lda : fitted LinearDiscriminantAnalysis
        Must have been fitted with the 'svd' or 'eigen' solver.
    x : array-like of shape (n_samples, n_kept)
        Projected samples, as returned by ``lda.transform``.

    Returns
    -------
    ndarray of shape (n_samples, n_features)
        Approximate samples in the original feature space.

    Raises
    ------
    NotImplementedError
        If ``lda`` uses the 'lsqr' solver.
    """
    if lda.solver == 'lsqr':
        raise NotImplementedError("(inverse) transform not implemented for 'lsqr' "
                                  "solver (use 'svd' or 'eigen').")
    check_is_fitted(lda, ['xbar_', 'scalings_'], all_or_any=any)

    x = check_array(x)

    # scikit-learn's transform() keeps only the leading n_components columns of
    # the projection, so scalings_ may have more columns than x. Invert only the
    # columns that produced x; otherwise the matrix product below fails.
    inv = np.linalg.pinv(lda.scalings_[:, :x.shape[1]])

    x_back = np.dot(x, inv)
    if lda.solver == 'svd':
        # Only the 'svd' solver subtracts xbar_ in transform(), so the mean is
        # added back for that solver alone.
        x_back = x_back + lda.xbar_

    return x_back

In [30]:
def LDA(n_components, train_img, train_label):
    """Fit an LDA on the labelled samples, project them, and map them back.

    Parameters
    ----------
    n_components
        Forwarded to ``LinearDiscriminantAnalysis``.
    train_img
        Training samples, passed to both ``fit`` and ``transform``.
    train_label
        Class labels for ``train_img``.

    Returns
    -------
    tuple
        ``(reconstructed, projected)``: the output of ``inverse_transform`` on
        the projection, followed by the projection itself.
    """
    model = LinearDiscriminantAnalysis(n_components=n_components)
    model.fit(train_img, train_label)
    projected = model.transform(train_img)
    reconstructed = inverse_transform(model, projected)
    return reconstructed, projected

# TSNE

In [None]:
# t-SNE: embed the flattened training images into 2 dimensions for plotting.
embedder = manifold.TSNE(n_components=2, init='random', random_state=5, verbose=1)
tsne = embedder.fit_transform(train_img)

# Min-max scale each embedding axis so the points fall inside [0, 1].
x_min = tsne.min(0)
x_max = tsne.max(0)
X_norm = (tsne - x_min) / (x_max - x_min)

# Draw every sample as its label text, coloured by label through the Set3 colormap.
plt.figure()
text_style = {'weight': 'bold', 'size': 9}
for i, (x_pos, y_pos) in enumerate(X_norm):
    label = train_label[i]
    plt.text(x_pos, y_pos, str(label), color=plt.cm.Set3(label), fontdict=text_style)

# The axes carry no meaning in a t-SNE embedding, so the ticks are hidden.
plt.xticks([])
plt.yticks([])
plt.show()

[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 2528 samples in 268.646s...


# Feature selection using SelectFromModel

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.feature_selection import RFE
clf = DecisionTreeClassifier()
selected_train =