## Mini Project

In [1]:
import torch
import time
import numpy as np

## Feature Extraction - ResNet

In [None]:
from keras.datasets import mnist

# Load the MNIST splits through Keras, then unpack images and labels per split.
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [None]:
import numpy as np
from numpy.random import RandomState

num = 5000  # number of MNIST training images to keep
prng = RandomState(42)  # fixed seed so the same subset is drawn on every run
# Shuffle the training indices 0..59999 and keep the first `num` of them.
all_indices = np.arange(0, 60000)
random_permute = prng.permutation(all_indices)[:num]

In [None]:
# Gather the sampled images and labels in one vectorized indexing step instead
# of copying row by row in a Python loop.
# The float64 cast keeps the dtype that np.zeros-allocated buffers would have;
# with random_permute holding `num` indices, x has `num` rows and y has `num`
# entries (assumes x_train is (N, 28, 28) and y_train is (N,) — TODO confirm).
x = x_train[random_permute].astype(np.float64)
y = y_train[random_permute].astype(np.float64)

In [None]:
import cv2

def change_size(x, size=224):
  """Resize a batch of single-channel images and replicate them to 3 channels.

  Args:
    x: array of shape (n, height, width) holding n single-channel images.
    size: side length, in pixels, of the square output images. Defaults to
      224, the size this notebook uses for its ResNet50 input.

  Returns:
    A float64 array of shape (n, size, size, 3) whose three channels are
    identical copies of the resized image.
  """
  x_resize = np.zeros((x.shape[0], size, size))
  for i in range(x.shape[0]):
    x_resize[i] = cv2.resize(x[i], (size, size))

  # One repeat along a new trailing axis produces the 3-channel result
  # directly, without building an intermediate 2-channel copy first.
  return np.repeat(x_resize[:, :, :, np.newaxis], 3, axis=3)

In [None]:
from keras.models import Model
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input, decode_predictions

# Upsample the sampled digits to 224x224x3 and apply the ResNet50 preprocessing.
x_input = preprocess_input(change_size(x))

# Load the ImageNet-weighted ResNet50 and expose the tensor that feeds its
# final 'fc1000' layer, so predict() returns that layer's inputs as features.
res50_base_model = ResNet50(weights='imagenet', include_top=True)
fc1000_input = res50_base_model.get_layer('fc1000').input
res50_model = Model(inputs=res50_base_model.input, outputs=fc1000_input)

In [None]:
import time

# Time the forward pass that turns every sampled image into a feature vector.
start = time.time()

res50_feature, res50_label = res50_model.predict(x_input), y

end = time.time()
elapsed = end - start
elapsed

In [None]:
# Inspect the dimensions of the feature matrix returned by predict();
# it is expected to hold one row per sampled image.
res50_feature.shape

### PCA of ResNet50 features

In [None]:
# PCA must be imported here: no earlier cell brings it into scope, so this
# cell would otherwise fail after a kernel restart.
from sklearn.decomposition import PCA

start = time.time()

# Project the ResNet50 features onto their first two principal components.
# random_state pins the result in case scikit-learn selects its randomized
# SVD solver for a matrix of this size.
res50_pca_embedded = PCA(n_components=2, random_state=42).fit_transform(res50_feature)

end = time.time()
end - start

In [None]:
# These plotting/tabulation libraries are not imported by any earlier cell.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Tabulate the 2-D PCA embedding; reset_index() turns the labels into a
# leading 'index' column, which is then cast from float to int.
res50_pca_embedded_df = pd.DataFrame(res50_pca_embedded,index=res50_label)
res50_pca_embedded_df = res50_pca_embedded_df.reset_index()
res50_pca_embedded_df['index'] = res50_pca_embedded_df['index'].astype(int)
# Plot every embedded sample rather than a hard-coded row range, so the cell
# keeps working when the subset size changes. The copy keeps the renamed
# columns off res50_pca_embedded_df.
res50_pca_embedded_df_plot = res50_pca_embedded_df.copy()
res50_pca_embedded_df_plot.columns = ['Label','PCA Dim1', 'PCA Dim2']
plt.figure(figsize=(10,10),dpi=120)
ax = sns.scatterplot(x='PCA Dim1', y='PCA Dim2',hue='Label',data=res50_pca_embedded_df_plot,
                     palette='Set1',alpha=0.6,s=20,legend='full')
# Anchor the legend at the right edge of the axes.
legend = plt.legend(bbox_to_anchor=(1, 0.6))
plt.show()

### t-SNE of ResNet50 features

In [None]:
# TSNE must be imported here: no earlier cell brings it into scope, so this
# cell would otherwise fail after a kernel restart.
from sklearn.manifold import TSNE

start = time.time()

# Embed the ResNet50 features in 2-D with t-SNE. The method is stochastic, so
# random_state is fixed to make the embedding (and the plot) reproducible.
res50_tsne_embedded = TSNE(n_components=2, random_state=42).fit_transform(res50_feature)

end = time.time()
end - start

In [None]:
# These plotting/tabulation libraries are not imported by any earlier cell.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Tabulate the 2-D t-SNE embedding; reset_index() turns the labels into a
# leading 'index' column, which is then cast from float to int.
res50_tsne_embedded_df = pd.DataFrame(res50_tsne_embedded,index=res50_label)
res50_tsne_embedded_df = res50_tsne_embedded_df.reset_index()
res50_tsne_embedded_df['index'] = res50_tsne_embedded_df['index'].astype(int)
# Plot every embedded sample rather than a hard-coded row range, so the cell
# keeps working when the subset size changes. The copy keeps the renamed
# columns off res50_tsne_embedded_df.
res50_tsne_embedded_df_plot = res50_tsne_embedded_df.copy()
res50_tsne_embedded_df_plot.columns = ['Label','tSNE Dim1', 'tSNE Dim2']
plt.figure(figsize=(10,10),dpi=120)
ax = sns.scatterplot(x='tSNE Dim1', y='tSNE Dim2',hue='Label',data=res50_tsne_embedded_df_plot,
                     palette='tab10',alpha=0.6,s=20,legend='full')
# Anchor the legend just outside the right edge of the axes, on a white box.
legend = plt.legend(bbox_to_anchor=(1.001, 0.7),facecolor='white',edgecolor='white')
plt.show()

### Classification based on features extracted by ResNet50

In [None]:
from sklearn.model_selection import cross_validate

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Time a 3-fold cross-validation of LDA on the ResNet50 features.
start = time.time()

lda = LinearDiscriminantAnalysis()
lda_cv = cross_validate(
    estimator=lda,
    X=res50_feature[:5000],
    y=res50_label[:5000],
    cv=3,
    scoring='accuracy',
)
# Keep only the per-fold test accuracies.
lda_results = lda_cv['test_score']

end = time.time()
elapsed = end - start
elapsed, lda_results

In [None]:
from sklearn.linear_model import LogisticRegression

# Time a 3-fold cross-validation of multinomial logistic regression.
start = time.time()

logistic_regression = LogisticRegression(
    random_state=0,
    solver='lbfgs',
    multi_class='multinomial',
)
logistic_regression_cv = cross_validate(
    estimator=logistic_regression,
    X=res50_feature[:5000],
    y=res50_label[:5000],
    cv=3,
    scoring='accuracy',
)
# Keep only the per-fold test accuracies.
logistic_regression_results = logistic_regression_cv['test_score']

end = time.time()
elapsed = end - start
elapsed, logistic_regression_results

In [None]:
from sklearn import svm

# Time a 3-fold cross-validation of a linear SVM on the ResNet50 features.
start = time.time()

# Seed the estimator like the logistic-regression and random-forest cells in
# this notebook, so the scores are repeatable whenever the solver uses its
# random number generator.
linear_svc = svm.LinearSVC(random_state=0)

svm_results = cross_validate(linear_svc, res50_feature[:5000], res50_label[:5000], cv=3, scoring='accuracy')
# Keep only the per-fold test accuracies.
svm_results = svm_results['test_score']

end = time.time()
end - start, svm_results

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Time a 3-fold cross-validation of a 10-tree random forest.
start = time.time()

random_forest = RandomForestClassifier(
    n_estimators=10,
    max_depth=None,
    min_samples_split=2,
    random_state=0,
)
random_forest_cv = cross_validate(
    estimator=random_forest,
    X=res50_feature[:5000],
    y=res50_label[:5000],
    cv=3,
    scoring='accuracy',
)
# Keep only the per-fold test accuracies.
random_forest_results = random_forest_cv['test_score']

end = time.time()
elapsed = end - start
elapsed, random_forest_results

In [None]:
# Per-fold test accuracies, in order: LDA, logistic regression, linear SVM, random forest.
lda_results,logistic_regression_results,svm_results,random_forest_results

In [None]:
# Mean cross-validated accuracy, in order: LDA, logistic regression, linear SVM, random forest.
tuple(
    fold_scores.mean()
    for fold_scores in (lda_results, logistic_regression_results, svm_results, random_forest_results)
)

In [None]:
# Standard deviation of the fold accuracies, in order: LDA, logistic regression, linear SVM, random forest.
tuple(
    fold_scores.std()
    for fold_scores in (lda_results, logistic_regression_results, svm_results, random_forest_results)
)