### Check how many layers are frozen in:

https://github.com/ShehabMMohamed/TinyImageNet-KaggleCompetition

In [None]:
from keras.applications import Xception
from keras.layers import Input, Dense

In [None]:
# Square 299x299 RGB input for the pre-trained Xception network.
img_height = 299
img_width = 299

# Keras image tensors are (height, width, channels) under the default
# channels-last data format, so height must come first.  The two values are
# equal here, but the order matters as soon as a non-square size is used.
input_tensor = Input(shape=(img_height, img_width, 3))

# include_top=False drops the ImageNet classification head; pooling='avg'
# makes the final layer a global average pooling over the last feature map.
pre_trained_model = Xception(
    weights='imagenet',
    input_tensor=input_tensor,
    include_top=False,
    pooling='avg',
)
# Downloads 83.7Mb of model weights on first use

In [None]:
# Look up the name of the layer at index 20 of the loaded model.
pre_trained_model.layers[20].name
# Observed output: 'block3_sepconv2' ( next are 'block3_sepconv2_bn' and 'conv2d_2' )

In [None]:
# Print the layer-by-layer summary of the headless Xception model.
pre_trained_model.summary()
# Figures recorded from a previous run:
#Total params: 20,861,480
#Trainable params: 20,806,952
#Non-trainable params: 54,528
# Layer order: Input ... block3_sepconv2 ... block14_sepconv2 ... global_average_pooling2d_1

In [None]:
### That's a lot of fine tuning...

### Position of `block3_sepconv2` in Xception:

https://github.com/keras-team/keras-applications/blob/master/keras_applications/xception.py#L188

### Should check the layout of the PyTorch model too...

https://github.com/Cadene/pretrained-models.pytorch/blob/master/pretrainedmodels/models/xception.py#L137

The PyTorch version seems to have only 12 blocks (the Keras summary above goes up to `block14`), which is a little odd...

### See UMAP results on basic 'digits' raw image files (per documentation)

In [None]:
! pip install umap-learn

In [None]:
import numpy as np
from sklearn.datasets import load_iris, load_digits
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
%matplotlib inline

In [None]:
# Notebook-wide plotting defaults: white background, 'notebook' scaling,
# and a 14x10 default figure size.
sns.set(
    context='notebook',
    style='white',
    rc={'figure.figsize': (14, 10)},
)

In [None]:
# Load scikit-learn's bundled 'digits' dataset.
digits = load_digits()
#print(digits.DESCR) # Documentation
#digits.images.shape # (1797, 8, 8)
# Flattened feature matrix: one 64-value row per 8x8 image.
digits.data.shape   # (1797, 64)

In [None]:
import umap

In [None]:
# Build a UMAP reducer with a fixed random_state and fit it on the flattened
# digit images; fit() returns the fitted reducer itself.
reducer = umap.UMAP(random_state=42).fit(digits.data)
reducer

In [None]:
# Push the same data the reducer was fitted on back through transform().
embedding = reducer.transform(digits.data)
# Sanity check: the result of calling transform should be identical to the
# embedding_ attribute stored on the fitted reducer.
matches_fit = np.all(embedding == reducer.embedding_)
assert matches_fit
embedding.shape

In [None]:
# Scatter the two embedding coordinates, coloured by digits.target.
umap_x, umap_y = embedding[:, 0], embedding[:, 1]
plt.scatter(umap_x, umap_y, s=5, c=digits.target, cmap='Spectral')
plt.gca().set_aspect('equal', 'datalim')
# Boundaries at -0.5, 0.5, ..., 9.5 centre each colour band on an integer 0-9.
class_bar = plt.colorbar(boundaries=np.arange(11) - 0.5)
class_bar.set_ticks(np.arange(10))
plt.title('UMAP projection of the Digits dataset', fontsize=24);

### See UMAP results on plain MNIST raw image files

*  https://github.com/snakers4/playing_with_vae

In [None]:
# ! pip install Pillow-SIMD  # Hmm : Missing jpeg library...

In [None]:
import numpy as np

import torch
import torchvision.datasets

In [None]:
# Per-image preprocessing: convert to a tensor, then normalise with the
# constants below (presumably the MNIST pixel mean / std — TODO confirm).
mnist_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.1307,), (0.3081,)),
])

# Training split, stored under ./data/mnist (download=True allows fetching it).
mnist_data = torchvision.datasets.MNIST(
    root='./data/mnist',
    train=True,
    download=True,
    transform=mnist_transform,
)
data_set = mnist_data

In [None]:
batch_size = 100

# Single-process loader (num_workers=0).  shuffle=True is intentionally not
# passed, so batches are not explicitly shuffled.
data_loader = torch.utils.data.DataLoader(
    dataset=data_set,
    batch_size=batch_size,
    num_workers=0,
)

In [None]:
# Target dimensionality of the random projection applied to each flattened image.
proj_dim = 100

# Seed torch's RNG so the randomly initialised projection (and therefore the
# embedding computed from it) is the same after every kernel restart.
torch.manual_seed(42)

# An untrained Linear layer serves as the random projection from 28*28 inputs
# to proj_dim outputs.
proj = torch.nn.Linear( 28*28, proj_dim )  # Does the initialisation 'better' than torch.randn()

In [None]:
# Project every image through `proj`, writing each batch straight into
# pre-allocated numpy arrays (rows follow the loader's iteration order).
xs, ys = np.zeros( (len(data_set), proj_dim) ), np.zeros( (len(data_set),) )
# Inference only: no_grad() stops autograd from recording a graph for each
# batch, which would be thrown away immediately anyway.
with torch.no_grad():
    for i_batch, (x_batch, y_batch) in enumerate(data_loader):
        start = i_batch * batch_size
        stop = start + x_batch.size(0)  # exact end, even for a short final batch
        x_proj = proj( x_batch.view(-1, 28*28) )
        #print(x_proj.size()); break
        xs[start:stop, :] = x_proj.numpy()
        ys[start:stop] = y_batch.numpy()

In [None]:
# Peek at ten labels starting at index 59990 to confirm the tail of `ys` was
# filled in (i_batch*batch_size after the loop is the start of the last batch).
base = 59990
ys[base:base + 10]

In [None]:
import time
import umap

In [None]:
# Time a UMAP fit (fixed random_state) on the projected vectors in `xs`.
t0 = time.time()
reducer = umap.UMAP(random_state=42)
reducer.fit(xs)
elapsed_secs = time.time() - t0
print("Fitting %d-D took %d secs" % (proj_dim, elapsed_secs))
# Timings recorded from earlier runs with different proj_dim values:
# 400d : 74 secs
# 100d : 76 secs
#  50d : 69 secs
#  35d : 68 secs
#  20d : 68 secs

In [None]:
# Run the vectors the reducer was fitted on through the fitted reducer.
embedding = reducer.transform(xs)
# Display the shape of the resulting embedding.
embedding.shape

In [None]:
# Square 8x8 figure for the embedding of the projected vectors.
plt.figure(figsize=(8, 8))
umap_x, umap_y = embedding[:, 0], embedding[:, 1]
# Colour each point by its label stored in `ys`.
plt.scatter(umap_x, umap_y, s=5, c=ys, cmap='Spectral')
plt.gca().set_aspect('equal', 'datalim')
# Boundaries at -0.5, 0.5, ..., 9.5 centre each colour band on an integer 0-9.
class_bar = plt.colorbar(boundaries=np.arange(11) - 0.5)
class_bar.set_ticks(np.arange(10))
plt.title('UMAP %d-D projection of MNIST' % (proj_dim,), fontsize=24);

In [None]:
# Ok, so now let's build a CNN to classify regular MNIST 
#   with last layer being 100D, and re-run the UMAP on that result...