# Transfer Learning CIFAR10

* Train a simple convnet on the first 5 output classes [0..4] of the CIFAR-10 dataset.
* Freeze convolutional layers and fine-tune dense layers for the last 5 output classes [5..9].


### 1. Import CIFAR10 data and create 2 datasets with one dataset having classes from 0 to 4 and other having classes from 5 to 9 

In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [2]:
tf.__version__

'2.0.0'

In [3]:
# Load CIFAR-10 as (images, labels) pairs for its predefined train and test splits.
(trainX, trainY),(testX, testY) = tf.keras.datasets.cifar10.load_data()

In [4]:
trainX.shape, trainY.shape, testX.shape, testY.shape

((50000, 32, 32, 3), (50000, 1), (10000, 32, 32, 3), (10000, 1))

### First, concatenate the X and y arrays

In [5]:
# Pool the original train and test splits along the sample axis so the data
# can be re-partitioned by class label afterwards.
X = np.concatenate((trainX, testX), axis=0)
y = np.concatenate((trainY, testY), axis=0)

### Check the shape of X and y after concatenation

In [6]:
X.shape, y.shape

((60000, 32, 32, 3), (60000, 1))

### Create a filter to get the indices of target values < 5 and >= 5

In [7]:
# Index tuples locating the labels below 5 and the labels 5 and above.
# `y` is 2-D, so each tuple holds a row-index array and a column-index array.
train_filter = np.nonzero(y < 5)
test_filter = np.nonzero(y >= 5)

### Get the train, test values based on above filter

In [8]:
# Images are picked by row index only; labels are picked with the full
# (row, column) index tuple, which also flattens them to a 1-D array.
trainX = X[train_filter[0]]
trainY = y[train_filter]
testX = X[test_filter[0]]
testY = y[test_filter]

### Check the shape of X and y after applying filter

In [9]:
trainX.shape, trainY.shape, testX.shape, testY.shape

((30000, 32, 32, 3), (30000,), (30000, 32, 32, 3), (30000,))

### 2. Use one-hot encoding to convert `trainY` and `testY` into the required number of output classes

In [10]:
# One-hot encode both label arrays.
# The width is 10 (all CIFAR-10 class ids) even though each subset only uses
# five of them, so the encoding lines up with the 10-unit softmax output layer.
trainY, testY = (
    tf.keras.utils.to_categorical(labels, num_classes=10)
    for labels in (trainY, testY)
)

### 3. Build a sequential neural network model which can classify the classes 0 to 4 of CIFAR10 dataset with at least 80% accuracy on test data

In [11]:
# Reset any state left over from previously built Keras models
tf.keras.backend.clear_session()

# Assemble the whole network in one go from an ordered list of layers
model = tf.keras.models.Sequential([
    # Declares the (32, 32, 3) input; the target shape equals the input
    # shape, so images pass through unchanged
    tf.keras.layers.Reshape((32, 32, 3), input_shape=(32, 32, 3)),
    # Batch-normalize the raw pixel values
    tf.keras.layers.BatchNormalization(),
    # First convolution: 32 filters of size 3x3 with ReLU
    tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
    # Second convolution: 64 filters of size 3x3 with ReLU
    tf.keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    # 2x2 max pooling
    tf.keras.layers.MaxPool2D(pool_size=(2, 2)),
    # Flatten the feature maps into one vector per image
    tf.keras.layers.Flatten(),
    # Dropout with rate 0.25 ahead of the classifier
    tf.keras.layers.Dropout(0.25),
    # Softmax output layer with 10 units
    tf.keras.layers.Dense(10, activation='softmax'),
])

In [12]:
# Configure training: Adam optimizer, categorical cross-entropy on the
# one-hot labels, and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [13]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape (Reshape)            (None, 32, 32, 3)         0         
_________________________________________________________________
batch_normalization (BatchNo (None, 32, 32, 3)         12        
_________________________________________________________________
conv2d (Conv2D)              (None, 30, 30, 32)        896       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 28, 28, 64)        18496     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 12544)             0         
_________________________________________________________________
dropout (Dropout)            (None, 12544)             0

In [14]:
# Checkpoint callback for the class 0-4 model: writes the model to disk
# whenever the monitored `accuracy` metric reaches a new maximum.
model_checkpoint_04 = tf.keras.callbacks.ModelCheckpoint(
    filepath='cifar10-model_04.h5',
    monitor='accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

In [46]:
# Train on the class 0-4 subset for 10 epochs, checkpointing the best epoch.
model.fit(
    trainX,
    trainY,
    epochs=10,
    batch_size=32,
    callbacks=[model_checkpoint_04],
)

Train on 30000 samples
Epoch 1/10
Epoch 00002: accuracy did not improve from 0.89020
Epoch 3/10
Epoch 00003: accuracy improved from 0.89020 to 0.89250, saving model to cifar10-model_04.h5
Epoch 4/10
Epoch 00004: accuracy improved from 0.89250 to 0.90607, saving model to cifar10-model_04.h5
Epoch 5/10
Epoch 00005: accuracy improved from 0.90607 to 0.91147, saving model to cifar10-model_04.h5
Epoch 6/10
Epoch 00006: accuracy improved from 0.91147 to 0.91637, saving model to cifar10-model_04.h5
Epoch 7/10
Epoch 00007: accuracy improved from 0.91637 to 0.92240, saving model to cifar10-model_04.h5
Epoch 8/10
Epoch 00008: accuracy improved from 0.92240 to 0.92333, saving model to cifar10-model_04.h5
Epoch 9/10
Epoch 00009: accuracy improved from 0.92333 to 0.92893, saving model to cifar10-model_04.h5
Epoch 10/10
Epoch 00010: accuracy improved from 0.92893 to 0.92907, saving model to cifar10-model_04.h5


<tensorflow.python.keras.callbacks.History at 0x1d280609b08>

### 4. In the model which was built above (for classification of classes 0-4 in CIFAR10), make only the dense layers to be trainable and conv layers to be non-trainable

In [17]:
# Freeze the convolutional feature extractor.
# Layers are selected by type instead of by the substring "conv" in their
# auto-generated names: the selection keeps working if a layer is given a
# custom name, and cannot accidentally catch a non-convolutional layer whose
# name happens to contain "conv".
# NOTE(review): every other layer, BatchNormalization included, keeps its
# current `trainable` flag -- confirm that is intended.
for layer in model.layers:
    if isinstance(layer, tf.keras.layers.Conv2D):
        layer.trainable = False

In [18]:
# List every layer with its trainable flag to verify what was frozen.
for layer in model.layers:
    print(f"{layer.name} {layer.trainable}")

reshape True
batch_normalization True
conv2d False
conv2d_1 False
max_pooling2d True
flatten True
dropout True
dense True


In [19]:
# Compile again after changing the `trainable` flags so that training uses
# the updated set of trainable weights.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [20]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape (Reshape)            (None, 32, 32, 3)         0         
_________________________________________________________________
batch_normalization (BatchNo (None, 32, 32, 3)         12        
_________________________________________________________________
conv2d (Conv2D)              (None, 30, 30, 32)        896       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 28, 28, 64)        18496     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 12544)             0         
_________________________________________________________________
dropout (Dropout)            (None, 12544)             0

### We see that the convolutional layers are no longer trainable, so their weights are now frozen. The remaining layers, including the last Dense layer and the BatchNormalization layer, are still marked trainable

### 5. Utilize the model trained on CIFAR-10 (classes 0 to 4) to classify the classes 5 to 9 of CIFAR-10 (use transfer learning) <br>
Achieve an accuracy of more than 85% on test data

### Remove the last Dense layer from the earlier trained model for 0-4 classes

In [21]:
# Drop the model's last layer: the Dense softmax head trained on classes 0-4.
model.pop()

### Add the new Dense layer for 5-9 classes

In [22]:
# New softmax output layer to be trained on classes 5-9
# (10 units, matching the 10-wide one-hot labels)
model.add(tf.keras.layers.Dense(10, activation='softmax'))

In [23]:
# Compile again now that the output layer has been replaced.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

### Check the model, layers and the parameters

In [24]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape (Reshape)            (None, 32, 32, 3)         0         
_________________________________________________________________
batch_normalization (BatchNo (None, 32, 32, 3)         12        
_________________________________________________________________
conv2d (Conv2D)              (None, 30, 30, 32)        896       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 28, 28, 64)        18496     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 12544)             0         
_________________________________________________________________
dropout (Dropout)            (None, 12544)             0

In [25]:
# Checkpoint callback for the class 5-9 model: writes the model to disk
# whenever the monitored `accuracy` metric reaches a new maximum.
model_checkpoint_59 = tf.keras.callbacks.ModelCheckpoint(
    filepath='cifar10-model_59.h5',
    monitor='accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

### Now, retrain the model on the class 5-9 subset (stored in `testX` / `testY`)

In [26]:
# Fine-tune on the class 5-9 subset; `testX` / `testY` hold that subset here,
# not a held-out evaluation split.
model.fit(
    testX,
    testY,
    epochs=10,
    batch_size=32,
    callbacks=[model_checkpoint_59],
)

Train on 30000 samples
Epoch 1/10
Epoch 00002: accuracy improved from 0.82710 to 0.89383, saving model to cifar10-model_59.h5
Epoch 3/10
Epoch 00003: accuracy improved from 0.89383 to 0.91723, saving model to cifar10-model_59.h5
Epoch 4/10
Epoch 00004: accuracy improved from 0.91723 to 0.92730, saving model to cifar10-model_59.h5
Epoch 5/10
Epoch 00005: accuracy improved from 0.92730 to 0.93507, saving model to cifar10-model_59.h5
Epoch 6/10
Epoch 00006: accuracy improved from 0.93507 to 0.93967, saving model to cifar10-model_59.h5
Epoch 7/10
Epoch 00007: accuracy improved from 0.93967 to 0.94460, saving model to cifar10-model_59.h5
Epoch 8/10
Epoch 00008: accuracy improved from 0.94460 to 0.94793, saving model to cifar10-model_59.h5
Epoch 9/10
Epoch 00009: accuracy improved from 0.94793 to 0.95050, saving model to cifar10-model_59.h5
Epoch 10/10
Epoch 00010: accuracy improved from 0.95050 to 0.95257, saving model to cifar10-model_59.h5


<tensorflow.python.keras.callbacks.History at 0x1d2d4851888>

# Text classification using TF-IDF

### 6. Load the dataset from sklearn.datasets

In [28]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.feature_extraction.text import TfidfVectorizer

In [29]:
# Newsgroup topics used for the text-classification task.
categories = [
    'alt.atheism',
    'soc.religion.christian',
    'comp.graphics',
    'sci.med',
]

### 7. Training data

In [30]:
# Training split of the selected newsgroups, shuffled with a fixed seed.
twenty_train = fetch_20newsgroups(
    subset='train',
    categories=categories,
    shuffle=True,
    random_state=42,
)

### 8. Test data

In [31]:
# Test split of the selected newsgroups, shuffled with the same fixed seed.
twenty_test = fetch_20newsgroups(
    subset='test',
    categories=categories,
    shuffle=True,
    random_state=42,
)

###  a.  You can access the values for the target variable using .target attribute 
###  b. You can access the name of the class in the target variable with .target_names


In [32]:
twenty_train.target

array([1, 1, 3, ..., 2, 2, 2], dtype=int64)

In [33]:
twenty_train.target_names

['alt.atheism', 'comp.graphics', 'sci.med', 'soc.religion.christian']

In [34]:
twenty_train.data[0:5]

['From: sd345@city.ac.uk (Michael Collier)\nSubject: Converting images to HP LaserJet III?\nNntp-Posting-Host: hampton\nOrganization: The City University\nLines: 14\n\nDoes anyone know of a good way (standard PC application/PD utility) to\nconvert tif/img/tga files into LaserJet III format.  We would also like to\ndo the same, converting to HPGL (HP plotter) files.\n\nPlease email any response.\n\nIs this the correct group?\n\nThanks in advance.  Michael.\n-- \nMichael Collier (Programmer)                 The Computer Unit,\nEmail: M.P.Collier@uk.ac.city                The City University,\nTel: 071 477-8000 x3769                      London,\nFax: 071 477-8565                            EC1V 0HB.\n',
 "From: ani@ms.uky.edu (Aniruddha B. Deglurkar)\nSubject: help: Splitting a trimming region along a mesh \nOrganization: University Of Kentucky, Dept. of Math Sciences\nLines: 28\n\n\n\n\tHi,\n\n\tI have a problem, I hope some of the 'gurus' can help me solve.\n\n\tBackground of the probl

### 9.  Now with dependent and independent data available for both train and test datasets, using TfidfVectorizer fit and transform the training data and test data and get the tfidf features for both

In [35]:
len(twenty_train.data), len(twenty_train.target), len(twenty_test.data), len(twenty_test.target)

(2257, 2257, 1502, 1502)

In [36]:
# TF-IDF vectorizer: skips English stop words, terms found in fewer than
# 3 documents, and terms found in more than 80% of documents.
tfidf_vectorizer = TfidfVectorizer(
    stop_words='english',
    min_df=3,
    max_df=0.8,
)

In [37]:
# Fit the vectorizer on the training documents only and turn them into TF-IDF features.
twenty_train_X = tfidf_vectorizer.fit_transform(twenty_train.data)

In [38]:
# Transform (without re-fitting) the test documents using the already fitted vectorizer.
twenty_test_X = tfidf_vectorizer.transform(twenty_test.data)

In [39]:
# Show the terms kept by the fitted vectorizer.
# NOTE(review): newer scikit-learn releases replace this method with
# get_feature_names_out(); confirm against the installed version before upgrading.
tfidf_vectorizer.get_feature_names()

['00',
 '000',
 '0001',
 '0010580b',
 '01',
 '0100',
 '01580',
 '02',
 '0200',
 '02118',
 '02173',
 '023044',
 '024103',
 '03',
 '0349',
 '0358',
 '04',
 '040',
 '041343',
 '05',
 '0511',
 '06',
 '0600',
 '0608',
 '06320',
 '07',
 '08',
 '081052',
 '084042',
 '09',
 '095220',
 '0x100',
 '10',
 '100',
 '1000',
 '101',
 '10101',
 '101010',
 '1013',
 '102',
 '102007',
 '1024',
 '1024x768',
 '103',
 '104',
 '105',
 '106',
 '109',
 '10th',
 '11',
 '110',
 '1100',
 '111',
 '1111',
 '112',
 '11230',
 '11292',
 '113',
 '114',
 '114127',
 '114158',
 '115',
 '115288',
 '115565',
 '115a',
 '116305',
 '11632',
 '1165',
 '11670',
 '117',
 '1170',
 '118',
 '11825',
 '119',
 '1192d',
 '12',
 '120',
 '1200',
 '12091',
 '121',
 '122',
 '122647',
 '123',
 '1246',
 '125',
 '126',
 '127',
 '128',
 '1280',
 '129',
 '13',
 '130',
 '1300',
 '131',
 '1312',
 '131239',
 '132',
 '133',
 '134',
 '13495',
 '135',
 '1352',
 '135941',
 '136',
 '137',
 '1379',
 '138',
 '139',
 '13h',
 '13th',
 '14',
 '140',
 '1400',

### 10. Use LogisticRegression with the TF-IDF features as input and the targets as output, train the model, and report the train and test accuracy scores

In [40]:
# instantiate a logistic regression model with its default settings
# (the class itself is imported earlier in the notebook)
logreg = LogisticRegression()

In [41]:
# train the model on the TF-IDF training features (twenty_train_X) and their newsgroup labels
logreg.fit(twenty_train_X, twenty_train.target)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [42]:
# make class predictions for the training features (twenty_train_X),
# used below to compute the training accuracy
twenty_train_predict = logreg.predict(twenty_train_X)

In [43]:
# make class predictions for the test features (twenty_test_X),
# used below to compute the testing accuracy
twenty_test_predict = logreg.predict(twenty_test_X)

In [44]:
# Share of training documents whose predicted label matches the true label
train_accuracy = metrics.accuracy_score(twenty_train.target, twenty_train_predict)
print('Training Accuracy: ', train_accuracy)

Training Accuracy:  0.9955693398316349


In [45]:
# Share of test documents whose predicted label matches the true label
test_accuracy = metrics.accuracy_score(twenty_test.target, twenty_test_predict)
print('Testing Accuracy: ', test_accuracy)

Testing Accuracy:  0.90745672436751
