# Case-1: Transfer Learning
        
        [Split the planes metadata into source and target sets according to where the data was collected (Southern/Northern California).
        Southern is taken as the source and Northern as the target.
        source.csv = source metadata
        target.csv = target metadata]
        
        Source [train(90%),validation(10%)]
        Target [test(100%)]
        
        **metadata is used only for splitting.

In [None]:
import numpy as np
import pandas as pd
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator
import random

In [None]:
# Planes dataset: read the JSON file into a DataFrame.
# Later cells read its 'data' and 'labels' columns.

data = pd.read_json(path_or_buf='planesnet.json')

In [None]:
# Add an ID
# The ID column is a copy of the frame's index, so every record carries its own
# row label as a regular column. Plain assignment overwrites an existing 'ID'
# column, so re-running this cell is harmless.
# NOTE(review): presumably this is the value the metadata files call ImageID - confirm.

data['ID'] = data.index

In [None]:
# Show the first rows as a quick check of the loaded columns and the ID column.
data.head()

In [None]:
# Source metadata: read only to pick which image IDs belong to the source split.
# The columns used later are 'NorthernCalifornia', 'SplitLabel' and 'ImageID'.

source_planes = pd.read_csv(filepath_or_buffer="source.csv")

In [None]:
# Peek at the source metadata columns before filtering on them.
source_planes.head()

In [None]:
# Train and validation sets [using source planes ID]
#
# Only source-region rows (NorthernCalifornia == 0) are kept, and each one is
# routed by its SplitLabel ('Train' or 'Validate'). Boolean masks replace the
# per-row Python loop with chained lookups; the row order of source.csv is
# preserved. Each list entry is the `data` row (a Series) found at the
# positional index given by ImageID, so later cells can keep reading
# entry['data'] and entry['labels'].

is_source_region = source_planes['NorthernCalifornia'] == 0
split_label = source_planes['SplitLabel']

train_ids = source_planes.loc[is_source_region & (split_label == 'Train'), 'ImageID']
val_ids = source_planes.loc[is_source_region & (split_label == 'Validate'), 'ImageID']

train = [data.iloc[image_id] for image_id in train_ids]
val = [data.iloc[image_id] for image_id in val_ids]

In [None]:
# Number of records that landed in the train and validation lists.
len(train),len(val)

In [None]:
# Turn the training records into arrays for the network.
# Each flat 'data' vector is viewed as 3 planes of 20*20 values, transposed so
# the 3 values of one pixel sit together, then shaped to (20, 20, 3).
x_train = np.array([
    np.array(record['data']).reshape((3, 20 * 20)).T.reshape((20, 20, 3))
    for record in train
])
# One label per record, in the same order as x_train.
y_train = np.array([np.array(record['labels']) for record in train])

In [None]:
# Turn the validation records into arrays, using the same layout as training:
# flat 'data' vector -> (3, 400) planes -> transpose -> (20, 20, 3) image.
x_val = np.array([
    np.array(record['data']).reshape((3, 20 * 20)).T.reshape((20, 20, 3))
    for record in val
])
# One label per record, in the same order as x_val.
y_val = np.array([np.array(record['labels']) for record in val])

In [None]:
# Target metadata: read only to pick which image IDs belong to the target split.
# The columns used later are 'NorthernCalifornia', 'SplitLabel' and 'ImageID'.

target_planes = pd.read_csv(filepath_or_buffer="target.csv")

In [None]:
# Peek at the target metadata columns before filtering on them.
target_planes.head()

In [None]:
# Test set [using target planes ID]
#
# Only target-region rows (NorthernCalifornia == 1) labelled 'Test' are kept.
# A boolean mask replaces the per-row Python loop with chained lookups; the
# row order of target.csv is preserved. Each list entry is the `data` row
# (a Series) found at the positional index given by ImageID.

is_target_test = (
    (target_planes['NorthernCalifornia'] == 1)
    & (target_planes['SplitLabel'] == 'Test')
)
test_ids = target_planes.loc[is_target_test, 'ImageID']

test = [data.iloc[image_id] for image_id in test_ids]

In [None]:
# Turn the test records into arrays, using the same layout as train/validation:
# flat 'data' vector -> (3, 400) planes -> transpose -> (20, 20, 3) image.
x_test = np.array([
    np.array(record['data']).reshape((3, 20 * 20)).T.reshape((20, 20, 3))
    for record in test
])
# One label per record, in the same order as x_test.
y_test = np.array([np.array(record['labels']) for record in test])

In [None]:
# Shapes of the image/label arrays for the train, validation and test splits.
x_train.shape, y_train.shape, x_val.shape ,y_val.shape, x_test.shape, y_test.shape

In [None]:
# CNN model
#
# Two convolution -> max-pool -> ReLU stages feed a dense head that ends in a
# single sigmoid unit (binary output). The layer names are kept exactly as
# given; the dense-head layers carry an 'N' in their names.

model = Sequential([
    # Stage 1: 32 filters, 2x2 kernel; the input shape is taken from the training images.
    Conv2D(32, (2, 2), padding='same',
           input_shape=x_train.shape[1:], name='layer_1'),
    MaxPooling2D(pool_size=(2, 2), name='layer_2'),
    Activation('relu', name='layer_3'),

    # Stage 2: 64 filters, 2x2 kernel.
    Conv2D(64, (2, 2), padding='same', name='layer_4'),
    MaxPooling2D(pool_size=(2, 2), name='layer_5'),
    Activation('relu', name='layer_6'),

    Dropout(0.2, name='layer_7'),

    Flatten(name='layer_8'),

    # Dense head with dropout before each following dense layer.
    Dense(256, name='layer_N9'),
    Activation('relu', name='layer_N10'),

    Dropout(0.4, name='layer_N11'),
    Dense(128, name='layer_N12'),
    Activation('relu', name='layer_N13'),

    Dropout(0.4, name='layer_N14'),
    Dense(1, activation='sigmoid', name='layer_N15'),
])

In [None]:
from keras import optimizers

# Binary cross-entropy matches the single sigmoid output unit; Adam is selected
# by name, so it runs with its default settings. Accuracy is tracked as the metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Both generators only rescale values by 1/255; no augmentation is configured.
# Training gets its own generator object; validation and test share the other one.
train_datagen = ImageDataGenerator(rescale=1.0 / 255)

test_datagen = ImageDataGenerator(rescale=1.0 / 255)

In [None]:
# Batch iterators over the in-memory arrays.
# Train and validation use batches of 500. The test iterator uses batches of 100
# and shuffle=False, so its output order lines up with y_test.

train_generator = train_datagen.flow(x=x_train, y=y_train, batch_size=500)
validation_generator = test_datagen.flow(x=x_val, y=y_val, batch_size=500)
test_generator = test_datagen.flow(
    x=x_test,
    y=y_test,
    batch_size=100,
    shuffle=False,
)

In [None]:
# Train for a fixed number of epochs. Step counts come from len() of each
# iterator, so every epoch walks the whole train and validation iterators once.
# The returned object is kept in `history`.
epochs = 50

history = model.fit_generator(
    train_generator,
    steps_per_epoch=len(train_generator),
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    epochs=epochs,
    verbose=1,
)

In [None]:
# Accuracy on the target (test) set.
# The model was compiled with one metric ('accuracy'), so `scores` is
# [loss, accuracy] and index 1 is the accuracy.

scores = model.evaluate_generator(test_generator, verbose=0)
print('Test accuracy:', scores[1])

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Predict on the unshuffled test iterator and threshold the sigmoid outputs at
# 0.5 to get boolean class predictions.
y_pred = np.asarray(model.predict_generator(test_generator) > 0.5, dtype=bool)

In [None]:
# Confusion matrix of true test labels against thresholded predictions.

print(confusion_matrix(y_true=y_test, y_pred=y_pred))

In [None]:
# Per-class precision/recall/F1 report; the display names are listed in
# label order (first the negative class, then the positive class).

target_names = ['No Planes', 'Planes']
print(
    classification_report(
        y_true=y_test,
        y_pred=y_pred,
        target_names=target_names,
    )
)