<a href="https://colab.research.google.com/github/sidhant-guliani/satellite-images-CNN/blob/master/cnn_geospatial_data_using_VGG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [27]:
import os
import gc
import zipfile
import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow.keras import Sequential, Model, Input
from tensorflow.keras.layers import Dense, ReLU, Activation, Flatten, Dropout, Conv2D, MaxPooling2D, GlobalAveragePooling2D, BatchNormalization, ZeroPadding2D
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

import cv2
from tqdm import tqdm
from sklearn.metrics import fbeta_score
from sklearn.model_selection import KFold


In [28]:
from google.colab import drive
drive.mount('/content/drive')

# Zip archive stored on the mounted Google Drive; it is unpacked into /amazon_data.
local_zip = '/content/drive/My Drive/Colab_Notebooks/503255_938046_compressed_planet.zip'
# The context manager closes the archive even if extraction fails part-way.
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall('/amazon_data')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
!ls ../amazon_data/planet

sample_submission.csv  test-jpg  train_classes.csv  train-jpg


In [4]:
# Folder holding the extracted dataset, and the two CSV files located inside it.
base_dir = '../amazon_data/planet'
train_v2_file, submission_path = (
    os.path.join(base_dir, csv_name)
    for csv_name in ('train_classes.csv', 'sample_submission.csv')
)

# Read both CSVs (training file first, then the sample submission).
df_train_all, df_test = map(pd.read_csv, (train_v2_file, submission_path))

In [5]:
# Sanity check: show which columns the training CSV provides.
print(df_train_all.columns)
# Related Kaggle notebook (link left by the author):
# https://www.kaggle.com/jcarrillo/machine-learning-for-geospatial-data-workshop-2a


Index(['image_name', 'tags'], dtype='object')


In [6]:
def flatten(l):
    """Concatenate an iterable of iterables into one flat list."""
    return [item for sublist in l for item in sublist]


# Every distinct tag found in the space-separated 'tags' column.
# Sorted on purpose: iterating a set of strings has no stable order between
# Python processes, which would give each tag a different index after every
# kernel restart and make saved weights/predictions impossible to decode.
labels = sorted(set(flatten([l.split(' ') for l in df_train_all['tags'].values])))

# tag -> column index in the target vector, and the reverse lookup.
label_map = {l: i for i, l in enumerate(labels)}
inv_label_map = {i: l for l, i in label_map.items()}

In [22]:
# Side length (pixels) every image is resized to; also the height/width of the model input.
# NOTE(review): the trailing "#128" looks like an alternative value the author tried — confirm.
input_size = 32 #128
# Number of colour channels in the model's input shape.
input_channels = 3
# Number of epochs passed to the training call for each fold.
epochs = 4
# Number of rows the data generator packs into one batch.
# NOTE(review): the trailing "#128" looks like an alternative value the author tried — confirm.
batch_size = 16 #128
# Number of cross-validation folds.
n_folds = 5
# When False, the per-fold training call is skipped.
training = True

# Shuffled K-fold splitter; the fixed random_state makes the splits repeatable.
kf = KFold(n_splits=n_folds, shuffle=True, random_state=1) # crossfold validation

In [23]:
# EarlyStopping: stops training when a monitored metric has stopped improving.
# ReduceLROnPlateau: reduces the learning rate when a monitored metric has stopped improving.
# Parameters:
#   verbose: set to 0, 1 or 2 to choose how the training progress is displayed for each epoch.

In [24]:

def transformations(src, choice):
    """Apply one of six fixed rotate/flip combinations to an image.

    ``choice`` selects the transformation:
        0 - rotate 90 degrees clockwise
        1 - rotate 90 degrees clockwise, then flip horizontally
        2 - rotate 180 degrees
        3 - rotate 180 degrees, then flip horizontally
        4 - rotate 90 degrees counter-clockwise
        5 - rotate 90 degrees counter-clockwise, then flip horizontally

    Any other value returns ``src`` untouched.
    """
    # Even/odd pairs share the same rotation; pick it first.
    if choice in (0, 1):
        rotate_code = cv2.ROTATE_90_CLOCKWISE
    elif choice in (2, 3):
        rotate_code = cv2.ROTATE_180
    elif choice in (4, 5):
        rotate_code = cv2.ROTATE_90_COUNTERCLOCKWISE
    else:
        return src

    rotated = cv2.rotate(src, rotateCode=rotate_code)

    # The odd choices additionally mirror the rotated image horizontally.
    if choice in (1, 3, 5):
        return cv2.flip(rotated, flipCode=1)
    return rotated


In [25]:
def data_generator(df, file_name, augment=True):
    """Yield ``(images, targets)`` batches forever, in the row order of ``df``.

    Args:
        df: DataFrame whose rows unpack to ``(image name, space-separated tags)``.
        file_name: Sub-directory of ``base_dir`` that holds the
            ``<image name>.jpg`` files (e.g. ``'train-jpg'``).
        augment: When True (the default, matching the original behaviour)
            every image gets one randomly chosen rotation/flip from
            ``transformations``. Pass False to get the resized images
            unmodified, e.g. for a deterministic validation set.

    Yields:
        Tuple ``(x_batch, y_batch)``: ``x_batch`` is a float32 array of images
        resized to ``input_size`` x ``input_size``; ``y_batch`` is a uint8
        multi-hot array with 17 columns, indexed through ``label_map``.

    Raises:
        ValueError: If ``df`` has no rows.
        OSError: If an image file cannot be read.
    """
    if len(df) == 0:
        # Without this guard the endless loop below would spin forever
        # without ever producing a batch.
        raise ValueError('data_generator needs at least one row in df')

    while True:
        for start in range(0, len(df), batch_size):
            x_batch = []
            y_batch = []
            # Positional slice; the final batch may be shorter than batch_size.
            df_batch = df.iloc[start:start + batch_size]
            for f, tags in df_batch.values:
                dir_open = os.path.join(base_dir, '{}/{}.jpg'.format(file_name, f))
                img = cv2.imread(dir_open)
                if img is None:
                    # cv2.imread signals failure by returning None instead of
                    # raising; fail here with the offending path.
                    raise OSError('Could not read image: {}'.format(dir_open))
                img = cv2.resize(img, (input_size, input_size))
                if augment:
                    img = transformations(img, np.random.randint(6))

                # Multi-hot target; its width must match the model's output layer.
                targets = np.zeros(17)
                for t in tags.split(' '):
                    targets[label_map[t]] = 1

                x_batch.append(img)
                y_batch.append(targets)
            x_batch = np.array(x_batch, np.float32)
            y_batch = np.array(y_batch, np.uint8)

            yield x_batch, y_batch

In [None]:
fold_count = 0  # 1-based number of the current fold; used in the checkpoint file name
y_full_test = []
history = []  # collects the object returned by each fold's training call

for train_index, test_index in kf.split(df_train_all):
    fold_count += 1
    df_valid = df_train_all.iloc[test_index]
    df_train = df_train_all.iloc[train_index]

    # A fresh ImageNet-initialised VGG19 convolutional base for every fold, so
    # no fold starts from weights already tuned on another fold's data.
    base_model = VGG19(include_top=False,
                       weights='imagenet',
                       input_shape=(input_size, input_size, input_channels))
    model = Sequential()
    model.add(BatchNormalization(input_shape=(input_size, input_size, input_channels)))
    model.add(base_model)
    model.add(Flatten())

    # 17 independent sigmoid outputs, paired with binary cross-entropy below.
    model.add(Dense(17, activation='sigmoid'))
    optm = tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999,
                                    epsilon=1e-07, amsgrad=False, name='Adam')

    model.compile(loss='binary_crossentropy',
                  optimizer=optm,
                  metrics=['accuracy'])

    callbacks = [EarlyStopping(monitor='val_loss',
                               patience=4,
                               verbose=1,
                               min_delta=1e-4),
                 ReduceLROnPlateau(monitor='val_loss',
                                   factor=0.1,
                                   patience=2,
                                   cooldown=2,
                                   verbose=1),
                 # One weights file per fold.
                 ModelCheckpoint(filepath='best_weights.fold_' + str(fold_count) + '.hdf5',
                                 save_best_only=True,
                                 save_weights_only=True)]

    if training:
        # Ceiling division: exactly enough steps to see every row once per
        # epoch. "(len // batch_size) + 1" schedules one step too many whenever
        # the length is an exact multiple of batch_size, which makes the
        # never-ending generator drift across epoch boundaries.
        train_steps = int(np.ceil(len(df_train) / batch_size))
        valid_steps = int(np.ceil(len(df_valid) / batch_size))

        # Model.fit accepts Python generators directly; fit_generator is deprecated.
        history.append(model.fit(data_generator(df_train, 'train-jpg'),
                                 steps_per_epoch=train_steps,
                                 epochs=epochs,
                                 verbose=1,
                                 callbacks=callbacks,
                                 validation_data=data_generator(df_valid, 'train-jpg'),
                                 validation_steps=valid_steps))

Epoch 1/4