<a href="https://colab.research.google.com/github/sidhant-guliani/satellite-images-CNN/blob/master/cnn_geospatial_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [44]:
import os
import gc
import zipfile
import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow.keras import Sequential, Model, Input
from tensorflow.keras.layers import Dense, ReLU, Activation, Flatten, Dropout, Conv2D, MaxPooling2D, GlobalAveragePooling2D, BatchNormalization, ZeroPadding2D
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.optimizers import Adam

import cv2
from tqdm import tqdm
from sklearn.metrics import fbeta_score
from sklearn.model_selection import KFold

In [45]:
from google.colab import drive
drive.mount('/content/drive')

local_zip = '/content/drive/My Drive/Colab_Notebooks/503255_938046_compressed_planet.zip'
zip_ref = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall('/amazon_data')
zip_ref.close()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [29]:
!ls ../amazon_data/planet

sample_submission.csv  test-jpg  train_classes.csv  train-jpg


In [56]:
base_dir = '../amazon_data/planet'
train_v2_file= os.path.join(base_dir, 'train_classes.csv')
submission_path = os.path.join(base_dir, 'sample_submission.csv')

df_train_data = pd.read_csv(train_v2_file)
df_test = pd.read_csv(submission_path)
#df_test = df_test[~df_test.image_name.str.startswith('test')]


In [47]:
flatten = lambda l: [item for sublist in l for item in sublist]
labels = list(set(flatten([l.split(' ') for l in df_train_data['tags'].values])))

label_map = {l: i for i, l in enumerate(labels)}
inv_label_map = {i: l for l, i in label_map.items()}

In [27]:
valid_data_size = 5000
input_size = 32
input_channels = 3

epochs = 15
batch_size = 128
learning_rate = 0.001
lr_decay = 1e-4

valid_data_size = 5000

In [15]:
x_valid = []
y_valid = []

df_valid = df_train_data[(len(df_train_data) - valid_data_size):]
for f, tags in tqdm(df_valid.values, miniters=100):
    dir_open= os.path.join(base_dir, 'train-jpg/{}.jpg'.format(f))
    img = cv2.imread(dir_open)
    img = cv2.resize(img, (input_size, input_size))

    targets = np.zeros(17)
    
    for t in tags.split(' '):
        targets[label_map[t]] = 1
    x_valid.append(img)
    y_valid.append(targets)

y_valid = np.array(y_valid, np.uint8)
x_valid = np.array(x_valid, np.float32)

100%|██████████| 5000/5000 [00:07<00:00, 682.74it/s]


In [16]:
print(x_valid.shape)
print(y_valid.shape)

(5000, 32, 32, 3)
(5000, 17)


In [17]:
x_train = []
y_train = []

df_train = df_train_data[:(len(df_train_data) - valid_data_size)]

for f, tags in tqdm(df_train.values, miniters=1000):
    dir_open= os.path.join(base_dir, 'train-jpg/{}.jpg'.format(f))
    img = cv2.imread(dir_open)
    img = cv2.resize(img, (input_size, input_size))
    targets = np.zeros(17)

    for t in tags.split(' '):
        targets[label_map[t]] = 1

    x_train.append(img)
    y_train.append(targets)
    img = cv2.flip(img, 0)  # flip vertically
    x_train.append(img)
    y_train.append(targets)
    img = cv2.flip(img, 1)  # flip horizontally
    x_train.append(img)
    y_train.append(targets)
    img = cv2.flip(img, 0)  # flip vertically
    x_train.append(img)
    y_train.append(targets)

y_train = np.array(y_train, np.uint8)
x_train = np.array(x_train, np.float32)

100%|██████████| 35479/35479 [00:56<00:00, 625.14it/s]


"y_test = []\n\np_test = model.predict(x_test, batch_size=batch_size, verbose=2)\ny_test.append(p_test)\n\nresult = np.array(y_test[0])\nresult = pd.DataFrame(result, columns=labels)\n\npreds = []\n\nfor i in tqdm(range(result.shape[0]), miniters=1000):\n    a = result.ix[[i]]\n    a = a.apply(lambda x: x > 0.2, axis=1)\n    a = a.transpose()\n    a = a.loc[a[i] == True]\n    ' '.join(list(a.index))\n    preds.append(' '.join(list(a.index)))\n\ndf_test_data['tags'] = preds"

In [18]:
#y_train = np.asarray(y_train).astype('float32').reshape((-1,1))
#y_valid = np.asarray(y_valid).astype('float32').reshape((-1,1))

In [19]:
# in this cell Im not using the kfold validation and fit_generator (using fit)
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=(32, 32, 3)))

model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(17, activation='sigmoid'))

model.compile(loss='binary_crossentropy', # We NEED binary here, since categorical_crossentropy l1 norms the output before calculating loss.
              optimizer='adam',
              metrics=['accuracy'])
              
model.fit(x_train, y_train,
          batch_size=128,
          epochs=4,
          verbose=1,
          validation_data=(x_valid, y_valid))
          

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<tensorflow.python.keras.callbacks.History at 0x7fcad4157a58>

In [None]:
x#https://www.kaggle.com/petrosgk/1st-try-with-keras-0-918-lb

In [None]:
x_test = []

for f, tags in tqdm(df_test.values, miniters=1000):
    dir_open= os.path.join(base_dir, 'test-jpg/{}.jpg'.format(f))
    img = cv2.imread(dir_open)
    if type(img) is np.ndarray:
      if img.size == 0:
        continue
    if img is None:
        continue
    img = cv2.resize(img, (input_size, input_size))
    x_test.append(img)

x_test = np.array(x_test, np.float32)

In [67]:
y_test = []
p_test = model.predict(x_test, batch_size=batch_size, verbose=2)
y_test.append(p_test)
result = np.array(y_test[0])
result = pd.DataFrame(result, columns=labels)
preds = []


318/318 - 20s


In [65]:
for i in tqdm(range(result.shape[0]), miniters=1000):
    a = result.iloc[[i]]
    a = a.apply(lambda x: x > 0.2, axis=1)
    a = a.transpose()
    a = a.loc[a[i] == True]
    preds.append(' '.join(list(a.index)))

df_test_data['tags'] = preds





  0%|          | 0/40669 [00:00<?, ?it/s]


NameError: ignored