# HackerEarth DL Challenge

**Prerequisites**

In [1]:
!pip install opencv-python
!pip install tflearn
!pip install tqdm
!pip install PyDrive



**Header And Imports**

In [0]:
import cv2                 # working with, mainly resizing, images
import numpy as np
import os
from random import shuffle # mixing up or currently ordered data that might lead our network astray in training
from tqdm import tqdm      # a nice percentage bar ;)
import pickle
from google.colab import files

import tflearn
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
from tflearn.data_utils import shuffle, to_categorical
import tensorflow as tf

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# TRAIN_DIR = ''
# TEST_DIR = ''
IMG_SIZE = 150
LR = 1e-3

MODEL_NAME = 'model_tflearn_trained.model'

**Data Download and Extraction**

In [3]:
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

DRIVE_FILE_ID = '1Yzx5AeuNq7mo0AI3y1jf2SBi4gaZcFKE'

fileId = drive.CreateFile({'id':DRIVE_FILE_ID})
fileId.GetContentFile('train_data.dat')
!ls

datalab  train_data.dat


**Data Preprocessing**

In [0]:
def label_img(img):
    word_label = img.split('.')[0]
    if word_label == 'antelope': return 0
    elif word_label == 'bat': return 1
    elif word_label == 'beaver': return 2
    elif word_label == 'bobcat': return 3
    elif word_label == 'buffalo': return 4
    elif word_label == 'chihuahua': return 5
    elif word_label == 'chimpanzee': return 6
    elif word_label == 'collie': return 7
    elif word_label == 'dalmatian': return 8
    elif word_label == 'german+shepherd': return 9
    elif word_label == 'grizzly+bear': return 10
    elif word_label == 'hippopotamus': return 11
    elif word_label == 'horse': return 12
    elif word_label == 'killer+whale': return 13
    elif word_label == 'mole': return 14
    elif word_label == 'moose': return 15
    elif word_label == 'mouse': return 16
    elif word_label == 'otter': return 17
    elif word_label == 'ox': return 18
    elif word_label == 'persian+cat': return 19
    elif word_label == 'raccoon': return 20
    elif word_label == 'rat': return 21
    elif word_label == 'rhinoceros': return 22
    elif word_label == 'seal': return 23
    elif word_label == 'siamese+cat': return 24
    elif word_label == 'spider+monkey': return 25
    elif word_label == 'squirrel': return 26
    elif word_label == 'walrus': return 27
    elif word_label == 'weasel': return 28
    elif word_label == 'wolf': return 29


def create_train_data():
    training_data = []

    if os.path.exists("train_data.dat"):
        file = open('train_data.dat', 'rb')
        training_data = pickle.load(file)
        file.close()
        return training_data

    img_count = 0
    for img in tqdm(os.listdir(TRAIN_DIR)):
        label = label_img(img)
        path = os.path.join(TRAIN_DIR,img)
        img = cv2.imread(path,cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(img, (IMG_SIZE,IMG_SIZE))
        training_data.append([np.array(img),np.array(label)])
        img_count += 1
        #if img_count > 6000 : break

    shuffle(training_data)

    file = open('train_data.dat', 'wb')
    pickle.dump(training_data, file)  
    file.close()

    return training_data

train_data = create_train_data()

train = train_data[:-3000]
test = train_data[-3000:]

**Data Split**

In [5]:
# Training Data
x_train = np.array([i[0] for i in train]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
y_train = [i[1] for i in train]

y_train = to_categorical(y_train,30)

# Testing Data
x_test = np.array([i[0] for i in test]).reshape(-1,IMG_SIZE,IMG_SIZE,1)

y_test = [i[1] for i in test]
print(y_test)
y_test = to_categorical(y_test,30)
print(y_test)

[array(15, dtype=int32), array(15, dtype=int32), array(17, dtype=int32), array(12, dtype=int32), array(3, dtype=int32), array(17, dtype=int32), array(6, dtype=int32), array(0, dtype=int32), array(14, dtype=int32), array(29, dtype=int32), array(11, dtype=int32), array(0, dtype=int32), array(3, dtype=int32), array(1, dtype=int32), array(6, dtype=int32), array(18, dtype=int32), array(14, dtype=int32), array(10, dtype=int32), array(15, dtype=int32), array(22, dtype=int32), array(17, dtype=int32), array(26, dtype=int32), array(11, dtype=int32), array(7, dtype=int32), array(15, dtype=int32), array(0, dtype=int32), array(12, dtype=int32), array(20, dtype=int32), array(12, dtype=int32), array(26, dtype=int32), array(11, dtype=int32), array(9, dtype=int32), array(17, dtype=int32), array(3, dtype=int32), array(7, dtype=int32), array(4, dtype=int32), array(29, dtype=int32), array(11, dtype=int32), array(20, dtype=int32), array(12, dtype=int32), array(6, dtype=int32), array(8, dtype=int32), array(

**Model Design**

In [6]:
# Convolutional network building
tf.reset_default_graph()
network = input_data(shape=[None, IMG_SIZE, IMG_SIZE, 1], name='input')

network = conv_2d(network, 32, 3, activation='relu')

network = max_pool_2d(network, 2)

network = conv_2d(network, 64, 3, activation='relu')

network = conv_2d(network, 64, 3, activation='relu')

network = max_pool_2d(network, 2)

network = fully_connected(network, 512, activation='relu')

network = dropout(network, 0.5)

network = fully_connected(network, 30, activation='softmax')

network = regression(network, optimizer='adam',
                     loss='categorical_crossentropy',
                     learning_rate=0.001)

Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
Instructions for updating:
keep_dims is deprecated, use keepdims instead


**Training the Model**

In [7]:
# Train
model = tflearn.DNN(network, tensorboard_verbose=0)
model.fit(x_train, y_train, n_epoch=100, shuffle=True, validation_set=(x_test, y_test),
          show_metric=True, run_id=MODEL_NAME)
model.save(MODEL_NAME)
#files.download(MODEL_NAME)

Training Step: 15699  | total loss: [1m[32m21.65076[0m[0m | time: 24.189s
| Adam | epoch: 100 | loss: 21.65076 - acc: 0.0597 -- iter: 09984/10000
Training Step: 15700  | total loss: [1m[32m21.64436[0m[0m | time: 26.797s
| Adam | epoch: 100 | loss: 21.64436 - acc: 0.0600 | val_loss: 21.55987 - val_acc: 0.0637 -- iter: 10000/10000
--
INFO:tensorflow:/content/dlchallenge-0.001-2conv-basic.model is not in all_model_checkpoint_paths. Manually adding it.


FileNotFoundError: ignored