In [None]:
from keras.layers import Dense, Conv2D , MaxPool2D , Flatten , Dropout 
from sklearn.metrics import classification_report,confusion_matrix
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.optimizers import Adam
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
import pickle 
from skimage import io
import seaborn as sns
import pandas as pd
import collections
import numpy as np
import scipy.stats
import statistics
import urllib
import json
import cv2
import os

# Shared pipeline state. Every slot starts out empty (None) and is filled in
# by the stage function that declares it ``global``.
image_data_complete = None                  # written by load_saved_data()
label_1 = label_0 = None                    # written by labels_generate()
train = val = None                          # written by generate_train_val()
x_train = y_train = x_val = y_val = None    # written by data_normalize()

def json_to_dict_project():
    """Read ``anonymized_project.json`` from the current working directory.

    Returns:
        The decoded JSON document, exactly as produced by ``json.load``.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    with open("anonymized_project.json") as project_file:
        return json.load(project_file)
def json_to_dict_reference():
    """Read ``references.json`` from the current working directory.

    Returns:
        The decoded JSON document, exactly as produced by ``json.load``.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    with open("references.json") as reference_file:
        return json.load(reference_file)


def answer_detector(reference_data, user_data):
    """Return 1 when an annotator's answer agrees with the reference, else 0.

    Args:
        reference_data: mapping of image id -> record holding an
            ``"is_bicycle"`` entry.
        user_data: one annotation record; ``["task_input"]["image_url"]`` and
            ``["task_output"]["answer"]`` are read.

    Returns:
        1 if the image id is present in ``reference_data`` and the annotator's
        answer equals ``"yes"`` for a bicycle reference / ``"no"`` otherwise;
        0 in every other case (including an unknown image id).

    NOTE(review): the returned value encodes *annotator agreement*, not
    "image shows a bicycle", yet the training report labels class 1 as
    "Bicycle" -- confirm this is the intended training target.
    """
    # The id is the 8 characters in front of a 4-character suffix such as
    # ".jpg" -- presumably ids look like "img_1234"; confirm against the data.
    img_id = user_data["task_input"]["image_url"][-12:-4]
    if img_id not in reference_data:
        return 0

    is_bicycle = reference_data[img_id]["is_bicycle"]
    # json.load turns JSON true/false into Python bools, so comparing only
    # against the string "True" would never match; accept both spellings.
    expected_answer = "yes" if (is_bicycle is True or is_bicycle == "True") else "no"

    return 1 if user_data["task_output"]["answer"] == expected_answer else 0
            
def image_to_array(url):
    """Fetch an image and return it as a 224x224 array.

    Args:
        url: location handed straight to ``skimage.io.imread``.

    Returns:
        The array returned by ``cv2.resize`` after the image's last axis has
        been reversed (presumably an RGB -> BGR channel swap; confirm).
    """
    side = 224
    pixels = io.imread(url)
    # Reverse the last axis before resizing.
    reversed_channels = pixels[..., ::-1]
    return cv2.resize(reversed_channels, (side, side))

def data_to_array():
    """Download every annotated image once, label it, and pickle the result.

    Walks ``results -> root_node -> results`` of the project JSON, and for each
    image URL seen for the first time stores ``[resized_image, label]`` where
    the label comes from ``answer_detector``. The full list is written to
    ``data_save.pkl`` in the current working directory.

    Returns:
        None. The only output is the pickle file.

    NOTE(review): when the same URL appears in several annotation records,
    only the first record's label is kept -- confirm that is intended.
    """
    project_data = json_to_dict_project()
    reference_data = json_to_dict_reference()
    root_data = project_data["results"]["root_node"]["results"]

    data = []
    # URLs already converted. Previously this name was never defined, so the
    # function raised NameError on a fresh kernel.
    done = set()
    for items in root_data:
        for last_child in root_data[items]["results"]:
            img_url = last_child["task_input"]["image_url"]
            if img_url in done:
                continue
            resized_array = image_to_array(img_url)
            label = answer_detector(reference_data, last_child)
            data.append([resized_array, label])
            done.add(img_url)

    # Context manager guarantees the file is flushed and closed.
    with open("data_save.pkl", "wb") as pickle_file:
        pickle.dump(data, pickle_file)
    
def load_saved_data():
    """Load the cached ``[image, label]`` list from ``data_save.pkl``.

    The image conversion is slow, so its result is cached in a pickle file;
    this function restores it into the module-level ``image_data_complete``.

    Returns:
        None. The loaded object is stored in ``image_data_complete``.

    Raises:
        OSError: if ``data_save.pkl`` cannot be opened.
    """
    global image_data_complete
    # SECURITY: pickle.load can execute arbitrary code -- only load a
    # data_save.pkl that this notebook produced itself.
    with open("data_save.pkl", "rb") as pickle_file:
        image_data_complete = pickle.load(pickle_file)

def labels_generate():
    """Split ``image_data_complete`` into two lists by label.

    Each entry is indexed as ``entry[1]`` to read its label. Entries whose
    label equals 1 go to the global ``label_1``; all others go to ``label_0``.
    Both globals are rebuilt from scratch on every call, preserving the
    original entry order.
    """
    global image_data_complete, label_1, label_0
    label_1 = [entry for entry in image_data_complete if entry[1] == 1]
    label_0 = [entry for entry in image_data_complete if not entry[1] == 1]

def generate_train_val():
    """Split each label list in half into the ``train`` and ``val`` globals.

    The first half of ``label_1`` and of ``label_0`` becomes ``train``; the
    remaining halves become ``val``. The split is positional (no shuffling),
    with the cut point at ``round(len(...) / 2)`` for each list.
    """
    global train
    global val
    cut_1 = round(len(label_1) / 2)
    cut_0 = round(len(label_0) / 2)
    train = label_1[:cut_1] + label_0[:cut_0]
    # Assign only `val` here: a chained `val = train = ...` would overwrite
    # `train` with the validation half, making both sets identical.
    val = label_1[cut_1:] + label_0[cut_0:]

def data_normalize():
    """Turn the ``train`` / ``val`` lists into scaled NumPy arrays.

    Each entry of ``train`` and ``val`` is unpacked as ``(feature, label)``.
    Features are stacked and divided by 255; labels are stacked unchanged.
    Results are stored in the globals ``x_train``, ``y_train``, ``x_val`` and
    ``y_val``.

    The stacked feature arrays keep the shape of the input images. The earlier
    ``reshape(-1, img_size, img_size, 1)`` calls were dropped: their return
    value was discarded (``ndarray.reshape`` is not in-place), and a
    single-channel target shape does not match the 3-channel model input.
    """
    global x_train, y_train, x_val, y_val

    # Scale pixel values by 1/255 (to [0, 1] for 8-bit image data).
    x_train = np.array([feature for feature, _ in train]) / 255
    y_train = np.array([label for _, label in train])

    x_val = np.array([feature for feature, _ in val]) / 255
    y_val = np.array([label for _, label in val])

def _build_cnn():
    """Assemble the uncompiled CNN: three conv/max-pool stages, dropout, two dense layers."""
    model = Sequential()
    model.add(Conv2D(32, 3, padding="same", activation="relu", input_shape=(224, 224, 3)))
    model.add(MaxPool2D())

    model.add(Conv2D(32, 3, padding="same", activation="relu"))
    model.add(MaxPool2D())

    model.add(Conv2D(64, 3, padding="same", activation="relu"))
    model.add(MaxPool2D())
    model.add(Dropout(0.4))

    model.add(Flatten())
    model.add(Dense(128, activation="relu"))
    # Softmax head: the model outputs class probabilities, not raw logits.
    model.add(Dense(2, activation="softmax"))
    return model


def _plot_training_curves(history):
    """Plot training vs. validation accuracy and loss from a Keras History object."""
    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']
    loss = history.history['loss']
    val_loss = history.history['val_loss']

    # Derive the x-axis from the recorded history instead of a hard-coded
    # epoch count, so the plot stays valid if the number of epochs changes.
    epochs_range = range(len(acc))

    plt.figure(figsize=(15, 15))
    plt.subplot(2, 2, 1)
    plt.plot(epochs_range, acc, label='Training Accuracy')
    plt.plot(epochs_range, val_acc, label='Validation Accuracy')
    plt.legend(loc='lower right')
    plt.title('Training and Validation Accuracy')

    plt.subplot(2, 2, 2)
    plt.plot(epochs_range, loss, label='Training Loss')
    plt.plot(epochs_range, val_loss, label='Validation Loss')
    plt.legend(loc='upper right')
    plt.title('Training and Validation Loss')
    plt.show()


def main_model():
    """Build, train and evaluate the CNN on the prepared global arrays.

    Reads the globals ``x_train``, ``y_train``, ``x_val`` and ``y_val``.
    Prints the model summary, trains for 500 epochs, shows accuracy/loss
    curves, and prints a classification report for the validation set.

    Returns:
        None.
    """
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,
        samplewise_std_normalization=False,
        zca_whitening=False,
        rotation_range=30,
        zoom_range=0.2,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True,
        vertical_flip=False)

    # NOTE(review): the generator is configured and fitted but never passed to
    # model.fit below, so no augmentation is applied during training. Either
    # train on datagen.flow(x_train, y_train) or remove the generator.
    datagen.fit(x_train)

    model = _build_cnn()
    model.summary()

    # `learning_rate` replaces the deprecated `lr` keyword, which newer Keras
    # releases reject.
    opt = Adam(learning_rate=0.000001)
    # The network ends in softmax, so the loss must treat its output as
    # probabilities (from_logits=False); from_logits=True would apply softmax
    # a second time inside the loss.
    model.compile(
        optimizer=opt,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=['accuracy'])

    history = model.fit(x_train, y_train, epochs=500, validation_data=(x_val, y_val))
    _plot_training_curves(history)

    # Sequential.predict_classes was removed from TensorFlow; taking the argmax
    # over the class axis is the documented equivalent for a softmax output.
    predictions = np.argmax(model.predict(x_val), axis=-1)
    print(classification_report(y_val, predictions, target_names = ['Not a Bicycle (Class 0)','Bicycle (Class 1)']))

In [None]:
# One-off preparation step: download every image, label it and pickle the
# result. Skipped when the cache file already exists so that "Run All" does
# not re-download the whole data set; delete data_save.pkl to force a rebuild.
if not os.path.exists("data_save.pkl"):
    data_to_array()

In [None]:
# Start here once data_save.pkl exists. The stages communicate through
# module-level globals, so they must run in exactly this order.
# 1. restore the cached [image, label] list into image_data_complete
load_saved_data()
# 2. split the entries into label_1 / label_0
labels_generate()
# 3. build the train / val lists from the two label lists
generate_train_val()
# 4. convert train / val into the x_* / y_* NumPy arrays
data_normalize()
# 5. build, train and evaluate the CNN
main_model()