<a href="https://colab.research.google.com/github/kjain1810/aicrowd/blob/master/ORIENTME.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#preprocess data before NN

##install

In [None]:
import sys
!{sys.executable} -m pip install numpy
!{sys.executable} -m pip install pandas
!{sys.executable} -m pip install scikit-learn 
!{sys.executable} -m pip install matplotlib tqdm
!{sys.executable} -m pip install opencv-python

##data import

In [None]:
# #Donwload the datasets
!rm -rf data/
!mkdir data/

!curl https://s3.eu-central-1.wasabisys.com/aicrowd-practice-challenges/public/orientme/v0.2/training.tar.gz -o data/training.tar.gz
!curl https://s3.eu-central-1.wasabisys.com/aicrowd-practice-challenges/public/orientme/v0.2/test.tar.gz -o data/test.tar.gz
!curl https://s3.eu-central-1.wasabisys.com/aicrowd-practice-challenges/public/orientme/v0.2/sample_submission.csv -o data/sample_submission.csv
!tar xvzf data/training.tar.gz -C data/
!tar xvzf data/test.tar.gz -C data/


In [None]:
TRAINING_IMAGES_FOLDER = "data/training/images/"
TRAINING_LABELS_PATH = "data/training/labels.csv"
TEST_IMAGES_FOLDER = "data/images"
SAMPLE_SUBMISSION_FILE_PATH = "data/sample_submission.csv"


##import


In [None]:
import os
import tqdm

import numpy as np
import pandas as pd
import cv2

from PIL import Image

from google.colab.patches import cv2_imshow

import glob

from google.colab import files

##pre-process data


###get corners of image

In [None]:
def get_corner(image):
    morph = im.copy()

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
    morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel)
    morph = cv2.morphologyEx(morph, cv2.MORPH_OPEN, kernel)

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))

    gradient_image = cv2.morphologyEx(morph, cv2.MORPH_GRADIENT, kernel)

    image_channels = np.split(np.asarray(gradient_image), 3, axis=2)

    channel_height, channel_width, _ = image_channels[0].shape

    for i in range(0, 3):
        _, image_channels[i] = cv2.threshold(~image_channels[i], 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY)
        image_channels[i] = np.reshape(image_channels[i], newshape=(channel_height, channel_width, 1))

    image_channels = np.concatenate((image_channels[0], image_channels[1], image_channels[2]), axis=2)

    lower = np.array([0, 0, 0])
    upper = np.array([254, 254, 254])
    ret = cv2.inRange(image_channels, lower, upper)
    return ret

###counting colors

In [None]:
def count_red(image, debug=False):
    img_hsv = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)
    
    lower_red = np.array([0,80,100])
    upper_red = np.array([5,200,255])
    mask0 = cv2.inRange(img_hsv, lower_red, upper_red)
    lower_red = np.array([175,80,100])
    upper_red = np.array([180,200,255])
    mask1 = cv2.inRange(img_hsv, lower_red, upper_red)
    mask = mask1 + mask0

    maskedge = get_corner(image)

    mask = mask - maskedge
    mask = np.maximum(mask, np.zeros((512, 512)))
    if debug:
        print("red: ")
        cv2_imshow(mask)
    num_col = np.sum(mask > 0)
    return num_col

In [None]:
def count_white(image, debug=False):
    lower_white = np.array([240, 240, 240])
    upper_white = np.array([255, 255, 255])
    mask = cv2.inRange(im, lower_white, upper_white)
    num_col = np.sum(mask > 0)
    if debug:
        print("white: ")
        cv2_imshow(mask)
    return num_col

In [None]:
def count_blue(image, debug=False):
    img_hsv = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)
    lower_blue=np.array([110, 150, 80],np.uint8)
    upper_blue=np.array([120, 220, 255],np.uint8)
    mask = cv2.inRange(img_hsv, lower_blue, upper_blue)
    if debug:
        print("blue: ")
        cv2_imshow(mask)
    num_col = np.sum(mask > 0)
    return num_col

In [None]:
def count_orange(image, debug=False):
    img_hsv = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)
    
    lower_red = np.array([5,50,50])
    upper_red = np.array([15,255,255])
    mask0 = cv2.inRange(img_hsv, lower_red, upper_red)
    lower_red = np.array([165,50,50])
    upper_red = np.array([175,255,255])
    mask1 = cv2.inRange(img_hsv, lower_red, upper_red)
    mask = mask1 + mask0

    maskedge = get_corner(image)

    mask = mask - maskedge
    mask = np.maximum(mask, np.zeros((512, 512)))
    if debug:
        print("orange: ")
        cv2_imshow(mask)
    num_col = np.sum(mask > 0)
    return num_col

In [None]:
def count_green(image, debug=False):
    img_hsv = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)
    lower_yellow=np.array([50, 100, 40],np.uint8)
    upper_yellow=np.array([70, 255, 255],np.uint8)
    mask = cv2.inRange(img_hsv, lower_yellow, upper_yellow)
    if debug:
        print("green: ")
        cv2_imshow(mask)
    num_col = np.sum(mask > 0)
    return num_col

In [None]:
def count_yellow(image, debug=False):
    img_hsv = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)
    lower_yellow=np.array([20, 100, 100],np.uint8)
    upper_yellow=np.array([30, 255, 255],np.uint8)
    mask = cv2.inRange(img_hsv, lower_yellow, upper_yellow)
    if debug:
        print("yellow: ")
        cv2_imshow(mask)
    num_col = np.sum(mask > 0)
    return num_col

###main

In [None]:
def pre_process_data_X(image):
    cols_here = np.zeros((6))
    cols_here[0] = count_red(image)
    cols_here[1] = count_white(image)
    cols_here[2] = count_blue(image)
    cols_here[3] = count_yellow(image)
    cols_here[4] = count_green(image)
    cols_here[5] = count_orange(image)
    return cols_here

In [None]:
training_labels_df = pd.read_csv(TRAINING_LABELS_PATH)

ALL_DATA = np.zeros((5000, 7))

counter = 0

for _idx, row in tqdm.tqdm(training_labels_df.iterrows(), total=training_labels_df.shape[0]):
    filepath = os.path.join(
        TRAINING_IMAGES_FOLDER,
        row.filename
    )
    im = cv2.imread(filepath)
    
    data_X = pre_process_data_X(im)
    
    for i in range(6):
        ALL_DATA[counter][i] = data_X[i]
    ALL_DATA[counter][6] = row.xRot/360
    
    counter += 1

print(ALL_DATA)

##Checking the data

In [None]:
for i in range(20):
    print(ALL_DATA[i][6])

In [None]:
df = pd.DataFrame(data=ALL_DATA, columns=["red", "white", "blue", "yellow", "green", "orange", "xRot"])

In [None]:
df.head

###download for future

In [None]:
df.to_csv("orientme_first_preprocessing.csv", index=False)

In [None]:
files.download('orientme_first_preprocessing.csv')

##load preprocessed data

In [None]:
data=pd.read_csv("orientme_first_preprocessing.csv")

In [None]:
data.head()

##remove noises

In [None]:
def remove_noise(row):
    # print(row)
    indexes = ["red", "white", "blue", "yellow", "green", "orange"]
    for x in indexes:
        if row[x] <= 1000:
            row[x] = 0
    return row

In [None]:
data.apply(lambda rw: remove_noise(rw), axis=1)

In [None]:
data.head(10)

In [None]:
def remove_orange_noise(row):
    if row["red"] - row["orange"] >= 2000:
        row["orange"] = 0
    elif row["orange"] - row["red"] >= 1000:
        row["red"] = 0
    return row

In [None]:
data.apply(lambda rw: remove_orange_noise(rw), axis=1)

In [None]:
data.loc[data["orange"] >= 5000].loc[data["orange"] <= 9000]

In [None]:
def remove_red_when_orange(row):
    if row["orange"] >= 5000:
        row["red"] = 0
    return row

In [None]:
data.apply(lambda rw: remove_red_when_orange(rw), axis=1)

##download this data now

In [None]:
data.to_csv("orientme_second_preprocessing.csv", index=False)

In [None]:
files.download('orientme_second_preprocessing.csv')

##preprocess test set

In [None]:
TEST_DATA = np.zeros((5001, 6))
TEST_FILENAMES = []

counter = 0

for _test_image_path in tqdm.tqdm(glob.glob(os.path.join(TEST_IMAGES_FOLDER, "*.jpg"))):
    filename = os.path.basename(_test_image_path)
    im = cv2.imread(_test_image_path)
    
    data_X = pre_process_data_X(im)
    for i in range(6):
        TEST_DATA[counter][i] = data_X[i]
    counter += 1
    # TEST_DATA.append(data_X)
    TEST_FILENAMES.append(filename)


In [None]:
TEST_DATA = TEST_DATA.astype(int)

In [None]:
print(TEST_DATA)

In [None]:
test_dataframe = pd.DataFrame(data=TEST_DATA, columns=["red", "white", "blue", "yellow", "green", "orange"])
test_dataframe["filename"] = TEST_FILENAMES

In [None]:
test_dataframe.head()

In [None]:
test_dataframe.to_csv("orientme_testdata_first_preprocessing.csv", index=False)

In [None]:
files.download("orientme_testdata_first_preprocessing.csv")

In [None]:
df=pd.read_csv("/content/orientme_testdata_first_preprocessing.csv")

In [None]:
df.loc[df["orange"]<5000]

In [None]:
df.apply(lambda rw: remove_noise(rw), axis=1)

In [None]:
df.loc[df["red"] <= 1000]

In [None]:
df.apply(lambda rw: remove_orange_noise(rw), axis=1)

In [None]:
df.apply(lambda rw: remove_red_when_orange(rw), axis=1)

###download after second preprocess

In [None]:
df.to_csv("orientme_testdata_second_preprocessing.csv")

In [None]:
files.download("orientme_testdata_second_preprocessing.csv")

#NN

##imports

In [None]:
import os
import tqdm

import numpy as np
import pandas as pd
import cv2

import glob

from google.colab import files

from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import Dense
import plotly.graph_objects as go

##load preprocessed data

In [163]:
train = pd.read_csv("orientme_first_preprocessing.csv");
test = pd.read_csv("orientme_testdata_first_preprocessing.csv")

In [164]:
train.head()

Unnamed: 0,red,white,blue,yellow,green,orange,xRot
0,6643.0,26875.0,0.0,0.0,3450.0,4837.0,0.935664
1,2910.0,0.0,21659.0,15035.0,2.0,3433.0,0.412457
2,30738.0,0.0,0.0,3119.0,4922.0,5547.0,0.68006
3,27567.0,0.0,71.0,10954.0,407.0,4720.0,0.618613
4,9341.0,0.0,12511.0,19476.0,16.0,4065.0,0.478773


In [165]:
test.head()

Unnamed: 0,red,white,blue,yellow,green,orange,filename
0,2510,0,23147,13010,1,3094,009805.jpg
1,10915,228,24357,0,11,10048,009738.jpg
2,28531,222,0,0,11853,5424,008471.jpg
3,9985,917,15056,0,17,8764,008284.jpg
4,24544,0,145,15079,96,4428,006827.jpg


##split traintest

In [166]:
training_set, validation_set= train_test_split(train, test_size=0.2, random_state=42)

In [167]:
X_train,y_train = training_set.iloc[:,:-1],training_set.iloc[:,-1]
X_val,y_val = validation_set.iloc[:,:-1],validation_set.iloc[:,-1]


In [168]:
X_val.shape

(1000, 6)

In [169]:
file_names = test["filename"]

In [170]:
file_names.head()

0    009805.jpg
1    009738.jpg
2    008471.jpg
3    008284.jpg
4    006827.jpg
Name: filename, dtype: object

In [171]:
test = test.drop(columns=["filename"])

In [172]:
test = test.loc[:, ~test.columns.str.contains('^Unnamed')]


In [173]:
test.head()

Unnamed: 0,red,white,blue,yellow,green,orange
0,2510,0,23147,13010,1,3094
1,10915,228,24357,0,11,10048
2,28531,222,0,0,11853,5424
3,9985,917,15056,0,17,8764
4,24544,0,145,15079,96,4428


##data normalization

In [174]:
train_col_max = train.max()
train_max = train_col_max.max()
test_col_max = test.max()
test_max = test_col_max.max()
total_max = max(test_max, train_max)

In [175]:
print(total_max)

30998.0


In [176]:
X_train = X_train / total_max
X_val = X_val / total_max
test = test / total_max

In [177]:
X_train.head()

Unnamed: 0,red,white,blue,yellow,green,orange
4227,0.284889,0.677302,0.35512,0.0,0.000387,0.238144
4676,0.171463,0.0,0.494129,0.600781,0.00029,0.122072
800,0.169172,0.902155,0.00029,0.0,0.073682,0.149816
3671,0.885541,0.0,0.002194,0.335118,0.01284,0.146784
4193,0.939835,0.004678,0.0,0.0,0.374444,0.17685


##train model

In [178]:
def create_model():
    model = Sequential()
    model.add(Dense(32, input_dim=6, activation='relu', name='dense_1'))
    model.add(Dense(32, activation='relu', name='dense_2'))
    model.add(Dense(16, activation='relu', name='dense_3'))
    model.add(Dense(8, activation='relu', name='dense_4'))
    model.add(Dense(1, activation='sigmoid', name='dense_output'))
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model
model = Sequential()
model = create_model()
model.summary()

Model: "sequential_28"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 32)                224       
_________________________________________________________________
dense_2 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_3 (Dense)              (None, 16)                528       
_________________________________________________________________
dense_4 (Dense)              (None, 8)                 136       
_________________________________________________________________
dense_output (Dense)         (None, 1)                 9         
Total params: 1,953
Trainable params: 1,953
Non-trainable params: 0
_________________________________________________________________


In [182]:
history = model.fit(X_train, y_train, epochs=100, validation_split=0.05)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [183]:
fig = go.Figure()
fig.add_trace(go.Scattergl(y=history.history['loss'],
                    name='Train'))
fig.add_trace(go.Scattergl(y=history.history['val_loss'],
                    name='Valid'))
fig.update_layout(height=500, width=700,
                  xaxis_title='Epoch',
                  yaxis_title='Loss')
fig.show()


In [184]:
mse_nn, mae_nn = model.evaluate(X_val, y_val)
print('Mean squared error on test data: ', mse_nn)

Mean squared error on test data:  0.004021082539111376


##predict

In [None]:
test.head()

In [None]:
predictions = model.predict(test)

In [None]:
predictions = predictions * 360

In [None]:
predictions

In [None]:
answers = pd.DataFrame(predictions, columns=["xRot"])
answers["filename"] = file_names

In [None]:
answers.head()

In [None]:
answers.to_csv("submission.csv")

In [None]:
files.download("submission.csv")