# Initialising

In [1]:
!pip install aicrowd-cli
%load_ext aicrowd.magic



In [2]:
%aicrowd login

Please login here: [34m[1m[4mhttps://api.aicrowd.com/auth/EUO1Qc4Eqd6UUszOwZVLiF8Ufexaa2BG75eRfBWvvyw[0m
[32mAPI Key valid[0m
[32mSaved API Key successfully![0m


# Loading the Data

In [3]:
!rm -rf data
!mkdir data
%aicrowd ds dl -c orient-rubiks-cube -o data

test.tar.gz:   0%|          | 0.00/47.2M [00:00<?, ?B/s]

training.tar.gz:   0%|          | 0.00/47.2M [00:00<?, ?B/s]

In [4]:
import tarfile
training_tar = tarfile.open('/content/data/training.tar.gz')
training_tar.extractall('/content/data/train_images') # specify which folder to extract to
training_tar.close()

testing_tar = tarfile.open('/content/data/test.tar.gz')
testing_tar.extractall('/content/data/test_images') # specify which folder to extract to
testing_tar.close()

# Importing Libraries

In [5]:
import numpy as np
import pandas as pd
from pathlib import Path
import os.path

from sklearn.model_selection import train_test_split

import tensorflow as tf

from sklearn.metrics import r2_score

# Handling our Data and its DFs

In [6]:
train_dir = Path('/content/data/train_images/training/images')

In [7]:
labels_df = pd.read_csv('/content/data/train_images/training/labels.csv')

In [8]:
labels_df

Unnamed: 0,filename,xRot
0,000000.jpg,336.838887
1,000001.jpg,148.484378
2,000002.jpg,244.821661
3,000003.jpg,222.700630
4,000004.jpg,172.358121
...,...,...
4995,004995.jpg,90.167663
4996,004996.jpg,29.196271
4997,004997.jpg,99.502175
4998,004998.jpg,345.309582


In [9]:
labels_df["filepath"] = 'data/train_images/training/images/' + labels_df["filename"]
labels_df

Unnamed: 0,filename,xRot,filepath
0,000000.jpg,336.838887,data/train_images/training/images/000000.jpg
1,000001.jpg,148.484378,data/train_images/training/images/000001.jpg
2,000002.jpg,244.821661,data/train_images/training/images/000002.jpg
3,000003.jpg,222.700630,data/train_images/training/images/000003.jpg
4,000004.jpg,172.358121,data/train_images/training/images/000004.jpg
...,...,...,...
4995,004995.jpg,90.167663,data/train_images/training/images/004995.jpg
4996,004996.jpg,29.196271,data/train_images/training/images/004996.jpg
4997,004997.jpg,99.502175,data/train_images/training/images/004997.jpg
4998,004998.jpg,345.309582,data/train_images/training/images/004998.jpg


In [10]:
type(labels_df["filepath"][0])

str

In [11]:
# shuffling our dataset
image_df = labels_df.sample(frac=1.0, random_state=1).reset_index(drop=True)
image_df

Unnamed: 0,filename,xRot,filepath
0,002764.jpg,1.862614,data/train_images/training/images/002764.jpg
1,004767.jpg,312.866503,data/train_images/training/images/004767.jpg
2,003814.jpg,11.625699,data/train_images/training/images/003814.jpg
3,003499.jpg,299.059924,data/train_images/training/images/003499.jpg
4,002735.jpg,333.776785,data/train_images/training/images/002735.jpg
...,...,...,...
4995,002895.jpg,285.624027,data/train_images/training/images/002895.jpg
4996,002763.jpg,41.762361,data/train_images/training/images/002763.jpg
4997,000905.jpg,78.205917,data/train_images/training/images/000905.jpg
4998,003980.jpg,313.585484,data/train_images/training/images/003980.jpg


# Train Test Split

In [12]:
train_df, test_df = train_test_split(image_df, train_size=0.7, shuffle=True, random_state=1)

In [13]:
# loading images
train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

test_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255
)

In [14]:
train_df

Unnamed: 0,filename,xRot,filepath
1334,004184.jpg,32.826308,data/train_images/training/images/004184.jpg
4768,004601.jpg,333.804216,data/train_images/training/images/004601.jpg
65,004828.jpg,342.057668,data/train_images/training/images/004828.jpg
177,000718.jpg,82.546377,data/train_images/training/images/000718.jpg
4489,003250.jpg,244.632241,data/train_images/training/images/003250.jpg
...,...,...,...
2895,001611.jpg,349.145202,data/train_images/training/images/001611.jpg
2763,003746.jpg,190.891097,data/train_images/training/images/003746.jpg
905,001892.jpg,248.235523,data/train_images/training/images/001892.jpg
3980,004270.jpg,23.885770,data/train_images/training/images/004270.jpg


# Defining Generators to load Data

In [15]:
train_images = train_generator.flow_from_dataframe(
    dataframe=train_df,
    x_col='filepath',
    y_col='xRot',
    target_size=(120, 120),
    color_mode='rgb',
    class_mode='raw',
    batch_size=32,
    shuffle=True,
    seed=42,
    subset='training'
)

val_images = train_generator.flow_from_dataframe(
    dataframe=train_df,
    x_col='filepath',
    y_col='xRot',
    target_size=(120, 120),
    color_mode='rgb',
    class_mode='raw',
    batch_size=32,
    shuffle=True,
    seed=42,
    subset='validation'
)

test_images = test_generator.flow_from_dataframe(
    dataframe=test_df,
    x_col='filepath',
    y_col='xRot',
    target_size=(120, 120),
    color_mode='rgb',
    class_mode='raw',
    batch_size=32,
    shuffle=False
)

Found 2800 validated image filenames.
Found 700 validated image filenames.
Found 1500 validated image filenames.


# Modeling & Training

In [16]:
inputs = tf.keras.Input(shape=(120, 120, 3))
x = tf.keras.layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu')(inputs)
x = tf.keras.layers.MaxPool2D()(x)
x = tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(x)
x = tf.keras.layers.MaxPool2D()(x)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(64, activation='relu')(x)
x = tf.keras.layers.Dense(64, activation='relu')(x)
outputs = tf.keras.layers.Dense(1, activation='linear')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

model.compile(
    optimizer='adam',
    loss='mse'
)

history = model.fit(
    train_images,
    validation_data=val_images,
    epochs=120,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True
        )
    ]
)

Epoch 1/120
Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120
Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120
Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120
Epoch 72/120
Epoch 73/120
Epoch 74/120
Epoch 75/120
Epoch 76/120
Epoch 77/120
Epoch 78

# Results

In [17]:
predicted_xRot = np.squeeze(model.predict(test_images))
true_xRot = test_images.labels

rmse = np.sqrt(model.evaluate(test_images, verbose=0))
print("     Test RMSE: {:.5f}".format(rmse))

r2 = r2_score(true_xRot, predicted_xRot)
print("Test R^2 Score: {:.5f}".format(r2))

     Test RMSE: 8.30304
Test R^2 Score: 0.99360


In [18]:
mse = model.evaluate(test_images, verbose=0)
print("     Test MSE: {:.5f}".format(mse))

     Test MSE: 68.94040


# Preparing for submission

In [19]:
# column for image ids in test set
testing_img_ids = [f"00{i}.jpg" if len(str(i)) <= 4  else f"0{i}.jpg" for i in range(5000, 10001)]

In [20]:
test_sub = pd.DataFrame()
test_sub["filename"] = testing_img_ids

In [21]:
test_sub

Unnamed: 0,filename
0,005000.jpg
1,005001.jpg
2,005002.jpg
3,005003.jpg
4,005004.jpg
...,...
4996,009996.jpg
4997,009997.jpg
4998,009998.jpg
4999,009999.jpg


In [22]:
test_sub["filepath"] = 'data/test_images/images/' + test_sub["filename"]
test_sub

Unnamed: 0,filename,filepath
0,005000.jpg,data/test_images/images/005000.jpg
1,005001.jpg,data/test_images/images/005001.jpg
2,005002.jpg,data/test_images/images/005002.jpg
3,005003.jpg,data/test_images/images/005003.jpg
4,005004.jpg,data/test_images/images/005004.jpg
...,...,...
4996,009996.jpg,data/test_images/images/009996.jpg
4997,009997.jpg,data/test_images/images/009997.jpg
4998,009998.jpg,data/test_images/images/009998.jpg
4999,009999.jpg,data/test_images/images/009999.jpg


In [23]:
testsub_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255
)

In [24]:
testsub_images = testsub_generator.flow_from_dataframe(
                            dataframe = test_sub,
                            x_col='filepath',
                            y_col=None,
                            target_size=(120, 120),
                            color_mode='rgb',
                            class_mode=None,
                            batch_size=32,
                            shuffle=False
                            )

Found 5001 validated image filenames.


In [25]:
predicted_xRot_sub = np.squeeze(model.predict(testsub_images))

In [26]:
len(predicted_xRot_sub)

5001

In [27]:
predicted_xRot_sub

array([330.65182 , 277.95358 , 293.52847 , ..., 331.73865 , 101.221565,
       132.91136 ], dtype=float32)

In [28]:
filename = testsub_images.filenames

In [29]:
os.path.split(filename[0])[1]

'005000.jpg'

In [30]:
files = []
for f in filename:
    f = os.path.split(f)[1]
    files.append(f)

In [31]:
len(filename)

5001

In [32]:
files

['005000.jpg',
 '005001.jpg',
 '005002.jpg',
 '005003.jpg',
 '005004.jpg',
 '005005.jpg',
 '005006.jpg',
 '005007.jpg',
 '005008.jpg',
 '005009.jpg',
 '005010.jpg',
 '005011.jpg',
 '005012.jpg',
 '005013.jpg',
 '005014.jpg',
 '005015.jpg',
 '005016.jpg',
 '005017.jpg',
 '005018.jpg',
 '005019.jpg',
 '005020.jpg',
 '005021.jpg',
 '005022.jpg',
 '005023.jpg',
 '005024.jpg',
 '005025.jpg',
 '005026.jpg',
 '005027.jpg',
 '005028.jpg',
 '005029.jpg',
 '005030.jpg',
 '005031.jpg',
 '005032.jpg',
 '005033.jpg',
 '005034.jpg',
 '005035.jpg',
 '005036.jpg',
 '005037.jpg',
 '005038.jpg',
 '005039.jpg',
 '005040.jpg',
 '005041.jpg',
 '005042.jpg',
 '005043.jpg',
 '005044.jpg',
 '005045.jpg',
 '005046.jpg',
 '005047.jpg',
 '005048.jpg',
 '005049.jpg',
 '005050.jpg',
 '005051.jpg',
 '005052.jpg',
 '005053.jpg',
 '005054.jpg',
 '005055.jpg',
 '005056.jpg',
 '005057.jpg',
 '005058.jpg',
 '005059.jpg',
 '005060.jpg',
 '005061.jpg',
 '005062.jpg',
 '005063.jpg',
 '005064.jpg',
 '005065.jpg',
 '005066.j

In [33]:
finaldf = pd.DataFrame({'filename': files,'xRot': predicted_xRot_sub})
finaldf

Unnamed: 0,filename,xRot
0,005000.jpg,330.651825
1,005001.jpg,277.953583
2,005002.jpg,293.528473
3,005003.jpg,107.734322
4,005004.jpg,14.792320
...,...,...
4996,009996.jpg,26.141451
4997,009997.jpg,50.281258
4998,009998.jpg,331.738647
4999,009999.jpg,101.221565


In [34]:
finaldf.to_csv("submission.csv", index=False)

In [35]:
!aicrowd submission create -c orient-rubiks-cube -f submission.csv

[2K[1;34msubmission.csv[0m [90m━━━━━━━━━━━━━━━━━━[0m [35m100.0%[0m • [32m105.1/103.5 KB[0m • [31m708.1 kB/s[0m • [36m0:00:00[0m
[?25h                                                      ╭─────────────────────────╮                                                       
                                                      │ [1mSuccessfully submitted![0m │                                                       
                                                      ╰─────────────────────────╯                                                       
[3m                                                            Important links                                                             [0m
┌──────────────────┬───────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│  This submission │ https://www.aicrowd.com/challenges/kiit-ai-mini-blitz/problems/orient-rubiks-cube/submissions/166352              │
│              

## After submission, we were able to get a MSE of **78.103**, which is currently 2nd on the leaderboard.