In [80]:
import os, cv2, random
import numpy as np
import tensorflow as tf
from datetime import datetime
from tqdm import tqdm
import pandas as pd

# Side length, in pixels, that every image is resized to (square) before use.
IMG_SIZE = 100

print(f"START Setting Directories: {datetime.today().strftime('%Y-%m-%d-%H%M')}")
# Dataset locations, relative to the notebook's working directory.
train_data_folder = "shopee-product-detection-dataset/train/train"
test_data_folder = "shopee-product-detection-dataset/test/test"
# Two-digit, zero-padded class names "00" .. "41"; each one is joined onto
# train_data_folder to locate that class's images.
categories = [f"{class_id:02d}" for class_id in range(42)]
print(f"COMPLETE Setting Directories: {datetime.today().strftime('%Y-%m-%d-%H%M')}")

START Setting Directories: 2020-06-29-2115
COMPLETE Setting Directories: 2020-06-29-2115


In [2]:
print(f"START Import and Shuffle Train Data: {datetime.today().strftime('%Y-%m-%d-%H%M')}")
# Each entry is [resized RGB image, integer class label].
train_data = []
# Paths of files that could not be decoded or resized. They are recorded so
# that dropped samples are visible instead of being silently discarded.
skipped_train_files = []
# The label is the category's position in `categories`; enumerate yields it
# directly instead of searching the list once per class.
for class_num, category in enumerate(categories):
    path = os.path.join(train_data_folder, category)
    for img in tqdm(os.listdir(path)):
        img_path = os.path.join(path, img)
        # cv2.imread reports an unreadable / non-image file by returning None
        # rather than raising, so check for it explicitly.
        img_array = cv2.imread(img_path)
        if img_array is None:
            skipped_train_files.append(img_path)
            continue
        try:
            img_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB) # Changed to RGB instead of greyscale
            new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
        except cv2.error:
            # Only OpenCV processing failures are tolerated; anything else
            # (KeyboardInterrupt, MemoryError, typos) must surface.
            skipped_train_files.append(img_path)
            continue
        train_data.append([new_array, class_num])
if skipped_train_files:
    print(f"Skipped {len(skipped_train_files)} unreadable train images")
# Shuffle so classes are mixed; the data was appended one class folder at a time.
random.shuffle(train_data)
print(f"COMPLETE Import and Shuffle Train Data: {datetime.today().strftime('%Y-%m-%d-%H%M')}")

print(f"START Creating X_train and y_train {datetime.today().strftime('%Y-%m-%d-%H%M')}")
# Split the shuffled [image, label] pairs into parallel feature / label lists.
X_train = [features for features, _ in train_data]
y_train = [label for _, label in train_data]

X_train = np.array(X_train).reshape(-1, IMG_SIZE, IMG_SIZE, 3).astype("float32")
# NOTE(review): this L2-normalizes along axis 1 rather than scaling pixels to
# [0, 1]. Kept unchanged because the test-set preprocessing applies the same call.
X_train = tf.keras.utils.normalize(X_train, axis = 1)
y_train = np.array(y_train).astype("int32")
print(f"COMPLETE: Creating X_train and y_train {datetime.today().strftime('%Y-%m-%d-%H%M')}")

  0%|          | 11/2683 [00:00<00:24, 107.69it/s]

START Setting Directories: 2020-06-28-2339
COMPLETE Setting Directories: 2020-06-28-2339
START Import and Shuffle Train Data: 2020-06-28-2339


100%|██████████| 2683/2683 [00:23<00:00, 114.35it/s]
100%|██████████| 2703/2703 [00:21<00:00, 125.47it/s]
100%|██████████| 2687/2687 [00:24<00:00, 108.60it/s]
100%|██████████| 2703/2703 [00:22<00:00, 117.68it/s]
100%|██████████| 2703/2703 [00:20<00:00, 130.98it/s]
100%|██████████| 2641/2641 [00:16<00:00, 162.83it/s]
100%|██████████| 2641/2641 [00:19<00:00, 132.09it/s]
100%|██████████| 2661/2661 [00:17<00:00, 153.32it/s]
100%|██████████| 2700/2700 [00:19<00:00, 141.53it/s]
100%|██████████| 2698/2698 [00:20<00:00, 130.09it/s]
100%|██████████| 2673/2673 [00:21<00:00, 126.30it/s]
100%|██████████| 1843/1843 [00:13<00:00, 134.58it/s]
100%|██████████| 2691/2691 [00:20<00:00, 133.50it/s]
100%|██████████| 2682/2682 [00:19<00:00, 138.24it/s]
100%|██████████| 2685/2685 [00:16<00:00, 161.18it/s]
100%|██████████| 2632/2632 [00:19<00:00, 137.51it/s]
100%|██████████| 2665/2665 [00:21<00:00, 125.28it/s]
100%|██████████| 1553/1553 [00:09<00:00, 171.75it/s]
100%|██████████| 2104/2104 [00:12<00:00, 164.0

COMPLETE Import and Shuffle Train Data: 2020-06-28-2351
START Creating X_train and y_train 2020-06-28-2351
COMPLETE: Creating X_train and y_train 2020-06-28-2356


In [7]:
print(f"START Training Model: {datetime.today().strftime('%Y-%m-%d-%H%M')}")

# Run identifier: a hand-written architecture summary plus the start time.
# It names both the TensorBoard log directory and (later) the saved model.
NAME = f"conv64-conv256-dropout0.5-dense128-dense128-batch32-{datetime.today().strftime('%Y-%m-%d-%H%M')}"
print(NAME)

# Two conv + max-pool stages, dropout, then two dense layers feeding a
# 42-way softmax classifier.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(64, (3, 3), activation = "relu", input_shape = (IMG_SIZE, IMG_SIZE, 3)),
    tf.keras.layers.MaxPool2D(pool_size = (2,2), strides = 2),
    tf.keras.layers.Conv2D(256, (3, 3), activation = "relu"),
    tf.keras.layers.MaxPool2D(pool_size = (2,2), strides = 2),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation = "relu"),
    tf.keras.layers.Dense(128, activation = "relu"),
    tf.keras.layers.Dense(42, activation = "softmax"),
])
# Sparse loss because y_train holds integer class ids, not one-hot vectors.
model.compile(
    optimizer = "adam",
    loss = "sparse_categorical_crossentropy",
    metrics = ["accuracy"],
)
tensorboard = tf.keras.callbacks.TensorBoard(log_dir = f"logs/{NAME}")

# validation_split takes the validation samples from the end of the arrays,
# which is why the training data is shuffled before this point.
model.fit(
    X_train,
    y_train,
    epochs = 5,
    batch_size = 32,
    validation_split = 0.1,
    callbacks = [tensorboard],
)
print(f"COMPLETE Training Model: {datetime.today().strftime('%Y-%m-%d-%H%M')}")

START Training Model: 2020-06-29-0018
conv128-conv128-dropout0.8-dense128-dense128-batch32-2020-06-29-0018
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
COMPLETE Training Model: 2020-06-29-0328


In [9]:
# Persist the trained model under the run name; NAME is already a str, so it
# is passed through directly.
model.save(NAME)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: conv128-conv128-dropout0.8-dense128-dense128-batch32-2020-06-29-0018/assets


In [3]:
# Reload a previously saved model from disk. The path is spelled out instead
# of reusing NAME so this cell also works in a kernel where training was not run.
model = tf.keras.models.load_model(
    filepath = "conv64-conv256-dropout0.5-dense128-dense128-batch32-2020-06-29-0018",
)

In [7]:
print(f"START Import and Preprocessing X_test: {datetime.today().strftime('%Y-%m-%d-%H%M')}")
# Parallel lists: test_data[i] is the resized RGB image read from the file
# named test_data_name[i].
test_data = []
test_data_name = []
for file_name in os.listdir(test_data_folder):
    bgr_image = cv2.imread(os.path.join(test_data_folder, file_name))
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    resized_image = cv2.resize(rgb_image, (IMG_SIZE, IMG_SIZE))
    test_data.append(resized_image)
    test_data_name.append(file_name)

# Stack into one float32 array and apply the same normalize call that the
# training features go through.
X_test = np.array(test_data).reshape(-1, IMG_SIZE, IMG_SIZE, 3).astype("float32")
X_test = tf.keras.utils.normalize(X_test, axis = 1)
print(f"COMPLETE Import and Preprocessing X_test: {datetime.today().strftime('%Y-%m-%d-%H%M')}")

START Import and Preprocessing X_test: 2020-06-29-2041
COMPLETE Import and Preprocessing X_test: 2020-06-29-2043


In [79]:
print(f"START Predicting X_test: {datetime.today().strftime('%Y-%m-%d-%H%M')}")

# One row of class probabilities per test image.
predictions = model.predict(X_test)

# Predicted class = index of the highest probability in each row. np.argmax
# returns the first such index on ties, the same choice the previous
# amax/where loop made. Kept as a list, as before.
labels = list(np.argmax(predictions, axis = 1))

print(f"COMPLETE Predicting X_test: {datetime.today().strftime('%Y-%m-%d-%H%M')}")

print(f"START Output File: {datetime.today().strftime('%Y-%m-%d-%H%M')}")
# Pair each file name with its predicted label under an explicit column name,
# so the join below does not rely on a positional column called 0.
labels_df = pd.DataFrame({"filename": test_data_name, "labels": labels})
df = pd.read_csv("shopee-product-detection-dataset/test.csv")
# Left join keeps the row order and row set of test.csv; the placeholder
# "category" column from the CSV is replaced by the predicted labels.
df = pd.merge(df, labels_df, how = "left", on = "filename")
df = df.drop(["category"], axis = 1)
df.columns = ["filename", "category"]
df.to_csv("attempt.csv", index = False)
print(f"COMPLETE Output File: {datetime.today().strftime('%Y-%m-%d-%H%M')}")

START Predicting X_test: 2020-06-29-2114
COMPLETE Predicting X_test: 2020-06-29-2115
START Output File: 2020-06-29-2115
COMPLETE Output File: 2020-06-29-2115
