In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import tensorflow as tf
from PIL import Image
import os
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D
from keras.applications import ResNet50
from keras.applications.resnet import preprocess_input
import optuna

data = []
labels = []
classes = 43
cur_path = os.getcwd()

# Retrieving the images and their labels 
for i in range(classes):
    path = os.path.join(cur_path, 'train', str(i))
    images = os.listdir(path)

    for a in images:
        try:
            image = Image.open(os.path.join(path, a))
            image = image.resize((32, 32))
            image = np.array(image)
            data.append(image)
            labels.append(i)
        except:
            print("Error loading image")

# Converting lists into numpy arrays
data = np.array(data)
labels = np.array(labels)

print(data.shape, labels.shape)

# Splitting training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

# Preprocessing the data for ResNet50
X_train = preprocess_input(X_train)
X_test = preprocess_input(X_test)

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

# Converting the labels into one hot encoding
y_train = to_categorical(y_train, 43)
y_test = to_categorical(y_test, 43)

def create_model(trial):
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(30, 30, 3))

    model = Sequential()
    model.add(base_model)
    model.add(GlobalAveragePooling2D())
    model.add(Dense(units=trial.suggest_int('units', 128, 512, step=64), activation='relu'))
    model.add(Dropout(rate=trial.suggest_float('dropout', 0.3, 0.7, step=0.1)))
    model.add(Dense(43, activation='softmax'))

    base_model.trainable = False

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

def objective(trial):
    model = create_model(trial)
    history = model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test), verbose=0)
    accuracy = history.history['val_accuracy'][-1]
    return accuracy

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)

best_trial = study.best_trial
print('Best trial: score {}, params {}'.format(best_trial.value, best_trial.params))

# Building the best model
model = create_model(best_trial)
history = model.fit(X_train, y_train, epochs=15, validation_data=(X_test, y_test))

model.save("my_model_resnet50_tuned_optuna.h5")

# Plotting graphs for accuracy 
plt.figure(0)
plt.plot(history.history['accuracy'], label='training accuracy')
plt.plot(history.history['val_accuracy'], label='val accuracy')
plt.title('Accuracy')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend()
plt.show()

plt.figure(1)
plt.plot(history.history['loss'], label='training loss')
plt.plot(history.history['val_loss'], label='val loss')
plt.title('Loss')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()
plt.show()

# Testing accuracy on test dataset
y_test_csv = pd.read_csv('Test.csv')

labels = y_test_csv["ClassId"].values
imgs = y_test_csv["Path"].values

data = []

for img in imgs:
    image = Image.open(img)
    image = image.resize((30, 30))
    data.append(np.array(image))

X_test = np.array(data)
X_test = preprocess_input(X_test)

pred = np.argmax(model.predict(X_test), axis=1)

# Accuracy with the test data
from sklearn.metrics import accuracy_score
print(accuracy_score(labels, pred))

model.save("traffic_classifier_resnet50_tuned_optuna.h5")

(39209, 32, 32, 3) (39209,)


[I 2024-05-31 14:21:31,657] A new study created in memory with name: no-name-cb2d7603-1778-494c-96b6-0c34346b541a
[W 2024-05-31 14:21:31,659] Trial 0 failed with parameters: {} because of the following error: ValueError('Input size must be at least 32x32; Received: input_shape=(30, 30, 3)').
Traceback (most recent call last):
  File "C:\Users\Admin\anaconda3\Lib\site-packages\optuna\study\_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\Admin\AppData\Local\Temp\ipykernel_29080\601653358.py", line 75, in objective
    model = create_model(trial)
            ^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Admin\AppData\Local\Temp\ipykernel_29080\601653358.py", line 56, in create_model
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(30, 30, 3))
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Admin\anaconda3\Lib\site-packages\keras\src\app

(31367, 32, 32, 3) (7842, 32, 32, 3) (31367,) (7842,)


ValueError: Input size must be at least 32x32; Received: input_shape=(30, 30, 3)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import tensorflow as tf
from PIL import Image
import os
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D
from keras.applications import ResNet50
from keras.applications.resnet import preprocess_input
import optuna

data = []
labels = []
classes = 43
cur_path = os.getcwd()

# Retrieving the images and their labels 
for i in range(classes):
    path = os.path.join(cur_path, 'train', str(i))
    images = os.listdir(path)

    for a in images:
        try:
            image = Image.open(os.path.join(path, a))
            image = image.resize((32, 32))
            image = np.array(image)
            data.append(image)
            labels.append(i)
        except Exception as e:
            print(f"Error loading image {a}: {e}")

# Converting lists into numpy arrays
data = np.array(data)
labels = np.array(labels)

print(data.shape, labels.shape)

# Splitting training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

# Preprocessing the data for ResNet50
X_train = preprocess_input(X_train)
X_test = preprocess_input(X_test)

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

# Converting the labels into one hot encoding
y_train = to_categorical(y_train, 43)
y_test = to_categorical(y_test, 43)

def create_model(trial):
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(32, 32, 3))

    model = Sequential()
    model.add(base_model)
    model.add(GlobalAveragePooling2D())
    model.add(Dense(units=trial.suggest_int('units', 128, 512, step=64), activation='relu'))
    model.add(Dropout(rate=trial.suggest_float('dropout', 0.3, 0.7, step=0.1)))
    model.add(Dense(43, activation='softmax'))

    base_model.trainable = False

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

def objective(trial):
    model = create_model(trial)
    history = model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test), verbose=0)
    accuracy = history.history['val_accuracy'][-1]
    return accuracy

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)

best_trial = study.best_trial
print('Best trial: score {}, params {}'.format(best_trial.value, best_trial.params))

# Building the best model
model = create_model(best_trial)
history = model.fit(X_train, y_train, epochs=15, validation_data=(X_test, y_test))

model.save("my_model_resnet50_tuned_optuna.h5")

# Plotting graphs for accuracy 
plt.figure(0)
plt.plot(history.history['accuracy'], label='training accuracy')
plt.plot(history.history['val_accuracy'], label='val accuracy')
plt.title('Accuracy')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend()
plt.show()

plt.figure(1)
plt.plot(history.history['loss'], label='training loss')
plt.plot(history.history['val_loss'], label='val loss')
plt.title('Loss')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()
plt.show()

# Testing accuracy on test dataset
y_test_csv = pd.read_csv('Test.csv')

labels = y_test_csv["ClassId"].values
imgs = y_test_csv["Path"].values

data = []

for img in imgs:
    image = Image.open(img)
    image = image.resize((32, 32))
    data.append(np.array(image))

X_test = np.array(data)
X_test = preprocess_input(X_test)

pred = np.argmax(model.predict(X_test), axis=1)

# Accuracy with the test data
from sklearn.metrics import accuracy_score
print(accuracy_score(labels, pred))

model.save("traffic_classifier_resnet50_tuned_optuna.h5")


(39209, 32, 32, 3) (39209,)


[I 2024-05-31 14:27:18,887] A new study created in memory with name: no-name-97157c50-255e-41b8-bd6b-b5fc3ed5e4ce


(31367, 32, 32, 3) (7842, 32, 32, 3) (31367,) (7842,)


  optimizer=tf.keras.optimizers.Adam(learning_rate=trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)),
[I 2024-05-31 15:02:07,789] Trial 0 finished with value: 0.2717418968677521 and parameters: {'units': 512, 'dropout': 0.6000000000000001, 'learning_rate': 0.00735230122511182}. Best is trial 0 with value: 0.2717418968677521.
[I 2024-05-31 15:20:29,856] Trial 1 finished with value: 0.4157102704048157 and parameters: {'units': 448, 'dropout': 0.4, 'learning_rate': 0.006823541510838817}. Best is trial 1 with value: 0.4157102704048157.
[I 2024-05-31 15:38:56,894] Trial 2 finished with value: 0.6694720983505249 and parameters: {'units': 128, 'dropout': 0.4, 'learning_rate': 0.00241812808500705}. Best is trial 2 with value: 0.6694720983505249.
[I 2024-05-31 15:58:44,141] Trial 3 finished with value: 0.7431777715682983 and parameters: {'units': 448, 'dropout': 0.3, 'learning_rate': 0.0031210096013615843}. Best is trial 3 with value: 0.7431777715682983.
[I 2024-05-31 16:16:17,560] Trial 4