In [55]:
import pandas as pd
import re
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Đọc và label dữ liệu

In [2]:
tag_mapping = {
    "Others": 0,
    "Honda": 1,
    "Hyundai": 2,
    "KIA": 3,
    "Mazda": 4,
    "Mitsubishi" : 5,
    "Toyota": 6,
    "Suzuki": 7,
    "Vinfast": 8
}

In [10]:
def extract_brand_from_path(path, tag_mapping):
    for brand in tag_mapping.keys():
        if re.search(fr'\b{brand}\b', path, re.IGNORECASE):
            return brand
    return "Unknown"

In [11]:
data_path = "clustering_results.csv"
data = pd.read_csv(data_path)

In [13]:
data['BrandName'] = data['ImageFullPath'].apply(lambda x:extract_brand_from_path(x, tag_mapping))
data['Label'] = data['BrandName'].map(tag_mapping).fillna(-1).astype(int)

In [14]:
data

Unnamed: 0,ImageFullPath,ClusterID,BrandName,Label
0,/content/drive/My Drive/dataset/CS114_ML\Others/21522373-21522499.L...,1,Others,0
1,/content/drive/My Drive/dataset/CS114_ML\Others/21522373-21522499.L...,1,Others,0
2,/content/drive/My Drive/dataset/CS114_ML\Others/21522373-21522499.L...,4,Others,0
3,/content/drive/My Drive/dataset/CS114_ML\Others/21522373-21522499.L...,4,Others,0
4,/content/drive/My Drive/dataset/CS114_ML\Others/21522373-21522499.L...,1,Others,0
...,...,...,...,...
31793,/content/drive/My Drive/dataset/CS114_ML\Vinfast/22521692-22521676....,2,Vinfast,8
31794,/content/drive/My Drive/dataset/CS114_ML\Vinfast/22521692-22521676....,4,Vinfast,8
31795,/content/drive/My Drive/dataset/CS114_ML\Vinfast/22521692-22521676....,2,Vinfast,8
31796,/content/drive/My Drive/dataset/CS114_ML\Vinfast/22521692-22521676....,4,Vinfast,8


In [15]:
data['Label'].unique()

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [20]:
brand_counts = data['BrandName'].value_counts()

brand_counts_df = brand_counts.reset_index()
brand_counts_df.columns = ['Brand', 'ImageCount']

ordered_brands = list(tag_mapping.keys())
ordered_brand_counts = brand_counts.reindex(ordered_brands).fillna(0).astype(int)

ordered_brand_counts_df = ordered_brand_counts.reset_index()
ordered_brand_counts_df.columns = ['Brand', 'ImageCount']
ordered_brand_counts_df

Unnamed: 0,Brand,ImageCount
0,Others,4469
1,Honda,2769
2,Hyundai,3088
3,KIA,2529
4,Mazda,2989
5,Mitsubishi,2689
6,Toyota,5092
7,Suzuki,5965
8,Vinfast,2208


In [None]:
def processing_img(file_paths, img_size):
    images = []
    for path in file_paths:
        try:
            img = load_img(path, target_size=img_size)
            img = img_to_array(img) / 255.0
            images.append(img)
        except Exception as e:
            print(f'Error loading image {path}: {e}')
    return np.array(images)

In [None]:
def prepare_data(data, img_size=(224, 224)):
    data = data[data['ImageFullPath'].apply(os.path.exists)]
    file_paths = data['ImageFullPath'].values
    labels = to_categorical(data['Label'], num_classes=len(tag_mapping))

    # chia tập train và tập test
    train_paths, test_paths, train_labels, test_labels = train_test_split(
        file_paths, labels, test_size=0.2, random_state=42, stratify=data['Label']
    )

    X_train = processing_img(train_paths, img_size)
    X_test = processing_img(test_paths, img_size)

    return X_train, X_test, train_labels, test_labels

In [None]:
X_train, X_test, y_train, y_test = prepare_data(data)

In [88]:
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.models import load_model

# weight class

In [75]:
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.arange(len(tag_mapping)),
    y=data['Label'].values
)

In [146]:
print("Class weights:")
for class_index, weight in class_weights.items():
    print(f"Class {class_index}: Weight {weight}")

Class weights:
Class 0: Weight 0.812
Class 1: Weight 1.311
Class 2: Weight 1.154
Class 3: Weight 1.402
Class 4: Weight 1.182
Class 5: Weight 1.313
Class 6: Weight 0.693
Class 7: Weight 0.592
Class 8: Weight 1.614


In [76]:
class_weights = {i: weight for i, weight in enumerate(class_weights)}

In [77]:
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3), pooling='avg')
base_model.trainable = True

In [78]:
for layer in base_model.layers[:100]:  
    layer.trainable = False


# fine tuning model

In [79]:
model = Sequential([
    base_model,
    Flatten(),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(len(tag_mapping), activation='softmax')
])

In [80]:
model.compile(optimizer=Adam(learning_rate=0.00001), loss='categorical_crossentropy', metrics=['accuracy'])

In [82]:
checkpoint = ModelCheckpoint('best_car_brand_model_classweight.keras', monitor='val_accuracy', save_best_only=True)
early_stopping = EarlyStopping(monitor='val_accuracy', patience=7, restore_best_weights=True)

In [83]:
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=40,
    batch_size=32,
    class_weight=class_weights,
    callbacks=[checkpoint, early_stopping]
)

Epoch 1/40
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m637s[0m 786ms/step - accuracy: 0.1391 - loss: 2.9155 - val_accuracy: 0.2299 - val_loss: 2.2843
Epoch 2/40
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m560s[0m 704ms/step - accuracy: 0.2152 - loss: 2.4708 - val_accuracy: 0.3052 - val_loss: 2.0430
Epoch 3/40
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m551s[0m 693ms/step - accuracy: 0.2725 - loss: 2.2227 - val_accuracy: 0.3385 - val_loss: 1.9297
Epoch 4/40
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m593s[0m 746ms/step - accuracy: 0.3143 - loss: 2.0538 - val_accuracy: 0.3678 - val_loss: 1.8424
Epoch 5/40
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m456s[0m 574ms/step - accuracy: 0.3613 - loss: 1.9007 - val_accuracy: 0.4057 - val_loss: 1.7637
Epoch 6/40
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m460s[0m 578ms/step - accuracy: 0.3863 - loss: 1.8010 - val_accuracy: 0.4280 - val_loss: 1.6950
Epoc

In [84]:
loss, accuracy = model.evaluate(X_test, y_test, verbose=1)
print(f'Test accuracy: {accuracy}')

[1m199/199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 312ms/step - accuracy: 0.74 - loss: 1.2376
Test accuracy: 0.74


In [85]:
y_pred = model.predict(X_test).argmax(axis=1)
y_test_labels = y_test.argmax(axis=1)
print("Classification Report:")
print(classification_report(y_test_labels, y_pred, target_names=list(tag_mapping.keys())))

[1m199/199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 315ms/step
Classification Report:
              precision    recall  f1-score   support

      Others       0.65      0.62      0.63       894
       Honda       0.67      0.60      0.63       554
     Hyundai       0.65      0.63      0.64       618
         KIA       0.60      0.59      0.59       506
       Mazda       0.75      0.78      0.76       598
  Mitsubishi       0.55      0.62      0.58       538
      Toyota       0.70      0.69      0.69      1018
      Suzuki       0.82      0.80      0.81      1193
     Vinfast       0.76      0.80      0.78       441

    accuracy                           0.74      6360
   macro avg       0.69      0.70      0.69      6360
weighted avg       0.76      0.80      0.73      6360



In [86]:
model.save('best_car_brand_model_classweight.keras')