In [None]:
!pip install efficientnet_pytorch

In [2]:
import numpy as np
import pandas as pd
import os
import zipfile
from tqdm import tqdm
from PIL import Image
from efficientnet_pytorch import EfficientNet
import time
import copy
from sklearn.model_selection import train_test_split


import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
from torchvision import transforms
from torch.optim import Adam

import warnings

import mlflow, mlflow.pytorch



warnings.filterwarnings('always')
warnings.filterwarnings('ignore')

In [3]:
# 1️⃣ Hubungkan ke MLflow Server (UI harus sudah dijalankan)
mlflow.set_tracking_uri("http://localhost:5000")

# 2️⃣ (Opsional tapi disarankan) Set experiment
mlflow.set_experiment("congestion_classifier")

<Experiment: artifact_location='mlflow-artifacts:/752962008456535285', creation_time=1754589098524, experiment_id='752962008456535285', last_update_time=1754589098524, lifecycle_stage='active', name='congestion_classifier', tags={}>

#EXTRACT DATA

In [4]:
os.makedirs("congestion", exist_ok=True)

with zipfile.ZipFile("congestion.zip", 'r') as zip_ref:
    zip_ref.extractall("congestion")

folders = os.listdir("congestion")
print(folders)

path_v1 = os.path.join("congestion", "trafficnet_dataset_v1")
path_train = os.path.join(path_v1, "train")
path_sparse = os.path.join(path_train, "sparse_traffic")
path_dense = os.path.join(path_train, "dense_traffic")

FileNotFoundError: [Errno 2] No such file or directory: 'congestion.zip'

In [4]:
df_list = []

for f in os.listdir(path_dense):
  file_path = os.path.join(path_dense, f)
  if os.path.isfile(file_path):
    df_list.append({
        'image': str(file_path),
        'congestion': 1
    })

for f in os.listdir(path_sparse):
  file_path = os.path.join(path_sparse, f)
  if os.path.isfile(file_path):
    df_list.append({
        'image': str(file_path),
        'congestion': 0
    })

df = pd.DataFrame(df_list)
df = df.sample(frac=1, random_state=42).reset_index(drop=True)
display(df.head())

Unnamed: 0,image,congestion
0,congestion\trafficnet_dataset_v1\train\sparse_...,0
1,congestion\trafficnet_dataset_v1\train\sparse_...,0
2,congestion\trafficnet_dataset_v1\train\sparse_...,0
3,congestion\trafficnet_dataset_v1\train\sparse_...,0
4,congestion\trafficnet_dataset_v1\train\sparse_...,0


#CONFIGS

In [5]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {DEVICE}")

RANDOM_STATE = 42
MODEL_VERSION = 'efficientnet-b0'
BATCH_SIZE = 32
EPOCHS = 5
LEARNING_RATE = 3e-4
RETRAIN = False
CHECKPOINT_TO_LOAD = 'best_congestion_classifier.pt'

Using device: cuda


In [6]:
class CongestionDataset(Dataset):

  def __init__(self, dataframe):
    self.dataframe = dataframe

    self.transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

  def __getitem__(self, key):
    row = self.dataframe.iloc[key]
    image = self.transform(Image.open(row['image']))
    label = torch.tensor(row['congestion'], dtype=torch.float32)
    return image,label.unsqueeze(0)

  def __len__(self):
    return len(self.dataframe)

In [7]:
train_df, val_df = train_test_split(df, test_size=0.1, random_state=42, stratify=df['congestion'])

train_dataset = CongestionDataset(train_df)
val_dataset = CongestionDataset(val_df)

dataloaders = {
    'train': DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=0),
    'val': DataLoader(val_dataset, batch_size=32, shuffle=True, num_workers=0)
}

dataset_sizes = {
    'train': len(train_dataset),
    'val': len(val_dataset)
}


#Modeling

In [8]:
model = EfficientNet.from_pretrained("efficientnet-b0")

for param in model.parameters():
  param.requires_grad = False

num_ftrs = model._fc.in_features
model._fc = nn.Linear(num_ftrs, 1)

Loaded pretrained weights for efficientnet-b0


In [9]:
def train_model(model, criterion, optimizer, dataloaders, device, num_epochs=10, checkpoint_path='best_model.pt'):

    with mlflow.start_run(run_name="congestion-classifier-v1"):

        # Logging params
        mlflow.log_param("model_version", MODEL_VERSION)
        mlflow.log_param("batch_size", BATCH_SIZE)
        mlflow.log_param("learning_rate", LEARNING_RATE)
        mlflow.log_param("epochs", num_epochs)

        since = time.time()
        best_model_wts = copy.deepcopy(model.state_dict())
        best_acc = 0.0

        for epoch in range(num_epochs):
            print(f'Epoch {epoch+1}/{num_epochs}')
            print('-' * 10)

            for phase in ['train', 'val']:
                if phase == 'train':
                    model.train()
                else:
                    model.eval()

                running_loss = 0.0
                running_corrects = 0

                for inputs, labels in tqdm(dataloaders[phase], desc=f"{phase} phase"):
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    optimizer.zero_grad()

                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(inputs)

                        loss = criterion(outputs, labels)

                        preds = torch.sigmoid(outputs) > 0.5

                        if phase == 'train':
                            loss.backward()
                            optimizer.step()

                    running_loss += loss.item() * inputs.size(0)
                    running_corrects += torch.sum(preds == labels.data)

                epoch_loss = running_loss / dataset_sizes[phase]
                epoch_acc = running_corrects.double() / dataset_sizes[phase]

                print(f'{phase.capitalize()} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

                # ✅ Log metrics ke MLflow
                mlflow.log_metric(f"{phase}_loss", epoch_loss, step=epoch)
                mlflow.log_metric(f"{phase}_acc", epoch_acc, step=epoch)

                if phase == 'val' and epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                    torch.save({
                        'epoch': epoch,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        'best_accuracy': best_acc,
                    }, checkpoint_path)

        time_elapsed = time.time() - since
        print(f'Best val Acc: {best_acc:4f}')
        # ✅ Log model ke MLflow
        mlflow.pytorch.log_model(model, "model")

        # Register model ke registry bernama "efficientnet_congestion"
        mlflow.register_model(
        "runs:/" + mlflow.active_run().info.run_id + "/model",
        "efficientnet_congestion"
        )


        # ✅ Log model checkpoint file
        if os.path.exists(checkpoint_path):
            mlflow.log_artifact(checkpoint_path)

        mlflow.end_run()

        model.load_state_dict(best_model_wts)
        return model

In [10]:
def predict_image(model, image_path, device):

    model.eval()

    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    image = Image.open(image_path).convert('RGB')
    image_tensor = transform(image)
    image_tensor = image_tensor.unsqueeze(0)
    image_tensor = image_tensor.to(device)

    with torch.no_grad():
        output = model(image_tensor)
        prob = torch.sigmoid(output)
        prob_value = prob.item()

    prediction = "Dense" if prob_value > 0.5 else "Sparse"

    return prediction, prob_value


In [11]:
if __name__ == '__main__':

    model = model.to(DEVICE)

    criterion = nn.BCEWithLogitsLoss()
    optimizer = Adam(model._fc.parameters(), lr=LEARNING_RATE)

    if RETRAIN and os.path.exists(CHECKPOINT_TO_LOAD):
        checkpoint = torch.load(CHECKPOINT_TO_LOAD, map_location=DEVICE)
        model.load_state_dict(checkpoint['model_state_dict'])
        print("Checkpoint loaded.")

    trained_model = train_model(model, criterion, optimizer, dataloaders, DEVICE, num_epochs=EPOCHS, checkpoint_path=CHECKPOINT_TO_LOAD)

Epoch 1/5
----------


train phase: 100%|██████████| 62/62 [00:28<00:00,  2.15it/s]


Train Loss: 0.6365 Acc: 0.7066


val phase: 100%|██████████| 7/7 [00:02<00:00,  2.48it/s]


Val Loss: 0.5618 Acc: 0.8136
Epoch 2/5
----------


train phase: 100%|██████████| 62/62 [00:11<00:00,  5.40it/s]


Train Loss: 0.5088 Acc: 0.8742


val phase: 100%|██████████| 7/7 [00:01<00:00,  5.75it/s]


Val Loss: 0.4568 Acc: 0.8727
Epoch 3/5
----------


train phase: 100%|██████████| 62/62 [00:11<00:00,  5.44it/s]


Train Loss: 0.4357 Acc: 0.8828


val phase: 100%|██████████| 7/7 [00:01<00:00,  5.64it/s]


Val Loss: 0.3928 Acc: 0.8864
Epoch 4/5
----------


train phase: 100%|██████████| 62/62 [00:11<00:00,  5.43it/s]


Train Loss: 0.3905 Acc: 0.8879


val phase: 100%|██████████| 7/7 [00:01<00:00,  5.58it/s]


Val Loss: 0.3566 Acc: 0.9136
Epoch 5/5
----------


train phase: 100%|██████████| 62/62 [00:11<00:00,  5.41it/s]


Train Loss: 0.3512 Acc: 0.8949


val phase: 100%|██████████| 7/7 [00:01<00:00,  5.68it/s]


Val Loss: 0.3341 Acc: 0.8955
Best val Acc: 0.913636


Registered model 'efficientnet_congestion' already exists. Creating a new version of this model...
2025/08/08 01:00:30 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: efficientnet_congestion, version 1
Created version '1' of model 'efficientnet_congestion'.


🏃 View run congestion-classifier-v1 at: http://localhost:5000/#/experiments/752962008456535285/runs/c4bc291ca3434cc39913c91cef45a61d
🧪 View experiment at: http://localhost:5000/#/experiments/752962008456535285


In [25]:
checkpoint = torch.load(CHECKPOINT_TO_LOAD, map_location=DEVICE)
model.load_state_dict(checkpoint['model_state_dict'])
inference_model = model.to(DEVICE)

SAMPLE_IMAGE_PATH = "test.png"

if os.path.exists(SAMPLE_IMAGE_PATH):
    predicted_class, probability = predict_image(inference_model, SAMPLE_IMAGE_PATH, DEVICE)

    print(f"\nGambar: {SAMPLE_IMAGE_PATH}")
    print(f"Prediksi: {predicted_class}")
    print(f"Probabilitas (Dense): {probability:.4f}")

else:
    print(f"\nError: File gambar ga ketemu")


Gambar: test.png
Prediksi: Dense
Probabilitas (Dense): 0.7228
