In [None]:
# Nama File: 02_baseline_training.ipynb
# Author: Alfrida Sabar
# Deskripsi: Notebook untuk training baseline model YOLOv5 dengan CSPDarknet pada dataset mata uang Rupiah

# ==== Sel 1: Instalasi Dependencies ====
%pip install -q torch torchvision torchaudio
%pip install -q albumentations timm tqdm pyyaml opencv-python
%pip install -q roboflow
%pip install -q matplotlib seaborn
%pip install -q termcolor
# Import library standar
import os
import sys
import yaml
import torch
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# ==== Sel 2: Konfigurasi Proyek ====
# Make the project's packages importable by adding the project root to sys.path.
# The membership check keeps this cell idempotent: re-running it no longer
# appends a duplicate entry on every execution.
PROJECT_ROOT = '/content/drive/MyDrive/SmartCash'
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# Import custom modules
from handlers.roboflow_handler import RoboflowHandler
from handlers.data_handler import DataHandler
from handlers.model_handler import ModelHandler
from utils.logger import SmartCashLogger
from utils.preprocessing import ImagePreprocessor
from utils.experiment_tracker import ExperimentTracker

# Create the notebook-wide logger; later cells call logger.info/.success/.error/.data on it.
logger = SmartCashLogger(__name__)

In [None]:
# ==== Sel 3: Konfigurasi Environment ====
# Pick the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
logger.info(f"🖥️ Device tersedia: {device}")

# Load the shared project configuration.
# The encoding is given explicitly so decoding does not depend on the
# platform's locale default.
with open(f'{PROJECT_ROOT}/configs/base_config.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

In [None]:
# ==== Sel 4: Persiapan Dataset ====
def prepare_dataset(use_roboflow=False):
    """
    Resolve the dataset split directories and validate the dataset.

    Args:
        use_roboflow (bool): When truthy, the split paths come from
            ``RoboflowHandler.pull_dataset()``; otherwise the local
            ``data/train``, ``data/valid`` and ``data/test`` folders under
            ``PROJECT_ROOT`` are used (their ``images``/``labels``
            sub-folders are created if missing).

    Returns:
        tuple: ``(train_path, val_path, test_path, dataset_stats)`` where
        ``dataset_stats`` is whatever ``DataHandler.get_dataset_stats()``
        returns.

    Raises:
        ValueError: If ``DataHandler.verify_dataset()`` returns a falsy value.
    """
    config_file = f'{PROJECT_ROOT}/configs/base_config.yaml'

    # The data handler is built first, before any branch-specific work.
    handler = DataHandler(config_path=config_file)

    if not use_roboflow:
        # Local dataset branch
        logger.info("💾 Menggunakan dataset lokal...")
        train_path = f'{PROJECT_ROOT}/data/train'
        val_path = f'{PROJECT_ROOT}/data/valid'
        test_path = f'{PROJECT_ROOT}/data/test'

        # Ensure every split has both of its sub-folders
        for split_dir in (train_path, val_path, test_path):
            for sub_folder in ('images', 'labels'):
                os.makedirs(os.path.join(split_dir, sub_folder), exist_ok=True)
    else:
        # Roboflow branch: build the handler, then download/export the dataset
        remote = RoboflowHandler(config_path=config_file)
        logger.info("🌐 Mengunduh dataset dari Roboflow...")
        train_path, val_path, test_path = remote.pull_dataset()

    # Guard clause: stop immediately when validation fails
    if not handler.verify_dataset():
        logger.error("❌ Validasi dataset gagal!")
        raise ValueError("Dataset tidak valid")

    dataset_stats = handler.get_dataset_stats()
    logger.success("✅ Dataset siap digunakan!")

    return train_path, val_path, test_path, dataset_stats

In [None]:
# ==== Sel 5: Preprocessing ====
def preprocess_data(train_path, val_path):
    """
    Preprocess the training and validation splits and validate the output.

    The training split is processed with ``augment=True`` and the validation
    split with ``augment=False``. Results are written to
    ``data/processed/train`` and ``data/processed/val`` under ``PROJECT_ROOT``.

    Args:
        train_path: Input directory of the training split.
        val_path: Input directory of the validation split.

    Returns:
        tuple: ``(train_stats, val_stats)`` as returned by
        ``ImagePreprocessor.validate_preprocessing`` for each output directory.
    """
    processor = ImagePreprocessor(
        config_path=f'{PROJECT_ROOT}/configs/base_config.yaml'
    )

    train_out = f'{PROJECT_ROOT}/data/processed/train'
    val_out = f'{PROJECT_ROOT}/data/processed/val'

    # (input directory, output directory, augment flag) — training goes first
    jobs = (
        (train_path, train_out, True),
        (val_path, val_out, False),
    )
    for source_dir, target_dir, use_augment in jobs:
        processor.process_dataset(
            input_dir=source_dir,
            output_dir=target_dir,
            augment=use_augment
        )

    # Validate both outputs only after both splits have been processed
    train_stats = processor.validate_preprocessing(train_out)
    val_stats = processor.validate_preprocessing(val_out)
    return train_stats, val_stats

In [None]:
# ==== Sel 6: Training Baseline Model ====
def train_baseline_model(train_path, val_path):
    """
    Train the baseline model (CSPDarknet backbone) inside a tracked run.

    Args:
        train_path: Directory of the training data passed to
            ``ModelHandler.run_experiment``.
        val_path: Directory of the validation data passed to
            ``ModelHandler.run_experiment``.

    Returns:
        Whatever ``ModelHandler.run_experiment`` returns; the same object is
        also forwarded to ``ExperimentTracker.log_metrics``.

    Raises:
        Any exception raised by the handlers propagates to the caller. The
        tracker run is still closed via ``end_run()`` in that case.
    """
    # Set up the experiment tracker
    experiment_tracker = ExperimentTracker(
        experiment_name='baseline_csp_scenario1'
    )

    # Initialise the model handler; the class count comes from the loaded config
    model_handler = ModelHandler(
        config_path=f'{PROJECT_ROOT}/configs/base_config.yaml',
        num_classes=len(config['dataset']['classes'])
    )

    # Description of the baseline scenario
    baseline_scenario = {
        'name': 'scenario_1',
        'description': 'YOLOv5 Default (CSPDarknet) - Variasi Posisi',
        'backbone': 'csp'
    }

    # Start the experiment run
    experiment_tracker.start_run(
        run_name='baseline_position_variation',
        config=config
    )

    # Once the run is started it must always be ended, even when training or
    # metric logging fails; otherwise the tracker is left with an open run.
    try:
        results = model_handler.run_experiment(
            scenario=baseline_scenario,
            train_path=train_path,
            val_path=val_path,
            test_path=None  # no test split is passed at this stage
        )

        # Log the metrics to the experiment tracker
        experiment_tracker.log_metrics(results)
    finally:
        experiment_tracker.end_run()

    return results

In [None]:
# ==== Sel 7: Visualisasi Hasil ====
def visualize_training_results(results):
    """
    Plot the baseline training results and save the figure as a PNG.

    Three panels are drawn side by side: the training/validation loss curves,
    a bar chart of precision/recall/F1, and a confusion-matrix heatmap. The
    figure is written to ``results/baseline_training_results.png`` under
    ``PROJECT_ROOT`` and then shown.

    Args:
        results: Mapping that must provide the keys ``'training_loss'``,
            ``'validation_loss'``, ``'precision'``, ``'recall'``,
            ``'f1_score'`` and ``'confusion_matrix'``.

    Raises:
        KeyError: If one of the keys above is missing from ``results``.
    """
    # Explicit Figure/Axes interface instead of the pyplot state machine
    fig, (ax_loss, ax_metrics, ax_cm) = plt.subplots(1, 3, figsize=(15, 5))

    # Learning curve
    ax_loss.set_title('Learning Curve')
    ax_loss.plot(results['training_loss'], label='Training Loss')
    ax_loss.plot(results['validation_loss'], label='Validation Loss')
    ax_loss.legend()

    # Performance metrics
    metrics = ['precision', 'recall', 'f1_score']
    ax_metrics.bar(metrics, [results[m] for m in metrics])
    ax_metrics.set_title('Model Performance Metrics')

    # Confusion matrix
    sns.heatmap(results['confusion_matrix'], annot=True, cmap='Blues', ax=ax_cm)
    ax_cm.set_title('Confusion Matrix')

    fig.tight_layout()

    # Create the output directory first: savefig does not create missing
    # parent directories and would otherwise fail on a fresh project tree.
    output_path = f'{PROJECT_ROOT}/results/baseline_training_results.png'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    fig.savefig(output_path)
    plt.show()


In [None]:
# ==== Sel 8: Fungsi Utama ====
def run_notebook(use_roboflow=False):
    """
    Run the whole baseline pipeline: dataset preparation, preprocessing,
    training and visualisation.

    Args:
        use_roboflow (bool): Forwarded to ``prepare_dataset``; truthy selects
            the Roboflow dataset, falsy the local one.

    Returns:
        The results object returned by ``train_baseline_model``, or ``None``
        when any step raised an exception (the error and its traceback are
        logged rather than re-raised).
    """
    # Local import keeps this cell self-contained
    import traceback

    try:
        # Dataset preparation (the test split path is not used in this pipeline)
        train_path, val_path, _test_path, dataset_stats = prepare_dataset(use_roboflow)
        logger.data(f"📊 Statistik Dataset: {dataset_stats}")

        # Preprocessing
        train_stats, val_stats = preprocess_data(train_path, val_path)
        logger.data(f"📈 Statistik Preprocessing Train: {train_stats}")
        logger.data(f"📈 Statistik Preprocessing Validasi: {val_stats}")

        # Train the baseline model on the preprocessed output directories
        results = train_baseline_model(
            f'{PROJECT_ROOT}/data/processed/train',
            f'{PROJECT_ROOT}/data/processed/val'
        )

        # Visualisation
        visualize_training_results(results)

        logger.success("✨ Proses training baseline selesai!")

        return results

    except Exception as e:
        # Best-effort behaviour is kept (log and return None), but the full
        # traceback is logged too so the failing step can be located.
        logger.error(f"❌ Terjadi kesalahan: {str(e)}")
        logger.error(traceback.format_exc())
        return None


In [None]:
# ==== Sel 9: Eksekusi Notebook ====
if __name__ == '__main__':
    # Example usage:
    # - False for the local dataset
    # - True for the dataset from Roboflow
    # NOTE: run_notebook returns None when it catches an exception, so
    # `results` may be None after this cell.
    results = run_notebook(use_roboflow=False)