In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input tree and report, for every directory found,
# how many files sit directly inside it.
for folder, _subfolders, files_in_folder in os.walk('/kaggle/input'):
    file_count = len(files_in_folder)
    print(f"{folder} contains {file_count} files")


# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input contains 0 files
/kaggle/input/chinese-food-175-dataset contains 1 files
/kaggle/input/chinese-food-175-dataset/chinese-food-175 contains 0 files
/kaggle/input/chinese-food-175-dataset/chinese-food-175/chinese-food-175 contains 1 files
/kaggle/input/chinese-food-175-dataset/chinese-food-175/chinese-food-175/红烧冬瓜 contains 140 files
/kaggle/input/chinese-food-175-dataset/chinese-food-175/chinese-food-175/豉汁蒸凤爪 contains 294 files
/kaggle/input/chinese-food-175-dataset/chinese-food-175/chinese-food-175/红烧豆腐 contains 287 files
/kaggle/input/chinese-food-175-dataset/chinese-food-175/chinese-food-175/麻辣豆腐 contains 279 files
/kaggle/input/chinese-food-175-dataset/chinese-food-175/chinese-food-175/冬瓜排骨汤 contains 291 files
/kaggle/input/chinese-food-175-dataset/chinese-food-175/chinese-food-175/葱爆羊肉 contains 290 files
/kaggle/input/chinese-food-175-dataset/chinese-food-175/chinese-food-175/清炒芥蓝 contains 283 files
/kaggle/input/chinese-food-175-dataset/chinese-food-175/chinese-food-

In [None]:
# =============================================================================
# CELL 1: Enhanced Initialization and Setup
# =============================================================================

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
import matplotlib.font_manager as fm
import subprocess
import random
import warnings
# Silence every Python warning raised from this point on in the session.
# NOTE(review): a blanket 'ignore' also hides deprecation warnings from the
# libraries imported above — consider narrowing the filter.
warnings.filterwarnings('ignore')

print("✅ Libraries successfully imported.")

def set_seed(seed=42):
    """Seed every random number generator used by this notebook.

    Args:
        seed: Integer handed to Python's ``random`` module, NumPy's global
            generator and PyTorch's CPU / CUDA seeding functions.

    Besides seeding, cuDNN is switched to deterministic mode and its
    benchmark mode is turned off.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # cuDNN flags set alongside the seeds.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Seed all generators up front so the cells below start from a known state.
set_seed(42)
print("🎲 Random seeds set for reproducibility.")

# Use the first CUDA device when one is present, otherwise stay on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
if not torch.cuda.is_available():
    print(f"🖥️ Using device: {device}")
else:
    torch.cuda.empty_cache()
    print(f"🖥️ Using device: {device} - {torch.cuda.get_device_name(0)}")
    # total_memory is reported in bytes; 1024**3 converts it to GiB.
    print(f"💾 GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")

# The class folders sit two identically named levels below the dataset root.
base_data_dir = '/kaggle/input/chinese-food-175-dataset'
data_root_dir = os.path.join(base_data_dir, *(['chinese-food-175'] * 2))
print(f"📁 Dataset root directory: {data_root_dir}")

# -----------------------------------------------------------------------------
# Font setup: the class names are Chinese, so matplotlib needs a CJK-capable
# font. Install one via apt when missing, register it with matplotlib, then
# make it the default family.
# -----------------------------------------------------------------------------
font_name = 'WenQuanYi Zen Hei'
font_found = any(font_name in f.name for f in fm.fontManager.ttflist)
if not font_found:
    print(f"⚠️ Font '{font_name}' not found. Installing...")
    try:
        subprocess.run(["apt-get", "update"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        install_result = subprocess.run(
            ["apt-get", "install", "-y", "fonts-wqy-zenhei"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        install_code = install_result.returncode
    except OSError as install_error:
        # apt-get itself is not available on this system.
        install_code = install_error
    if install_code == 0:
        print("🔁 Font installed.")
    else:
        # Do not claim success when apt-get failed; the setup step below will
        # then report that the default font is used.
        print(f"⚠️ Font installation failed ({install_code}).")
else:
    print(f"✅ Font '{font_name}' already available.")

try:
    if not font_found:
        # matplotlib built its font list before the package was installed, so
        # the new files must be registered explicitly. apt places fonts under
        # /usr/share/fonts; scanning that directory directly avoids relying on
        # any cached system font listing.
        for candidate in fm.findSystemFonts(fontpaths=['/usr/share/fonts'], fontext='ttf'):
            if 'wqy' in os.path.basename(candidate).lower():
                fm.fontManager.addfont(candidate)

    # fallback_to_default=False makes a missing font raise instead of silently
    # returning the default font's path, which would then be registered under
    # the CJK font's name and reported as configured.
    font_path = fm.findfont(
        fm.FontProperties(family=font_name),
        fontext='ttf',
        fallback_to_default=False,
    )
    fe = fm.FontEntry(fname=font_path, name=font_name)
    fm.fontManager.ttflist.insert(0, fe)
    plt.rcParams.update({'font.family': fe.name, 'font.sans-serif': fe.name})
    print(f"🖋️ Font configured: {fe.name}")
except Exception as font_error:
    # Best effort: plots still work with the default font, only CJK glyphs
    # will be missing. KeyboardInterrupt / SystemExit are no longer swallowed.
    print("⚠️ Font setup failed, using default font.")
    print(f"   Reason: {font_error}")

print("\n✅ Cell 1: Enhanced initialization complete.")

✅ Libraries successfully imported.
🎲 Random seeds set for reproducibility.
🖥️ Using device: cuda:0 - Tesla T4
💾 GPU Memory: 14.7 GB
📁 Dataset root directory: /kaggle/input/chinese-food-175-dataset/chinese-food-175/chinese-food-175
⚠️ Font 'WenQuanYi Zen Hei' not found. Installing...
🔁 Font installed.
🖋️ Font configured: WenQuanYi Zen Hei

✅ Cell 1: Enhanced initialization complete.


In [None]:
# =============================================================================
# CELL 2: CONFIGURATION AND DATA PREPARATION
# =============================================================================
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split
from torchvision.transforms import AutoAugment, AutoAugmentPolicy
import torch

# Location of the per-class image folders and the device used for tensors.
data_root_dir = '/kaggle/input/chinese-food-175-dataset/chinese-food-175/chinese-food-175'
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"🌍 Using device: {device}")

# Training pipeline: resize, random geometric / colour augmentation, tensor
# conversion, random erasing, then normalisation with the listed mean / std.
train_transforms = transforms.Compose([
    transforms.Resize(size=(256, 256)),
    transforms.RandomResizedCrop(size=224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=20),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    AutoAugment(policy=AutoAugmentPolicy.IMAGENET),
    transforms.ToTensor(),
    transforms.RandomErasing(p=0.2, scale=(0.02, 0.20)),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Validation pipeline: no random operations — resize, centre crop, normalise.
val_transforms = transforms.Compose([
    transforms.Resize(size=(256, 256)),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
print("✅ success.")

# Load the folder-per-class dataset without a transform; the split-specific
# pipelines above are attached later by a wrapper dataset.
full_dataset = datasets.ImageFolder(root=data_root_dir)
class_names = full_dataset.classes
num_classes = len(full_dataset.classes)
print(f"🔍 Dataset found it. class total: {num_classes}")

# 80/20 split over sample indices, stratified on the class targets.
train_indices, val_indices = train_test_split(
    [*range(len(full_dataset))],
    stratify=full_dataset.targets,
    test_size=0.2,
    random_state=42,
)

class DatasetWithTransform(torch.utils.data.Dataset):
    """Dataset view that applies ``transform`` to each sample's input on access.

    Args:
        subset: Sized, indexable collection whose items unpack into an
            ``(input, label)`` pair.
        transform: Callable applied to the input; the label is returned as is.
    """

    def __init__(self, subset, transform):
        self.subset, self.transform = subset, transform

    def __len__(self):
        """Return the number of items in the wrapped collection."""
        return len(self.subset)

    def __getitem__(self, index):
        """Return ``(transform(input), label)`` for the item at ``index``."""
        sample, label = self.subset[index]
        transformed = self.transform(sample)
        return transformed, label

# Pair each index split with its own transform pipeline.
train_dataset = DatasetWithTransform(
    subset=torch.utils.data.Subset(full_dataset, train_indices),
    transform=train_transforms,
)
val_dataset = DatasetWithTransform(
    subset=torch.utils.data.Subset(full_dataset, val_indices),
    transform=val_transforms,
)

batch_size = 32
num_workers = 2

# One loader per split; only the training loader shuffles its samples.
dataloaders = {
    split: torch.utils.data.DataLoader(
        split_dataset,
        batch_size=batch_size,
        shuffle=(split == 'train'),
        num_workers=num_workers,
        pin_memory=True,
    )
    for split, split_dataset in (('train', train_dataset), ('val', val_dataset))
}
# Sample counts per split, used later to average loss and accuracy.
dataset_sizes = {split: len(loader.dataset) for split, loader in dataloaders.items()}
print("✅ Dataloader for training ready to use.")

In [None]:
# =============================================================================
# CELL 3: MODEL DEFINITION AND TRAINING CONFIGURATION
# =============================================================================
from torchvision.models import EfficientNet_B4_Weights
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler

# Build EfficientNet-B4 initialised from the IMAGENET1K_V1 weights.
print("🚀 Memuat arsitektur EfficientNet-B4...")
weights = EfficientNet_B4_Weights.IMAGENET1K_V1
model_ft = models.efficientnet_b4(weights=weights)

# Swap the stock classifier for a two-layer head that ends in one logit per
# dataset class. The input width is read from the original head's Linear layer.
num_ftrs = model_ft.classifier[1].in_features
model_ft.classifier = nn.Sequential(*[
    nn.Dropout(p=0.3),
    nn.Linear(in_features=num_ftrs, out_features=512),
    nn.BatchNorm1d(num_features=512),
    nn.ReLU(inplace=True),
    nn.Dropout(p=0.2),
    nn.Linear(in_features=512, out_features=num_classes),
])
model_ft = model_ft.to(device)
print("🔧 Classifier head already modified.")

# Label-smoothed cross entropy, AdamW over every parameter, cosine LR decay.
# NOTE(review): T_max is fixed at 50 regardless of how many epochs the training
# call requests — confirm this is the intended schedule length.
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer_ft = optim.AdamW(params=model_ft.parameters(), lr=3e-4, weight_decay=1e-4)
scheduler_ft = lr_scheduler.CosineAnnealingLR(optimizer=optimizer_ft, T_max=50, eta_min=1e-6)
print("✅ Loss, optimizer, and scheduler already configure.")

In [None]:
# =============================================================================
# CELL 4: MAIN TRAINING FUNCTION
# =============================================================================
import time
import copy
from tqdm.notebook import tqdm
import numpy as np

def train_model(model, criterion, optimizer, scheduler, num_epochs=25, patience=5):
    """Train ``model`` with mixed precision and keep the best validation weights.

    Every epoch runs a ``'train'`` pass followed by a ``'val'`` pass over the
    module-level ``dataloaders``. Batches are moved to the module-level
    ``device`` and per-epoch averages are computed with ``dataset_sizes``.

    Args:
        model: Network to optimise; it is updated in place.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer driving the parameter updates.
        scheduler: Learning-rate scheduler stepped once per epoch, or ``None``
            to keep the learning rate untouched.
        num_epochs: Maximum number of epochs to run.
        patience: Training stops once this many consecutive epochs pass
            without a new best validation accuracy.

    Returns:
        A ``(model, history)`` tuple. ``model`` has the weights of the best
        validation epoch loaded; ``history`` maps ``'train_loss'``,
        ``'train_acc'``, ``'val_loss'`` and ``'val_acc'`` to per-epoch floats.
    """
    since = time.time()
    use_amp = torch.cuda.is_available()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    epochs_no_improve = 0

    history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}

    # Gradient scaling and autocast are only active when CUDA is available.
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

    for epoch in range(num_epochs):
        print(f'\nEpoch {epoch+1}/{num_epochs}')
        print('-' * 20)

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            progress_bar = tqdm(dataloaders[phase], desc=f"{phase.capitalize()} Epoch {epoch+1}", leave=False)

            for inputs, labels in progress_bar:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Gradients only exist in the training phase, so only clear them there.
                if is_train:
                    optimizer.zero_grad()

                with torch.set_grad_enabled(is_train):
                    with torch.cuda.amp.autocast(enabled=use_amp):
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)
                        _, preds = torch.max(outputs, 1)

                    if is_train:
                        scaler.scale(loss).backward()
                        scaler.step(optimizer)
                        scaler.update()

                # criterion returns a batch mean; weight it by the batch size so
                # the epoch value is a per-sample average.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print(f'{phase.upper()} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            if is_train:
                history['train_loss'].append(epoch_loss)
                history['train_acc'].append(epoch_acc)
            else:
                history['val_loss'].append(epoch_loss)
                history['val_acc'].append(epoch_acc)

                if epoch_acc > best_acc:
                    print(f"🎉 accuracy improve : ({best_acc:.4f} --> {epoch_acc:.4f}). Menyimpan model...")
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                    epochs_no_improve = 0
                else:
                    epochs_no_improve += 1

        # Advance the LR schedule once per completed epoch. This must not be
        # guarded by the phase name: after the phase loop `phase` is always
        # 'val', so a `phase == 'train'` check here would never fire and the
        # learning rate would stay constant.
        if scheduler is not None:
            scheduler.step()

        if epochs_no_improve >= patience:
            print(f"\n🛑 Early stopping! no improvement {patience} epoch.")
            break

    time_elapsed = time.time() - since
    print(f'\nTraining Complete {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'🏆 Best Accuracy: {best_acc:.4f}')

    # Hand back the weights from the best validation epoch, not the last one.
    model.load_state_dict(best_model_wts)
    return model, history

# Status banner printed once the statements above it in this cell have run.
print("✅ Main Training Function Ready.")

In [None]:
# =============================================================================
# CELL 5: TRAINING AND SAVING MODEL
# =============================================================================
import matplotlib.pyplot as plt

# Run training for at most 15 epochs, stopping early after 5 epochs without
# a new best validation accuracy.
print("\n🚀 Running Process Started...")
model_ft_trained, training_history = train_model(
    model=model_ft,
    criterion=criterion,
    optimizer=optimizer_ft,
    scheduler=scheduler_ft,
    num_epochs=15,
    patience=5,
)

# Highest validation accuracy recorded over the run, shown as a percentage.
best_val_acc = max(training_history['val_acc'])
print(f"Best Accuracy: {best_val_acc*100:.2f}%")

final_model_path = 'best_model_food_classifier.pth'
print(f"Saving Best Model: {final_model_path}")

# Store the weights together with the class names and a text description of
# the validation transforms.
torch.save(
    obj={
        'model_state_dict': model_ft_trained.state_dict(),
        'best_val_acc': best_val_acc,
        'class_names': class_names,
        'val_transforms_code': str(val_transforms),
    },
    f=final_model_path,
)

print(f"✅ Model Saved '{final_model_path}'. Already To Download!")

def plot_training_history(history):
    """Draw accuracy and loss curves side by side for train and validation.

    Args:
        history: Mapping that provides the sequences ``'train_acc'``,
            ``'val_acc'``, ``'train_loss'`` and ``'val_loss'``; each sequence
            is plotted against its index.
    """
    fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))

    # (axes, train key, val key, train label, val label, title, y-axis label)
    panels = (
        (acc_ax, 'train_acc', 'val_acc', 'Train Accuracy', 'Validation Accuracy',
         'Accuracy vs. Epochs', 'Accuracy'),
        (loss_ax, 'train_loss', 'val_loss', 'Train Loss', 'Validation Loss',
         'Loss vs. Epochs', 'Loss'),
    )
    for ax, train_key, val_key, train_label, val_label, title, y_label in panels:
        ax.plot(history[train_key], label=train_label)
        ax.plot(history[val_key], label=val_label)
        ax.set_title(title)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(y_label)
        ax.legend()
        ax.grid(True)

    fig.tight_layout()
    plt.show()

# Plot the accuracy and loss curves recorded in training_history.
print("\n📊 Showing Grafik Training...")
plot_training_history(training_history)