In [None]:
from fastai import *
from fastai.vision.all import *
from fastai.vision.widgets import *
from fastai.metrics import error_rate, accuracy
from torchvision.transforms import RandAugment
from torchvision.models.efficientnet import *
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import albumentations as A
import os
import timm
import seaborn as sns
import cv2
from pathlib import Path
import warnings

# Silence library warnings so cell outputs stay readable.
warnings.filterwarnings('ignore')

# Seed Python `random`, NumPy and PyTorch in one call via fastai's `set_seed`.
# Seeding NumPy alone is not enough: the training cells below also draw
# randomness from Python `random` and PyTorch.
set_seed(42)

In [None]:
# Root of the competition dataset plus its nested train / test image folders.
base_path = Path("/kaggle/input/dsc-logika-ui-2025")
train_path = base_path.joinpath('Train', 'Train')
test_path = base_path.joinpath('Test', 'Test')

# List the entries directly under the dataset root as the cell output.
base_path.ls()

In [None]:
class AlbumentationsTransform(Transform):
    """Item transform that runs an Albumentations pipeline on a PIL image.

    Args:
        aug: callable invoked as ``aug(image=<ndarray>)`` that returns a mapping
            holding the augmented array under the ``'image'`` key.
    """
    # split_idx=0 restricts the transform to the training subset, so validation
    # images are not randomly augmented. order=2 follows the fastai
    # Albumentations tutorial and runs it after the resize item transform.
    split_idx, order = 0, 2

    def __init__(self, aug):
        self.aug = aug

    def encodes(self, img: PILImage):
        # Albumentations works on NumPy arrays: convert, augment, convert back.
        aug_img = self.aug(image=np.array(img))['image']
        return PILImage.create(aug_img)


# The opening parenthesis must sit on the same line as `A.Compose`; otherwise
# `aug` is bound to the class itself and the transform list is a discarded expression.
aug = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.ShiftScaleRotate(
        shift_limit=0.0625,
        scale_limit=0.1,
        rotate_limit=15,
        p=0.7
    ),
    A.RandomBrightnessContrast(p=0.8),
    A.HueSaturationValue(
        hue_shift_limit=10,
        sat_shift_limit=20,
        val_shift_limit=10,
        p=0.5
    ),
    A.CoarseDropout(
        max_holes=8,
        max_height=25,
        max_width=25,
        p=0.5
    )
])


def _make_dls(size):
    """Build train/validation DataLoaders from `train_path` at image size `size`.

    Both resolutions share `valid_pct` and `seed`, so only `size` differs
    between the DataLoaders built through this helper.
    """
    return ImageDataLoaders.from_folder(
        train_path,
        valid_pct = 0.2,
        seed = 42,
        # `encodes` is dispatched on PILImage, so the Albumentations step has to
        # run per item; inside `batch_tfms` the images are already tensors and
        # the transform never fired.
        # The loss function is a Learner argument, not a DataLoaders one, so it
        # is not passed here; supply it when constructing the learner.
        item_tfms = [
            RandomResizedCrop(size, min_scale=0.75),
            AlbumentationsTransform(aug)
        ]
    )


dls_small = _make_dls(128)
dls_large = _make_dls(224)

In [None]:
# Visual sanity check: preview up to nine samples drawn from `dls_small`.
dls_small.show_batch(max_n=9)

In [None]:
# Gather the test images and wrap them in a test DataLoader derived from `dls_large`.
test_files = get_image_files(test_path)
test_dl = dls_large.test_dl(test_files)

# Report the label vocabulary and the sizes of the train/validation datasets.
for caption, value in (
    ("Classes:", dls_large.vocab),
    ("Number of classes:", len(dls_large.vocab)),
    ("Number of training images:", len(dls_large.train_ds)),
    ("Number of validation images:", len(dls_large.valid_ds)),
):
    print(caption, value)

In [None]:
# NOTE(review): `model` is not handed to `vision_learner` below, which receives
# the architecture name instead. Confirm whether this standalone instance is
# still needed, since it loads a second copy of the pretrained weights.
model = timm.create_model('convnext_tiny.fb_in1k', pretrained=True)

# Build the learner on the small-resolution DataLoaders.
# The label-smoothing loss is supplied here because the loss function is a
# Learner argument; without it the learner falls back to its default loss.
learn = vision_learner(
    dls_small,
    'convnext_tiny.fb_in1k',
    loss_func = LabelSmoothingCrossEntropy(),
    metrics = F1Score(average='macro'),
    path = '/kaggle/working/'
)

In [None]:
# Run the learning-rate finder with four suggestion heuristics and show the
# rate proposed by each of them.
lrs_small = learn.lr_find(suggest_funcs=(minimum, steep, valley, slide))
display(*(getattr(lrs_small, name) for name in ('minimum', 'steep', 'valley', 'slide')))

In [None]:
# Stage 1: 15 one-cycle epochs at the `valley` rate, with MixUp and a
# checkpoint callback that monitors the F1 metric.
stage1_cbs = [MixUp(0.6), SaveModelCallback(monitor='f1_score')]
learn.fit_one_cycle(n_epoch=15, lr_max=lrs_small.valley, cbs=stage1_cbs)

In [None]:
# Switch the learner to the larger-resolution DataLoaders for the next stage.
learn.dls = dls_large

# Unfreeze before searching: the next training stage updates every layer, so
# the learning rate should be probed with the same set of trainable parameters
# it will later be applied to.
learn.unfreeze()

lrs_large = learn.lr_find(suggest_funcs=(minimum, steep, valley, slide))
display(
    lrs_large.minimum,
    lrs_large.steep,
    lrs_large.valley,
    lrs_large.slide
)

In [None]:
# Stage 2: unfreeze the learner, then run 40 one-cycle epochs at the `valley`
# rate found for the large DataLoaders, again with MixUp and F1 checkpointing.
learn.unfreeze()

stage2_cbs = [MixUp(0.6), SaveModelCallback(monitor='f1_score')]
learn.fit_one_cycle(n_epoch=40, lr_max=lrs_large.valley, cbs=stage2_cbs)

In [None]:
# Build an interpretation object from the trained learner, then plot the
# confusion matrix and the nine highest-loss samples.
interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix()
interp.plot_top_losses(k=9, figsize=(15, 10))

In [None]:
# Print the classification report from the interpretation object built earlier.
interp.print_classification_report()

In [None]:
# Export the trained learner under a fixed file name, then confirm on stdout.
nama_file_model = 'klasifikasi-rumah-adat-v1.pkl'
learn.export(fname=nama_file_model)

print(f"Model berhasil disimpan sebagai '{nama_file_model}'!")

In [None]:
# Plain inference over the test images.
# NOTE(review): predictions are assumed to come back in `test_files` order,
# which the submission cell relies on - confirm.
test_files = get_image_files(test_path)
plain_test_dl = dls_large.test_dl(test_files)
preds, _ = learn.get_preds(dl=plain_test_dl)

# Highest-scoring class index per image, mapped back to its vocabulary label.
pred_labels = preds.argmax(dim=1)
label_names = [dls_large.vocab[class_idx] for class_idx in pred_labels]

In [None]:
# Prepare submission DataFrame with correct columns: 'id' and 'style'.
# `Path.stem` drops the extension whatever its length; slicing off a fixed four
# characters would leave a trailing dot on names such as 'x.jpeg'.
submission_get_preds = pd.DataFrame({
    'id': [f.stem for f in test_files],
    'style': label_names
})

# Follow the row order of sample_submission.csv when it is available; only
# fall back to sorting by id when it is not, so the sort cannot undo the alignment.
# NOTE(review): the file is looked up under `train_path` - confirm that is
# where the competition ships it.
sample_sub_path = train_path/'sample_submission.csv'
if sample_sub_path.exists():
    # Read ids as text so they compare equal to the file-name stems above.
    sample_sub = pd.read_csv(sample_sub_path, dtype={'id': str})
    submission_get_preds = submission_get_preds.set_index('id').loc[sample_sub['id']].reset_index()
else:
    submission_get_preds = submission_get_preds.sort_values('id')

# Class distribution of the predictions, shown as the cell output.
submission_get_preds['style'].value_counts()

In [None]:
# Write the plain-inference submission to disk without the index column.
submission_get_preds.to_csv(path_or_buf='submission_get_preds.csv', index=False)
print("Submission file saved as submission_get_preds.csv")

In [None]:
# Test-time augmentation: rebuild the test DataLoader and predict via `learn.tta`.
test_dl = dls_large.test_dl(test_files)
preds, _ = learn.tta(dl=test_dl)

# Highest-scoring class index per image, mapped back to its vocabulary label.
pred_labels = preds.argmax(dim=1)
label_names = [dls_large.vocab[class_idx] for class_idx in pred_labels]

In [None]:
# Prepare submission DataFrame with correct columns: 'id' and 'style'.
# `Path.stem` drops the extension whatever its length; slicing off a fixed four
# characters would leave a trailing dot on names such as 'x.jpeg'.
submission_tta = pd.DataFrame({
    'id': [f.stem for f in test_files],
    'style': label_names
})

# Follow the row order of sample_submission.csv when it is available; only
# fall back to sorting by id when it is not, so the sort cannot undo the alignment.
# NOTE(review): the file is looked up under `train_path` - confirm that is
# where the competition ships it.
sample_sub_path = train_path/'sample_submission.csv'
if sample_sub_path.exists():
    # Read ids as text so they compare equal to the file-name stems above.
    sample_sub = pd.read_csv(sample_sub_path, dtype={'id': str})
    submission_tta = submission_tta.set_index('id').loc[sample_sub['id']].reset_index()
else:
    submission_tta = submission_tta.sort_values('id')

# Class distribution of the predictions, shown as the cell output.
submission_tta['style'].value_counts()

In [None]:
# Write the TTA submission to disk without the index column.
submission_tta.to_csv(path_or_buf='submission_tta.csv', index=False)
print("Submission file saved as submission_tta.csv")