<a href="https://colab.research.google.com/github/laluwisnu/UAP-ML/blob/main/Untitled76.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import json
from google.colab import drive

# 1. Mount Google Drive
drive.mount('/content/drive')

dataset_path = '/content/drive/MyDrive/dataset coco'
image_folder = os.path.join(dataset_path, 'val2017')
annotation_file = os.path.join(dataset_path, 'annotations/captions_val2017.json')

# 3. Hitung Jumlah Gambar
if os.path.exists(image_folder):
    num_images = len([f for f in os.listdir(image_folder) if f.endswith(('.jpg', '.jpeg'))])
    print(f"Jumlah Gambar di folder 'val2017': {num_images}")
else:
    print("Folder gambar tidak ditemukan!")

# 4. Hitung Jumlah Caption dalam JSON
if os.path.exists(annotation_file):
    with open(annotation_file, 'r') as f:
        data = json.load(f)
        # COCO JSON biasanya memiliki key 'annotations'
        num_captions = len(data['annotations'])
        print(f"Jumlah Total Caption di 'captions_val2017.json': {num_captions}")
else:
    print("File anotasi JSON tidak ditemukan!")

# 5. Cek Kesesuaian Syarat UAP (Minimal 5.000 data)
if num_images >= 5000:
    print("✅ Syarat minimal 5.000 data terpenuhi.")
else:
    print("⚠️ Data kurang dari 5.000. Anda mungkin perlu melakukan augmentasi.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Jumlah Gambar di folder 'val2017': 5000
Jumlah Total Caption di 'captions_val2017.json': 25014
✅ Syarat minimal 5.000 data terpenuhi.


In [None]:
import pandas as pd
from pycocotools.coco import COCO

# Load anotasi instances untuk mendapatkan kategori (label)
ann_file_instances = os.path.join(dataset_path, 'annotations/instances_val2017.json')
ann_file_captions = os.path.join(dataset_path, 'annotations/captions_val2017.json')

coco_obj = COCO(ann_file_instances)
coco_caps = COCO(ann_file_captions)

# Pilih kategori yang ingin Anda fokuskan (misal 5 kategori agar seimbang)
target_categories = ['person', 'motorcycle', 'car', 'dog', 'chair']
catIds = coco_obj.getCatIds(catNms=target_categories)

data_list = []

for catId in catIds:
    imgIds = coco_obj.getImgIds(catIds=catId)
    category_name = coco_obj.loadCats(catId)[0]['name']

    for imgId in imgIds[:1000]:  # Ambil 1000 sampel per kategori agar seimbang
        img_info = coco_obj.loadImgs(imgId)[0]
        annIds = coco_caps.getAnnIds(imgIds=imgId)
        anns = coco_caps.loadAnns(annIds)

        # Ambil satu caption pertama
        caption = anns[0]['caption'] if anns else ""

        data_list.append({
            'image_path': os.path.join(image_folder, img_info['file_name']),
            'label': category_name,
            'caption': caption
        })

df = pd.DataFrame(data_list)
print(df['label'].value_counts())
df.to_csv('metadata_siap_latih.csv', index=False)

loading annotations into memory...
Done (t=1.66s)
creating index...
index created!
loading annotations into memory...
Done (t=0.09s)
creating index...
index created!
label
person        1000
chair          580
car            535
dog            177
motorcycle     159
Name: count, dtype: int64


In [54]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Augmentasi khusus untuk Training agar data lebih variatif
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    validation_split=0.2 # Split 20% untuk validasi
)

# Generator untuk data Training
train_generator = train_datagen.flow_from_dataframe(
    dataframe=df,
    x_col='image_path',
    y_col='label',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    subset='training'
)

# Generator untuk data Validasi
val_generator = train_datagen.flow_from_dataframe(
    dataframe=df,
    x_col='image_path',
    y_col='label',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    subset='validation'
)

Found 1961 validated image filenames belonging to 5 classes.
Found 490 validated image filenames belonging to 5 classes.


#Neural Network Base

In [51]:
from tensorflow.keras import layers, models

def build_base_cnn(num_classes):
    model = models.Sequential([
        layers.Input(shape=(224, 224, 3)),

        layers.Conv2D(32, (3,3), activation='relu'),
        layers.MaxPooling2D(),

        layers.Conv2D(64, (3,3), activation='relu'),
        layers.MaxPooling2D(),

        layers.Conv2D(128, (3,3), activation='relu'),
        layers.MaxPooling2D(),

        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax')
    ])

    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model


# === Build & Save ===
model_cnn = build_base_cnn(5)
model_cnn.summary()

model_cnn.save("model_cnn_FINAL.keras")
print("✅ Base CNN disimpan")


✅ Base CNN disimpan


# ResNet50

In [50]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras import layers, models

def build_resnet(num_classes):
    base_model = ResNet50(
        weights='imagenet',
        include_top=False,
        input_shape=(224,224,3)
    )
    base_model.trainable = False  # Wajib utk pretrained

    x = base_model.output
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(256, activation='relu')(x)
    x = layers.Dropout(0.4)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)

    model = models.Model(
        inputs=base_model.input,
        outputs=outputs
    )

    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model


# === Build & Save ===
model_resnet = build_resnet(5)
model_resnet.summary()

model_resnet.save("model_resnet_FINAL.keras")
print("✅ ResNet50 disimpan")


✅ ResNet50 disimpan


#mobilenet

In [67]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout

# Load base model tanpa top layer
base_mobile = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224,224,3))
base_mobile.trainable = False

# Tambahkan layer custom
x = base_mobile.output
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
outputs = Dense(5, activation='softmax')(x)

model_mobilenet = Model(inputs=base_mobile.input, outputs=outputs)

# Compile model
model_mobilenet.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model_mobilenet.summary()


In [65]:
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# visualisasi setelah training
def plot_history(history):
    plt.figure(figsize=(12, 4))
    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label='Val Accuracy')
    plt.title('Accuracy')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Val Loss')
    plt.title('Loss')
    plt.legend()
    plt.show()