In [None]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf 
import os
import numpy as np
from PIL import Image
import psycopg2

In [2]:
# Extract: build an index of (image_path, label) rows from the class folders.

base_path = "Klasifikasi_Sampah"

# Only files with one of these extensions (case-insensitive) count as images.
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')

data = []

# Every sub-directory of base_path is one class and its name is the label.
# os.listdir() returns entries in arbitrary, platform-dependent order, so the
# listings are sorted: the row order of df feeds the seeded train/test split,
# and an unstable order would make that split differ between runs/machines.
for label in sorted(os.listdir(base_path)):
    class_folder = os.path.join(base_path, label)

    # Skip stray files that sit next to the class folders
    if not os.path.isdir(class_folder):
        continue

    for file_name in sorted(os.listdir(class_folder)):
        # Skip anything that is not an image
        if not file_name.lower().endswith(image_extensions):
            continue

        file_path = os.path.join(class_folder, file_name)
        data.append({
            'image_path': file_path,
            'label': label
        })

# Convert to DataFrame. Naming the columns explicitly keeps the schema (and
# therefore df['label']) available even when no image was found.
df = pd.DataFrame(data, columns=['image_path', 'label'])
df.head()



Unnamed: 0,image_path,label
0,Klasifikasi_Sampah\Anorganik\banbekas1.jpg,Anorganik
1,Klasifikasi_Sampah\Anorganik\banbekas2.jpg,Anorganik
2,Klasifikasi_Sampah\Anorganik\banbekas3.jpg,Anorganik
3,Klasifikasi_Sampah\Anorganik\banbekas4.jpg,Anorganik
4,Klasifikasi_Sampah\Anorganik\banbekas6.jpg,Anorganik


In [3]:
# Split the dataset into a train set and a test set

df_train, df_test = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['label'],
)

# Check the per-class distribution of both splits
for split_title, split_frame in (("Train:", df_train), ("Test :", df_test)):
    print(split_title, split_frame['label'].value_counts())

Train: label
B3           2108
Anorganik    1562
Organik       830
Name: count, dtype: int64
Test : label
B3           528
Anorganik    391
Organik      207
Name: count, dtype: int64


In [4]:
# Augmentation

# Fraction of df_train held out for validation. Both generators that read
# df_train must use the same value so they agree on where the frame is cut.
VALIDATION_SPLIT = 0.3

# Options shared by every flow_from_dataframe call below. The frames already
# carry complete paths in 'image_path', hence directory=None. shuffle=False
# keeps the batches in DataFrame row order.
flow_options = dict(directory=None,
                    x_col='image_path',
                    y_col='label',
                    target_size=(224, 224),
                    class_mode='sparse',
                    shuffle=False,
                    seed=2)

# Random augmentation: applied to the training subset only.
train_datagen_aug = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255.,
                                                                    rotation_range=20,
                                                                    zoom_range=0.05,
                                                                    width_shift_range=0.05,
                                                                    height_shift_range=0.05,
                                                                    shear_range=0.05,
                                                                    horizontal_flip=True,
                                                                    validation_split=VALIDATION_SPLIT)

# Validation images must only be rescaled. Drawing the validation subset from
# the augmenting generator above would rotate/shift/flip them as well, so the
# validation set would no longer consist of the original images.
# The subset is selected from validation_split and the row order of the frame,
# so this generator yields the same validation rows as before.
val_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255.,
                                                              validation_split=VALIDATION_SPLIT)

train_set_aug = train_datagen_aug.flow_from_dataframe(df_train,
                                                      subset='training',
                                                      batch_size=32,
                                                      **flow_options)

val_set_aug = val_datagen.flow_from_dataframe(df_train,
                                              subset='validation',
                                              batch_size=32,
                                              **flow_options)

# The test set is only rescaled, never augmented.
test_datagen_aug = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255.)

test_set_aug = test_datagen_aug.flow_from_dataframe(df_test,
                                                    batch_size=64,
                                                    **flow_options)

Found 3150 validated image filenames belonging to 3 classes.
Found 1350 validated image filenames belonging to 3 classes.
Found 1126 validated image filenames belonging to 3 classes.


In [8]:
def save_generator_images(generator, output_root):
    """Write one full pass of an image generator to disk as JPEG files.

    Every image is saved as ``<output_root>/<class_name>/<idx>.jpg`` where
    ``idx`` is a running counter over all classes, starting at 0.

    Args:
        generator: Iterable yielding ``(batch_x, batch_y)`` pairs. It must
            expose ``class_indices`` (class name -> integer index) and ``n``
            (number of samples in one pass). ``batch_x`` is expected to hold
            pixel values scaled to [0, 1], which is what the
            ``rescale=1./255.`` generators in this notebook produce;
            ``batch_y`` holds the class index of every image.
        output_root: Directory that receives one sub-directory per class.
            Created if missing; existing files with the same name are
            overwritten.
    """
    os.makedirs(output_root, exist_ok=True)

    # Invert name -> index once, so labels are resolved by their index rather
    # than by the position of a key inside the dict (and not once per image).
    index_to_class = {index: name for name, index in generator.class_indices.items()}
    for class_name in index_to_class.values():
        os.makedirs(os.path.join(output_root, class_name), exist_ok=True)

    # Nothing to save; also avoids pulling batches from an empty generator.
    if generator.n <= 0:
        return

    idx = 0

    for batch_x, batch_y in generator:
        for pixels, class_index in zip(batch_x, batch_y):
            # Undo the 1/255 rescale. Round before casting: a bare astype()
            # truncates, so e.g. 254.99998 would be stored as 254. Clipping
            # keeps out-of-range values from wrapping around in uint8.
            img = np.clip(np.rint(pixels * 255), 0, 255).astype(np.uint8)

            label = index_to_class[int(class_index)]
            out_path = os.path.join(output_root, label, f"{idx}.jpg")

            Image.fromarray(img).save(out_path)
            idx += 1

        # The generator may cycle indefinitely (Keras iterators do), so stop
        # once a full pass of n samples has been written.
        if idx >= generator.n:
            break

# Save every split (train, val, test) into its own folder
for split_generator, split_output_dir in (
    (train_set_aug, "processed/train"),
    (val_set_aug, "processed/val"),
    (test_set_aug, "processed/test"),
):
    save_generator_images(split_generator, split_output_dir)


In [None]:
def insert_image_paths(root_dir, split):
    """Insert one row per file found under ``root_dir`` into the ``images`` table.

    ``root_dir`` is expected to contain one sub-directory per class. Every
    file inside a class directory produces a row ``(split, class, file_path)``.
    All rows are committed together at the end; if anything fails before the
    commit, the connection is closed without committing.

    Connection settings can be overridden with the environment variables
    ``PGHOST``, ``PGDATABASE``, ``PGPORT``, ``PGUSER`` and ``PGPASSWORD``;
    when unset, the local development values are used.

    NOTE(review): the function is not idempotent - calling it twice for the
    same folder inserts the same rows twice.

    Args:
        root_dir: Folder holding one sub-directory per class.
        split: Value stored in the ``split`` column for every inserted row.

    Raises:
        FileNotFoundError: If ``root_dir`` does not exist.
    """
    # Kept as the exact statement text that was executed inline before.
    insert_sql = """
                INSERT INTO images (split, class, file_path)
                VALUES (%s, %s, %s)
            """

    print("\n=== STARTING INSERT ===")

    # Connect to the DB; credentials come from the environment when provided
    conn = psycopg2.connect(
        host=os.environ.get("PGHOST", "localhost"),
        database=os.environ.get("PGDATABASE", "final_project"),
        port=os.environ.get("PGPORT", "5433"),
        user=os.environ.get("PGUSER", "postgres"),
        password=os.environ.get("PGPASSWORD", "postgres")
    )
    try:
        cur = conn.cursor()
        try:
            # Show which database is in use
            cur.execute("SELECT current_database();")
            print("Connected to DB:", cur.fetchone()[0])

            # List the folder once; only directories are classes, and sorting
            # makes the insertion order deterministic.
            class_names = sorted(
                entry for entry in os.listdir(root_dir)
                if os.path.isdir(os.path.join(root_dir, entry))
            )
            print("Reading folder:", root_dir)
            print("Classes:", class_names)

            inserted = 0

            for class_name in class_names:
                class_dir = os.path.join(root_dir, class_name)
                print("Processing class:", class_name)

                for filename in sorted(os.listdir(class_dir)):
                    file_path = os.path.join(class_dir, filename)
                    cur.execute(insert_sql, (split, class_name, file_path))
                    inserted += 1

            conn.commit()
            print("Total inserted:", inserted)
        finally:
            cur.close()
    finally:
        # Always release the connection, also when an insert failed
        conn.close()

    print("=== DONE ===\n")

# Register the saved files of every split in the database
for split_dir, split_name in (
    ("processed/train", "train"),
    ("processed/val", "val"),
    ("processed/test", "test"),
):
    insert_image_paths(split_dir, split_name)



=== STARTING INSERT ===
Connected to DB: final_project
Reading folder: processed/train
Classes: ['Anorganik', 'B3', 'Organik']
Processing class: Anorganik
Processing class: B3
Processing class: Organik
Total inserted: 3150
=== DONE ===


=== STARTING INSERT ===
Connected to DB: final_project
Reading folder: processed/val
Classes: ['Anorganik', 'B3', 'Organik']
Processing class: Anorganik
Processing class: B3
Processing class: Organik
Total inserted: 1350
=== DONE ===


=== STARTING INSERT ===
Connected to DB: final_project
Reading folder: processed/test
Classes: ['Anorganik', 'B3', 'Organik']
Processing class: Anorganik
Processing class: B3
Processing class: Organik
Total inserted: 1126
=== DONE ===

