In [12]:
import pandas as pd
import os
from PIL import Image
from keras.utils import img_to_array, load_img
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

df = pd.DataFrame(columns=['imagePath', 'label'])

directories = {
    "ayam": "./dataset/ayam_goreng/",
    "ayam": "./dataset/ayam_pop/",
    "daging_rendang": "./dataset/daging_rendang/",
    "dendeng_batokok": "./dataset/dendeng_batokok/",
    "gulai_ikan": "./dataset/gulai_ikan/",
    "gulai_tambusu": "./dataset/gulai_tambusu/",
    "telur_balado": "./dataset/telur_balado/",
    "telur_dadar": "./dataset/telur_dadar/",
    "tahu": "./dataset/tahu/",
    "daun_singkong": "./dataset/daun_singkong/",
    "nangka": "./dataset/nangka/",
    "perkedel": "./dataset/perkedel/",
    "nasi" : "./dataset/nasi/"
}

for label, directory in directories.items():
    for i in os.listdir(directory):
        df = pd.concat([df, pd.DataFrame({'imagePath': [f"{directory}/{i}"], 'label': [label]})])

df = df.reset_index(drop=True)

imgs = []

for image_path in df['imagePath']:
    img = load_img(image_path, target_size=(127, 127)).convert('RGB')
    img_array = img_to_array(img) / 128
    imgs.append(img_array)

df['img'] = imgs

In [13]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
x = np.array(df['img'].tolist())

y = le.fit_transform(df['label'])

In [27]:
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

def cutout(image, mask_size=20, mask_value=0):
    h, w, _ = image.shape
    top = np.random.randint(0 - mask_size // 2, h - mask_size)
    left = np.random.randint(0 - mask_size // 2, w - mask_size)
    bottom = top + mask_size
    right = left + mask_size
    image[max(0, top):min(h, bottom), max(0, left):min(w, right), :] = mask_value
    return image

# Define a new preprocessing function that applies the cutout technique
def augment_image(image):
    image = image + np.random.normal(0, 0.05, image.shape)  # Gaussian noise
    image = cutout(image)  # Cutout
    return image

datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.32,
    horizontal_flip=True,
    vertical_flip=True,
    brightness_range=[0.8, 1.2],
    fill_mode='nearest',
    preprocessing_function=augment_image
)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
datagen.fit(x_train)

In [14]:
x.shape

(2467, 127, 127, 3)

In [15]:
from keras.applications.vgg16 import VGG16, preprocess_input
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(127, 127, 3))

train_features = base_model.predict(datagen.flow(x_train, batch_size=32, shuffle=False), verbose=1)
test_features = base_model.predict(x_test, verbose=1)

x_train_features = train_features.reshape(train_features.shape[0], -1)
x_test_features = test_features.reshape(test_features.shape[0], -1)



In [16]:
model = RandomForestClassifier(n_estimators=100, random_state=42, max_depth=25, min_samples_split=5)
model.fit(x_train_features, y_train)

RandomForestClassifier(max_depth=25, min_samples_split=5, random_state=42)

In [17]:
y_pred = model.predict(x_test_features)
accuracy = '{:.1%}'.format(accuracy_score(y_test, y_pred))

print(f'Accuracy: {accuracy}')

Accuracy: 27.7%


In [14]:
from sklearn.decomposition import PCA

pca = PCA ()

model2 = RandomForestClassifier()

x_train = pca.fit_transform(x_train.reshape(x_train.shape[0], -1))
x_test = pca.fit_transform(x_test.reshape(x_test.shape[0], -1))
model2.fit(x_train, y_train)


RandomForestClassifier()

In [38]:
y_pred2 = model2.predict(x_test)
accuracy = '{:.1%}'.format(accuracy_score(y_test, y_pred2))

print(f'Accuracy: {accuracy}')

ValueError: X has 494 features, but RandomForestClassifier is expecting 1973 features as input.