In [1]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.utils import resample
from torchvision import datasets
from PIL import Image

# Load Fashion-MNIST through Keras (images + labels) and again through
# torchvision, whose `.targets` tensors are the label source used below.
# NOTE(review): train_labels_mnist / test_labels_mnist from the Keras call look
# like they carry the same labels, which would make the torchvision download
# redundant — TODO confirm before removing it.
(train_images_mnist, train_labels_mnist), (test_images_mnist, test_labels_mnist) = fashion_mnist.load_data()
train_dataset = datasets.FashionMNIST('path_ke_folder', train=True, download=True)
test_dataset = datasets.FashionMNIST('path_ke_folder', train=False, download=True)
train_label_fashionmnist = train_dataset.targets
test_label_fashionmnist = test_dataset.targets

# Read the custom dataset's CSV and keep its 'articleType' column as string labels.
data_csv = pd.read_csv('mytradataset/baru.csv')
label_csv = data_csv['articleType'].astype(str)

# Build the combined label vector. Resulting order:
#   1. Fashion-MNIST training labels
#   2. the first 4190 CSV labels
#   3. Fashion-MNIST test labels
train_label_gabungan = np.concatenate(
    (train_label_fashionmnist, label_csv[:4190]),
    axis=0,
)
total_label_gabungan = np.concatenate(
    (train_label_gabungan, test_label_fashionmnist),
    axis=0,
)


# Load the custom images from the image folder and merge them with the
# Fashion-MNIST images into one array aligned with `total_label_gabungan`.
image_folder = 'mytradataset/images(1)/'

# Upper bound on custom images; same count as the CSV label slice used when
# the combined label vector was built.
MAX_CUSTOM_IMAGES = 4190

# os.listdir returns entries in arbitrary order, so sort to make the selected
# files (and their order) reproducible across machines and runs.
# NOTE(review): assumes the sorted file order corresponds to the CSV row order
# of the labels — TODO confirm (e.g. by loading files via an id column).
image_filenames = sorted(os.listdir(image_folder))[:MAX_CUSTOM_IMAGES]

custom_images = []
for filename in image_filenames:
    image_path = os.path.join(image_folder, filename)
    # Context manager closes the underlying file handle after decoding.
    with Image.open(image_path) as image:
        # Grayscale ('L') and 28x28 to match the Fashion-MNIST image format.
        custom_images.append(np.array(image.convert('L').resize((28, 28))))

image_count = len(custom_images)

# Stack into (n, 28, 28); the reshape keeps a well-formed shape even when the
# folder is empty so the length check below reports the real problem.
image_data_fashion = np.array(custom_images, dtype=np.uint8).reshape(-1, 28, 28)

# Concatenate in the SAME order as the labels:
# Fashion-MNIST train -> custom images -> Fashion-MNIST test.
# (The previous order put the custom images first, pairing images with the
# wrong labels.)
image_data = np.concatenate(
    [train_images_mnist, image_data_fashion, test_images_mnist],
    axis=0,
)

# Every image needs exactly one label.
assert image_data.shape[0] == total_label_gabungan.shape[0], (
    f"image/label count mismatch: {image_data.shape[0]} images vs "
    f"{total_label_gabungan.shape[0]} labels"
)

# Show the shapes of the combined image and label arrays.
print("Shape data gambar:", image_data.shape)
print("Shape label:", total_label_gabungan.shape)


Shape data gambar: (52190, 28, 28)
Shape label: (52190,)


In [2]:
# Encode the combined labels as integer class ids (sparse labels, NOT one-hot).
# All labels are cast to str first, so numeric Fashion-MNIST ids and the CSV
# 'articleType' names end up in one shared label space.
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(total_label_gabungan.astype(str))
num_classes = len(label_encoder.classes_)

# Split into training and test sets (80/20, fixed seed for reproducibility).
x_train, x_test, y_train, y_test = train_test_split(
    image_data, encoded_labels, test_size=0.2, random_state=42
)

# Normalise pixels by 255 and add the trailing channel axis the CNN expects.
# Cast to float32 first: dividing the integer arrays directly would produce
# float64 and double the memory footprint for no benefit.
# (assumes 8-bit pixel values, i.e. range 0-255 — TODO confirm)
x_train = (x_train.astype('float32') / 255.0).reshape(-1, 28, 28, 1)
x_test = (x_test.astype('float32') / 255.0).reshape(-1, 28, 28, 1)

# Seed TensorFlow's global RNG before any weights are created so that runs are
# more repeatable (same seed value as the train/test split).
tf.random.set_seed(42)

# Build the CNN: three Conv2D + max-pooling stages, then a dense classifier
# with one softmax output per encoded class. Uses the layer classes imported
# at the top of the notebook.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(num_classes, activation='softmax'),
])


# Compile with Adam and a sparse categorical loss (labels are integer class ids).
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for 10 epochs; the test split doubles as the per-epoch validation set.
model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=10,
)

# Evaluate on the test split; evaluate() returns [loss, accuracy].
test_accuracy = model.evaluate(x_test, y_test)[1]
print("Akurasi pada data uji:", test_accuracy)


Epoch 1/10
Epoch 2/10