## Import Library

In [1]:
import os
import shutil
import random
import re
from sklearn.model_selection import train_test_split

## Split Data Subjek

In [2]:
# Source and destination dataset paths.
# base_path is the root folder; the raw dataset is read from dataset_path and
# the per-subject layout is written to destination_path.
base_path = 'dataset'
dataset_path = os.path.join(base_path, 'DriverDrowsinessDataset')
destination_path = os.path.join(base_path, 'DatasetSubject')

In [3]:
def manage_subject_files(source_path, destination_path):
    """Move every image into a per-subject folder under ``destination_path``.

    For each of the two categories ('Drowsy', 'Non Drowsy') the images found
    directly inside ``source_path/<category>`` are moved (not copied) to
    ``destination_path/<category>/<subject>/``, where ``<subject>`` is the
    lower-cased first character of the file name.

    Args:
        source_path: Folder that contains the 'Drowsy' and 'Non Drowsy' folders.
        destination_path: Root folder of the per-subject layout; created if missing.

    Raises:
        FileNotFoundError: If one of the category folders is missing in
            ``source_path`` (raised by ``os.listdir``).
    """
    image_extensions = ('.png', '.jpg', '.jpeg')

    # exist_ok=True makes the call safe to repeat and avoids a check-then-create race
    os.makedirs(destination_path, exist_ok=True)

    for category in ['Drowsy', 'Non Drowsy']:
        category_path = os.path.join(source_path, category)
        category_destination = os.path.join(destination_path, category)
        os.makedirs(category_destination, exist_ok=True)

        for file_name in os.listdir(category_path):
            # Compare the extension case-insensitively so that files such as
            # 'A0001.JPG' are not silently left behind in the source folder
            if not file_name.lower().endswith(image_extensions):
                continue

            # The first character of the file name identifies the subject
            subject_folder = file_name[0].lower()
            subject_path = os.path.join(category_destination, subject_folder)
            os.makedirs(subject_path, exist_ok=True)

            # Move the file into its subject folder
            src_file = os.path.join(category_path, file_name)
            dst_file = os.path.join(subject_path, file_name)
            shutil.move(src_file, dst_file)

In [4]:
# Run the function. Images are moved (not copied) out of dataset_path,
# so the source category folders no longer contain them afterwards.
manage_subject_files(dataset_path, destination_path)
print("Split dataset subjek selesai.")

Split dataset subjek selesai.


### Split subjek z

In [5]:
# Source and destination paths.
# NOTE: base_path is re-bound here to the per-subject dataset root.
base_path = 'dataset/DatasetSubject'
categories = ['Drowsy', 'Non Drowsy']  # The two dataset categories
subfolder_to_split = 'z'  # Folder to process

In [6]:
# Split the files of one subject folder into sub-folders keyed by file-name prefix
def split_subject_in_z(category_path, subfolder=None):
    """Group the images of ``category_path/<subfolder>`` by two-character prefix.

    Every image directly inside the folder is moved into a sub-folder named
    after the first two characters of its file name (e.g. ``za``, ``zb``,
    ``zc``). Prints a message and returns if the folder does not exist.

    Args:
        category_path: Category folder that contains the folder to split.
        subfolder: Name of the folder to split. When omitted, the
            notebook-level ``subfolder_to_split`` setting is used.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')

    # Fall back to the notebook-level setting when no folder name is passed
    if subfolder is None:
        subfolder = subfolder_to_split
    z_path = os.path.join(category_path, subfolder)

    # Nothing to do when the folder is missing
    if not os.path.exists(z_path):
        print(f"Folder {z_path} tidak ditemukan.")
        return

    for filename in os.listdir(z_path):
        # Case-insensitive extension check so '.JPG' / '.PNG' files are not skipped
        if not filename.lower().endswith(image_extensions):
            continue  # Not an image file

        # The first two characters of the file name identify the subject
        subject_prefix = filename[:2]

        subject_folder = os.path.join(z_path, subject_prefix)
        os.makedirs(subject_folder, exist_ok=True)

        # Move the file into the matching sub-folder
        src_file = os.path.join(z_path, filename)
        dest_file = os.path.join(subject_folder, filename)
        shutil.move(src_file, dest_file)

    print(f"Proses pemisahan file di folder '{z_path}' selesai.")

In [7]:
# Run the function for every category
for category in categories:
    category_path = os.path.join(base_path, category)
    split_subject_in_z(category_path)

Proses pemisahan file di folder 'dataset/DatasetSubject\Drowsy\z' selesai.
Proses pemisahan file di folder 'dataset/DatasetSubject\Non Drowsy\z' selesai.


In [9]:
# Base path of the per-subject dataset
base_path = 'dataset/DatasetSubject'

# List of the main categories
categories = ['Drowsy', 'Non Drowsy']

In [10]:
# Promote the sub-folders of each category's 'z' folder to the category level
for category in categories:
    category_path = os.path.join(base_path, category)
    z_folder = os.path.join(category_path, 'z')

    # Categories without a 'z' folder need no flattening
    if not os.path.exists(z_folder):
        continue

    # Only directories are promoted; loose files stay where they are
    nested_dirs = [
        entry for entry in os.listdir(z_folder)
        if os.path.isdir(os.path.join(z_folder, entry))
    ]
    for subfolder in nested_dirs:
        print(f"Memindahkan {subfolder} dari {z_folder} ke {category_path}")
        shutil.move(
            os.path.join(z_folder, subfolder),
            os.path.join(category_path, subfolder),
        )

    # Remove 'z' once nothing is left inside it
    if len(os.listdir(z_folder)) == 0:
        print(f"Menghapus folder kosong: {z_folder}\n")
        os.rmdir(z_folder)

Memindahkan ZA dari dataset/DatasetSubject\Drowsy\z ke dataset/DatasetSubject\Drowsy
Memindahkan ZB dari dataset/DatasetSubject\Drowsy\z ke dataset/DatasetSubject\Drowsy
Memindahkan ZC dari dataset/DatasetSubject\Drowsy\z ke dataset/DatasetSubject\Drowsy
Menghapus folder kosong: dataset/DatasetSubject\Drowsy\z

Memindahkan za dari dataset/DatasetSubject\Non Drowsy\z ke dataset/DatasetSubject\Non Drowsy
Memindahkan zb dari dataset/DatasetSubject\Non Drowsy\z ke dataset/DatasetSubject\Non Drowsy
Memindahkan zc dari dataset/DatasetSubject\Non Drowsy\z ke dataset/DatasetSubject\Non Drowsy
Menghapus folder kosong: dataset/DatasetSubject\Non Drowsy\z



In [11]:
# Show the entries found directly inside each category folder
for category in categories:
    print(f"{category} folders:")
    folder_path = os.path.join(base_path, category)
    print(os.listdir(folder_path))

Drowsy folders:
['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'ZA', 'ZB', 'ZC']
Non Drowsy folders:
['a', 'b', 'c', 'd', 'e', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 'u', 'v', 'w', 'x', 'y', 'za', 'zb', 'zc']


In [12]:
# Report, per category, the number of entries in every subject folder
for category in categories:
    print(f"\n{category}:")
    folder_path = os.path.join(base_path, category)

    # Running total for the current category
    total_files = 0

    # Visit the entries alphabetically; anything that is not a folder is ignored
    for folder in sorted(os.listdir(folder_path)):
        folder_sub_path = os.path.join(folder_path, folder)
        if not os.path.isdir(folder_sub_path):
            continue

        # Every entry of the subject folder is counted, whatever its type
        file_count = len(os.listdir(folder_sub_path))
        print(f"{folder} : {file_count} files")
        total_files += file_count

    # Category total
    print(f"\nTotal {category} files: {total_files} files")


Drowsy:
A : 1411 files
B : 315 files
C : 335 files
D : 179 files
E : 962 files
F : 415 files
G : 499 files
H : 508 files
I : 1095 files
J : 474 files
K : 630 files
L : 732 files
M : 777 files
N : 1156 files
O : 1097 files
P : 963 files
Q : 562 files
R : 204 files
S : 487 files
T : 933 files
U : 420 files
V : 653 files
W : 1162 files
X : 1749 files
Y : 1112 files
ZA : 621 files
ZB : 1551 files
ZC : 1346 files

Total Drowsy files: 22348 files

Non Drowsy:
a : 1252 files
b : 409 files
c : 400 files
d : 1005 files
e : 1000 files
g : 109 files
h : 571 files
i : 1045 files
j : 717 files
k : 538 files
l : 381 files
m : 473 files
n : 957 files
o : 671 files
p : 190 files
q : 521 files
r : 522 files
s : 457 files
u : 510 files
v : 1002 files
w : 493 files
x : 1143 files
y : 1500 files
za : 1054 files
zb : 1237 files
zc : 1288 files

Total Non Drowsy files: 19445 files


## Split Data Final

In [14]:
# Source dataset paths.
# NOTE: base_path is re-bound here to the top-level 'dataset' folder.
base_path = 'dataset'
dataset_subject_path = os.path.join(base_path, 'DatasetSubject')
dataset_final_path = os.path.join(base_path, 'DatasetFinal')

# Destination paths of the train / val / test split
train_path = os.path.join(dataset_final_path, 'train')
val_path = os.path.join(dataset_final_path, 'val')
test_path = os.path.join(dataset_final_path, 'test')

In [15]:
# Make sure a folder exists
def ensure_folder_exists(folder_path):
    """Create ``folder_path`` and any missing parent folders.

    Does nothing when the folder already exists. ``exist_ok=True`` replaces the
    previous check-then-create sequence, which could fail if the folder appeared
    between the check and the creation.

    Raises:
        FileExistsError: If ``folder_path`` exists but is not a directory.
    """
    os.makedirs(folder_path, exist_ok=True)

# Split every subject's images into train / val / test folders
def split_dataset(source_folder, train_folder, val_folder, test_folder, split_ratio, seed=None):
    """Move each subject's images into train / val / test sub-folders.

    For every subject folder inside ``source_folder`` the image files are
    shuffled and then moved (not copied) to ``<split_folder>/<subject>/``.
    Each subject is split on its own, so a subject's images can end up in all
    three splits.

    Args:
        source_folder: Category folder that contains one folder per subject.
        train_folder: Destination root of the training split.
        val_folder: Destination root of the validation split.
        test_folder: Destination root of the test split.
        split_ratio: Sequence whose first two items are the train and
            validation fractions. The counts are truncated with ``int`` and the
            test split receives every remaining file; the third item is not read.
        seed: Optional seed for a private random generator. With the default
            ``None`` the module-level ``random.shuffle`` is used, as before.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')

    # A private generator keeps a seeded run independent of global random state
    shuffle = random.shuffle if seed is None else random.Random(seed).shuffle

    for split_folder in (train_folder, val_folder, test_folder):
        os.makedirs(split_folder, exist_ok=True)

    # os.listdir order is arbitrary; sorting makes a seeded run repeatable
    for subject in sorted(os.listdir(source_folder)):
        subject_path = os.path.join(source_folder, subject)
        if not os.path.isdir(subject_path):
            continue

        # Case-insensitive extension check so '.JPG' / '.PNG' files are included
        files = sorted(
            f for f in os.listdir(subject_path)
            if f.lower().endswith(image_extensions)
        )
        shuffle(files)

        train_count = int(len(files) * split_ratio[0])
        val_count = int(len(files) * split_ratio[1])
        val_end = train_count + val_count

        assignments = (
            (train_folder, files[:train_count]),
            (val_folder, files[train_count:val_end]),
            (test_folder, files[val_end:]),
        )
        for split_folder, split_files in assignments:
            # A subject folder is only created when it receives at least one file
            if not split_files:
                continue

            # Create the subject folder once per split instead of once per file
            target_folder = os.path.join(split_folder, subject)
            os.makedirs(target_folder, exist_ok=True)
            for file in split_files:
                shutil.move(os.path.join(subject_path, file), os.path.join(target_folder, file))

In [16]:
# Run the split for the Drowsy and Non Drowsy categories
split_ratio = (0.7, 0.15, 0.15)  # Train 70%, Val 15%, Test 15%

# Seed the shuffle so a re-run on the same data does not depend on leftover random state
split_seed = 42
random.seed(split_seed)

# Same call for both categories: Drowsy first, then Non Drowsy
for split_category in ('Drowsy', 'Non Drowsy'):
    split_dataset(
        os.path.join(dataset_subject_path, split_category),
        os.path.join(train_path, split_category),
        os.path.join(val_path, split_category),
        os.path.join(test_path, split_category),
        split_ratio
    )

In [17]:
# Print the number of image files in every subject folder of a category
def count_files_in_folders(category_path):
    """Print per-subject and total image counts for ``category_path``.

    The heading and the total line are labelled with the last component of
    ``category_path``.

    Args:
        category_path: Folder that contains one sub-folder per subject.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')

    # Splitting on '/' does not work for paths built with os.path.join on
    # Windows (the whole path was printed); basename/normpath use the
    # platform's own separators and tolerate a trailing separator.
    category_name = os.path.basename(os.path.normpath(category_path))

    total_files = 0
    print(f"\n{category_name}:")
    for subject in os.listdir(category_path):
        subject_path = os.path.join(category_path, subject)
        if not os.path.isdir(subject_path):
            continue

        # Case-insensitive extension check so '.JPG' / '.PNG' files are counted
        file_count = sum(
            1 for f in os.listdir(subject_path)
            if f.lower().endswith(image_extensions)
        )
        total_files += file_count
        print(f"{subject} : {file_count} files")
    print(f"Total {category_name} file: {total_files} files")

In [18]:
# Print the file counts of both categories for the train, val and test splits
categories = ['Drowsy', 'Non Drowsy']

# (heading, split root) pairs in reporting order; the headings carry their own
# leading newlines so the printed output keeps its spacing
split_reports = (
    ("Train:", train_path),
    ("\nVal:", val_path),
    ("\nTest:", test_path),
)

for heading, split_root in split_reports:
    print(heading)
    for category in categories:
        count_files_in_folders(os.path.join(split_root, category))

Train:

dataset\DatasetFinal\train\Drowsy:
A : 987 files
B : 220 files
C : 234 files
D : 125 files
E : 673 files
F : 290 files
G : 349 files
H : 355 files
I : 766 files
J : 331 files
K : 441 files
L : 512 files
M : 543 files
N : 809 files
O : 767 files
P : 674 files
Q : 393 files
R : 142 files
S : 340 files
T : 653 files
U : 294 files
V : 457 files
W : 813 files
X : 1224 files
Y : 778 files
ZA : 434 files
ZB : 1085 files
ZC : 942 files
Total dataset\DatasetFinal\train\Drowsy file: 15631 files

dataset\DatasetFinal\train\Non Drowsy:
a : 876 files
b : 286 files
c : 280 files
d : 703 files
e : 700 files
g : 76 files
h : 399 files
i : 731 files
j : 501 files
k : 376 files
l : 266 files
m : 331 files
n : 669 files
o : 469 files
p : 133 files
q : 364 files
r : 365 files
s : 319 files
u : 357 files
v : 701 files
w : 345 files
x : 800 files
y : 1050 files
za : 737 files
zb : 865 files
zc : 901 files
Total dataset\DatasetFinal\train\Non Drowsy file: 13600 files

Val:

dataset\DatasetFinal\val\D