In [None]:
from google.colab import drive, files
# Mount Google Drive under /content/gdrive/; the dataset zip is read from
# '/content/gdrive/My Drive/' in the next cell.
drive.mount('/content/gdrive/')

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


In [None]:
import zipfile

filename = '/content/gdrive/My Drive/sunda_dataset.zip'

# Extract into /content explicitly rather than the current working directory:
# the rest of the notebook reads the images from '/content/sunda_dataset', so
# the files must land there even if the kernel's cwd was changed (e.g. by `%cd`).
# The context manager closes the archive even when extraction raises.
with zipfile.ZipFile(filename) as zip_ref:
  zip_ref.extractall('/content')

In [None]:
import os
import shutil
import random

In [None]:
# Root folder of the extracted dataset; its entries are listed as the class
# names and each one is read as a folder of images by the split cell.
ROOT = '/content/sunda_dataset'

In [None]:
# Class names are the sub-folder names of ROOT, sorted so the class order is
# stable across runs (os.listdir returns entries in arbitrary order).
# Non-directory entries (stray files such as '.DS_Store') are skipped because
# the split cell calls os.listdir() on every class path.
classes = sorted(
    entry for entry in os.listdir(ROOT)
    if os.path.isdir(os.path.join(ROOT, entry))
)

In [None]:
train_dir, val_dir, test_dir = ['training', 'validation', 'testing']

# Create the train, validation, and test folders under /content, which is where
# the split cell writes (it joins these names onto path = '/content').  Relative
# os.makedirs() calls would instead create them in whatever the kernel's current
# working directory happens to be.
for split_dir in (train_dir, val_dir, test_dir):
  os.makedirs(os.path.join('/content', split_dir), exist_ok=True)

In [None]:
# random seed for reproducibility
random.seed(421)

# train:val:test = 0.70:0.15:0.15
# (test_per is informational: the test split takes whatever remains after the
# train and validation counts are rounded down.)
train_per, val_per, test_per = [0.7, 0.15, 0.15]

path = '/content'


def _copy_images(image_names, src_dir, dst_dir):
  """Copy every file named in image_names from src_dir into dst_dir.

  dst_dir is created if it does not exist yet, even when image_names is empty.
  """
  os.makedirs(dst_dir, exist_ok=True)
  for image_name in image_names:
    shutil.copy(os.path.join(src_dir, image_name),
                os.path.join(dst_dir, image_name))


# Build the train, validation, and test folders for each letter
for letter_name in classes:
  train_path = os.path.join(path, train_dir, letter_name)
  val_path = os.path.join(path, val_dir, letter_name)
  test_path = os.path.join(path, test_dir, letter_name)

  # original dataset image path
  image_path = os.path.join(ROOT, letter_name)

  # os.listdir() returns names in arbitrary order, so sort before shuffling:
  # only with a fixed starting order does the seeded shuffle produce the same
  # split on every run and machine.
  images = sorted(os.listdir(image_path))
  random.shuffle(images)

  # Calculate the number of images for each split
  num_images = len(images)
  num_train = int(train_per * num_images)
  num_val = int(val_per * num_images)
  num_test = num_images - num_train - num_val

  # Slice the shuffled list into consecutive, non-overlapping parts
  train_images = images[:num_train]
  val_images = images[num_train:num_train + num_val]
  test_images = images[num_train + num_val:]
  assert len(test_images) == num_test

  # Copy (the originals stay in ROOT) each part into its split sub-folder
  _copy_images(train_images, image_path, train_path)
  _copy_images(val_images, image_path, val_path)
  _copy_images(test_images, image_path, test_path)

print('Split completed!')

Split completed!


In [None]:
root_folders = ['training', 'validation', 'testing']

# Report, for every split folder, how many files each class sub-folder holds.
for split_name in root_folders:
  print(f"{split_name}:")
  split_root = os.path.join(path, split_name)
  for class_name in sorted(os.listdir(split_root)):
    n_files = len(os.listdir(os.path.join(split_root, class_name)))
    print(f'{class_name}: {n_files} image')

training:
sunda_a: 84 image
sunda_ae: 84 image
sunda_ba: 87 image
sunda_ca: 87 image
sunda_da: 87 image
sunda_e: 85 image
sunda_eu: 84 image
sunda_fa: 84 image
sunda_ga: 87 image
sunda_ha: 84 image
sunda_i: 84 image
sunda_ja: 87 image
sunda_ka: 87 image
sunda_la: 84 image
sunda_ma: 84 image
sunda_na: 84 image
sunda_nga: 84 image
sunda_nya: 84 image
sunda_o: 85 image
sunda_pa: 87 image
sunda_qa: 84 image
sunda_ra: 87 image
sunda_sa: 87 image
sunda_ta: 87 image
sunda_u: 84 image
sunda_va: 84 image
sunda_wa: 87 image
sunda_xa: 84 image
sunda_ya: 87 image
sunda_za: 84 image
validation:
sunda_a: 18 image
sunda_ae: 18 image
sunda_ba: 18 image
sunda_ca: 18 image
sunda_da: 18 image
sunda_e: 18 image
sunda_eu: 18 image
sunda_fa: 18 image
sunda_ga: 18 image
sunda_ha: 18 image
sunda_i: 18 image
sunda_ja: 18 image
sunda_ka: 18 image
sunda_la: 18 image
sunda_ma: 18 image
sunda_na: 18 image
sunda_nga: 18 image
sunda_nya: 18 image
sunda_o: 18 image
sunda_pa: 18 image
sunda_qa: 18 image
sunda_ra: 18 i

In [None]:
import os

# Scratch check: length of one class folder's path under /content/training
# (the recorded output is 26); the value is not used by any other cell shown.
len('/content/training/sunda_ca')

26

In [None]:
import os

In [None]:
# `%cd` with no argument changes the kernel's working directory; the recorded
# output shows it ends up in /root.  When the notebook is run in order, relative
# paths in the cells that follow (e.g. 'sunda.zip') resolve against that folder.
%cd

/root


In [None]:
# List the current working directory to check which folders exist there.
!ls

sunda_dataset  testing	training  validation


In [None]:
import shutil

def zip_folder(folder_path, zip_path):
    """Pack the contents of a folder into a zip archive.

    Args:
        folder_path: Directory whose contents become the root of the archive.
        zip_path: Archive path WITHOUT the extension; shutil.make_archive
            appends '.zip' itself.

    Returns:
        The archive file name returned by shutil.make_archive (it ends in
        '.zip'), so callers can pass it straight on, e.g. to a download call.
    """
    return shutil.make_archive(zip_path, 'zip', folder_path)

# Archive the processed image folder so it can be downloaded.
# NOTE(review): no cell shown here creates 'aksara_dataset_processed', and the
# `!ls` output recorded earlier lists only sunda_dataset/testing/training/validation
# - this looks like a leftover from another notebook; confirm before relying on it.
folder_path = zip_path = 'aksara_dataset_processed'
zip_folder(folder_path, zip_path)

In [None]:
# Bundle the three split folders into sunda.zip (written to the current working
# directory).  -q keeps zip from printing one "adding:" line per image, which
# otherwise floods the cell output.
!zip -q -r sunda.zip /content/training /content/testing /content/validation

  adding: content/training/ (stored 0%)
  adding: content/training/sunda_ca/ (stored 0%)
  adding: content/training/sunda_ca/ca_49.jpg (deflated 10%)
  adding: content/training/sunda_ca/ca_106.jpg (deflated 24%)
  adding: content/training/sunda_ca/ca_15.jpg (deflated 15%)
  adding: content/training/sunda_ca/ca_84.jpg (deflated 21%)
  adding: content/training/sunda_ca/ca_119.jpg (deflated 24%)
  adding: content/training/sunda_ca/ca_55.jpg (deflated 12%)
  adding: content/training/sunda_ca/ca_83.jpg (deflated 25%)
  adding: content/training/sunda_ca/ca_43.jpg (deflated 11%)
  adding: content/training/sunda_ca/ca_30.jpg (deflated 27%)
  adding: content/training/sunda_ca/ca_114.jpg (deflated 26%)
  adding: content/training/sunda_ca/ca_96.jpg (deflated 21%)
  adding: content/training/sunda_ca/ca_52.jpg (deflated 12%)
  adding: content/training/sunda_ca/ca_53.jpg (deflated 13%)
  adding: content/training/sunda_ca/ca_120.jpg (deflated 24%)
  adding: content/training/sunda_ca/ca_29.jpg (deflat

In [None]:
from google.colab import files
# Trigger a browser download of the archive; 'sunda.zip' is a relative path, so
# it must be in the kernel's current working directory (where `!zip` wrote it).
files.download('sunda.zip')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>