In [1]:
import os
import pandas as pd

data = []
root_dir = 'dataset'

for label in os.listdir(root_dir):
    if label == '.DS_Store': continue
    
    label_path = os.path.join(root_dir, label)
    
    if os.path.isdir(label_path):
        for image in os.listdir(label_path):
            if image.endswith(('.jpg', '.jpeg', '.png')):
                image_path = os.path.join(label_path, image)
                data.append({'Image': image_path, 'Label': label})

df = pd.DataFrame(data)
df.tail()

Unnamed: 0,Image,Label
751223,data/train/25/184.jpg,25
751224,data/train/25/2119.jpg,25
751225,data/train/25/_1_981.jpg,25
751226,data/train/25/1376.jpg,25
751227,data/train/25/_1_759.jpg,25


In [2]:
df.head()

Unnamed: 0,Image,Label
0,data/train/R/63.jpg,R
1,data/train/R/_1_740.jpg,R
2,data/train/R/_1_5907.jpg,R
3,data/train/R/823.jpg,R
4,data/train/R/_1_998.jpg,R


In [8]:
labels = os.listdir(root_dir)
labels.remove('.DS_Store')
labels = sorted(labels)
print(labels)

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']


In [11]:
encoded = {labels[i]: str(i) for i in range(len(labels))}
print(encoded)

{'0': '0', '1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', 'A': '10', 'B': '11', 'C': '12', 'D': '13', 'E': '14', 'F': '15', 'G': '16', 'H': '17', 'I': '18', 'J': '19', 'K': '20', 'L': '21', 'M': '22', 'N': '23', 'P': '24', 'Q': '25', 'R': '26', 'S': '27', 'T': '28', 'U': '29', 'V': '30', 'W': '31', 'X': '32', 'Y': '33', 'Z': '34'}


In [13]:
import cv2

def process_images(dir, data):
    for idx, row in data.iterrows():
        image_path = row['Image']
        label = str(row['Label'])
        
        label_directory = os.path.join(dir, label)
        os.makedirs(label_directory, exist_ok=True)
        
        image = cv2.imread(image_path)
        
        if image is not None:
            # inverted_image = cv2.bitwise_not(image)
            image_filename = os.path.basename(image_path)
            
            save_path = os.path.join(label_directory, image_filename)
            cv2.imwrite(save_path, image)
        else:
            print(f'Error loading image: {image_path}')

    print(f'{idx - 1} images copied to {dir}')

In [14]:
process_images(root_dir, df)

751226 images copied to data/train


In [5]:
train_dir = 'data2/train'
valid_dir = 'data2/validation'

In [6]:
labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '+', '-', '(', ')']

In [7]:
encoded = {labels[i]: str(i) for i in range(len(labels))}
print(encoded)

{'0': '0', '1': '1', '2': '2', '3': '3', '4': '4', '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', 'A': '10', 'B': '11', 'C': '12', 'D': '13', 'E': '14', 'F': '15', 'G': '16', 'H': '17', 'I': '18', 'J': '19', 'K': '20', 'L': '21', 'M': '22', 'N': '23', 'P': '24', 'Q': '25', 'R': '26', 'S': '27', 'T': '28', 'U': '29', 'V': '30', 'W': '31', 'X': '32', 'Y': '33', 'Z': '34', '+': '35', '-': '36', '(': '37', ')': '38'}


In [8]:
import os
import shutil

for dir in [train_dir, valid_dir]:
    folders = os.listdir(dir)

    for folder in folders:
        if folder in encoded:
            old_path = os.path.join(dir, folder)
            new_name = encoded[folder]
            new_path = os.path.join(dir, new_name)

            shutil.move(old_path, new_path)
            print(f'Renamed {folder} to {new_name}')
        else:
            print(f'No mapping found for {folder}')

No mapping found for .DS_Store
Renamed R to 26
Renamed U to 29
Renamed 9 to 9
Renamed 0 to 0
Renamed 7 to 7
Renamed I to 18
Renamed N to 23
Renamed G to 16
Renamed + to 35
Renamed 6 to 6
Renamed Z to 34
Renamed 1 to 1
Renamed 8 to 8
Renamed T to 28
Renamed S to 27
Renamed A to 10
Renamed - to 36
Renamed F to 15
Renamed H to 17
Renamed M to 22
Renamed J to 19
Renamed C to 12
Renamed ( to 37
Renamed D to 13
Renamed V to 30
Renamed Q to 25
Renamed 4 to 4
Renamed X to 32
Renamed 3 to 3
Renamed E to 14
Renamed ) to 38
Renamed B to 11
Renamed K to 20
Renamed L to 21
Renamed 2 to 2
Renamed Y to 33
Renamed 5 to 5
Renamed P to 24
Renamed W to 31
No mapping found for .DS_Store
Renamed R to 26
Renamed U to 29
Renamed 9 to 9
Renamed 0 to 0
Renamed 7 to 7
Renamed I to 18
Renamed N to 23
Renamed G to 16
Renamed + to 35
Renamed 6 to 6
Renamed Z to 34
Renamed 1 to 1
Renamed 8 to 8
Renamed T to 28
Renamed S to 27
Renamed A to 10
Renamed - to 36
Renamed F to 15
Renamed H to 17
Renamed M to 22
Renamed J 