In [1]:
#Dependencies
from PIL import Image
import shutil, os, re
import pandas as pd


In [2]:
#Using the data split defined in the classification folder, train.txt and test.txt
train_file = "data/train_test_split/classification/train.txt"
test_file = "data/train_test_split/classification/test.txt"
classification_dir = "data/train_test_split/classification"
output_dir = "data/output"


In [50]:
#Function that copies images into output/test and output/train based on the classification_dir

def train_test_split(input_file):
    split_type = 'train' if 'train' in input_file else 'test'

    #Construct paths
    input_path = os.path.join(classification_dir, input_file)
    output_path = os.path.join(output_dir, split_type)


    with open(input_path, 'r') as file:
        for line in file:

            #Construct image source path
            relative_image_path = line.strip()
            source_image_path = os.path.join('data', 'image', relative_image_path)

            #Construct image destination path
            dest_image_path = os.path.join(output_path, relative_image_path)

            # Ensure the destination subfolder exists
            dest_folder_path = os.path.dirname(dest_image_path)
            if not os.path.exists(dest_folder_path):
                os.makedirs(dest_folder_path)
            
            #Copy the image
            shutil.copy2(source_image_path, dest_image_path)
            print(f"Copied to: {dest_image_path}")

#train_test_split('train.txt')
#train_test_split('test.txt')


In [18]:
output_dir = 'data/output'
label_dir = 'data/label'
resized_dir = 'data/resized_images'


In [36]:
def resize_image(image_path, dest_folder, resize_size=(96, 96)):
    # Determine the subfolder based on the image path
    split_type = 'train' if 'train' in image_path else 'test'
    
    # Resizing the image to the fixed size
    with Image.open(image_path) as img:
        resized_image = img.resize(resize_size)

    # Constructing the new filename
    base_name = os.path.basename(image_path)
    new_name = '_'.join(image_path.split(os.sep)[-4:])  # Adjust the split depth as needed
    new_name = new_name.replace(os.sep, '_')
    

    # Constructing destination path
    dest_path = os.path.join(dest_folder, split_type, new_name)
    os.makedirs(os.path.dirname(dest_path), exist_ok=True)

    # Save resized image
    resized_image.save(dest_path)
    print('Saved resized image to:', dest_path)


In [25]:
resize_image('data/output/test/26/1692/2010/5f67724da84377.jpg' ,'resized_images')


Saved resized image to: resized_images/test/26_1692_2010_5f67724da84377.jpg


In [8]:

def rename_image_classes(directory):
    # Step 1: Read all image filenames in the directory
    filenames = os.listdir(directory)
    
    # Step 2: Extract the label from each filename
    label_pattern = re.compile(r'^(\d+)_')
    labels = set()
    for filename in filenames:
        match = label_pattern.match(filename)
        if match:
            labels.add(int(match.group(1)))
    
    # Step 3: Map old labels to new labels starting from 1
    sorted_labels = sorted(labels)
    label_map = {old_label: new_label for new_label, old_label in enumerate(sorted_labels, start=0)}
    
    # Print the label mapping
    print("Label Mapping:")
    for old_label, new_label in label_map.items():
        print(f"{old_label} -> {new_label}")
        
    # Step 4: Rename the files with the new labels
    for filename in filenames:
        match = label_pattern.match(filename)
        if match:
            old_label = int(match.group(1))
            new_label = label_map[old_label]
            new_filename = filename.replace(f"{old_label}_", f"{new_label}_", 1)
            os.rename(os.path.join(directory, filename), os.path.join(directory, new_filename))


    

In [11]:
# Usage
rename_image_classes('data/resized_images/test')
rename_image_classes('data/resized_images/train')
rename_image_classes('data/resized_images/validation')

Label Mapping:
4 -> 0
11 -> 1
14 -> 2
15 -> 3
26 -> 4
27 -> 5
28 -> 6
29 -> 7
34 -> 8
35 -> 9
36 -> 10
37 -> 11
38 -> 12
39 -> 13
43 -> 14
45 -> 15
48 -> 16
50 -> 17
51 -> 18
53 -> 19
54 -> 20
55 -> 21
58 -> 22
64 -> 23
68 -> 24
69 -> 25
70 -> 26
71 -> 27
73 -> 28
75 -> 29
76 -> 30
77 -> 31
78 -> 32
80 -> 33
81 -> 34
82 -> 35
84 -> 36
87 -> 37
88 -> 38
89 -> 39
92 -> 40
94 -> 41
95 -> 42
97 -> 43
100 -> 44
102 -> 45
103 -> 46
105 -> 47
106 -> 48
108 -> 49
111 -> 50
114 -> 51
118 -> 52
119 -> 53
120 -> 54
121 -> 55
122 -> 56
128 -> 57
131 -> 58
132 -> 59
133 -> 60
134 -> 61
138 -> 62
140 -> 63
142 -> 64
148 -> 65
149 -> 66
150 -> 67
152 -> 68
155 -> 69
157 -> 70
158 -> 71
159 -> 72
160 -> 73
162 -> 74


In [29]:
#Cropping all images
def process_images(folder): #Folder is train or test
    src_folder = os.path.join(output_dir, folder) #data/output/testOrTrain

    for root,_, files in os.walk(src_folder):
        for file in files:
            if file.endswith('.jpg'):
                img_path = os.path.join(root, file)
                relative_path = os.path.relpath(img_path, output_dir)
                
                #Removes /train or /test
                path_components = relative_path.split(os.path.sep)  # Use os.path.sep directly
                path_components = path_components[1:]  # Remove the first element ('train' or 'test')
                relative_path_stripped = os.path.join(*path_components)
                print("relative_path_stripped: ", relative_path_stripped)

                label_path = os.path.join(label_dir, os.path.splitext(relative_path_stripped)[0] + '.txt')
                dest_path = os.path.join(resized_dir)

                print("label_path: ", label_path)
                print("dest_path: ", dest_path)
                print("relative_path: ", relative_path)
        
                if os.path.exists(label_path):
                    try:
                        resize_image(img_path, dest_path)
                    except Exception as e:
                        print(f"Error processing {img_path}: {str(e)}")
                else:
                    print("Something wrong with label file")
                    break


In [38]:
#TESTING
process_images('test')

relative_path_stripped:  95/915/2013/3c7862c4f1bef7.jpg
label_path:  data/label/95/915/2013/3c7862c4f1bef7.txt
dest_path:  data/resized_images
relative_path:  test/95/915/2013/3c7862c4f1bef7.jpg
Saved resized image to: data/resized_images/test/95_915_2013_3c7862c4f1bef7.jpg
relative_path_stripped:  95/915/2014/002e9a5f523c87.jpg
label_path:  data/label/95/915/2014/002e9a5f523c87.txt
dest_path:  data/resized_images
relative_path:  test/95/915/2014/002e9a5f523c87.jpg
Saved resized image to: data/resized_images/test/95_915_2014_002e9a5f523c87.jpg
relative_path_stripped:  95/915/2014/281f099987270a.jpg
label_path:  data/label/95/915/2014/281f099987270a.txt
dest_path:  data/resized_images
relative_path:  test/95/915/2014/281f099987270a.jpg
Saved resized image to: data/resized_images/test/95_915_2014_281f099987270a.jpg
relative_path_stripped:  95/915/2014/9d29c2a28eb8b1.jpg
label_path:  data/label/95/915/2014/9d29c2a28eb8b1.txt
dest_path:  data/resized_images
relative_path:  test/95/915/2014

In [39]:
def create_validation_dataset(test_folder, validation_folder):
    # Ensure the validation folder exists
    os.makedirs(validation_folder, exist_ok=True)

    images = [f for f in os.listdir(test_folder) if os.path.isfile(os.path.join(test_folder, f))]

    # Loop through the images
    for index, image in enumerate(images):
        if index % 2 == 1:  # For every second image (0-based index)
            src_path = os.path.join(test_folder, image)
            dest_path = os.path.join(validation_folder, image)
            
            # Move the image to the validation folder
            shutil.move(src_path, dest_path)
            print(f'Moved {image} to validation folder')


In [40]:
test_folder = './data/resized_images/test'
validation_folder = './data/resized_images/validation'
create_validation_dataset(test_folder, validation_folder)

Moved 100_252_2012_075badffbc9045.jpg to validation folder
Moved 81_70_2014_a63b9f679f9466.jpg to validation folder
Moved 157_1905_2011_175aea7a0d3a11.jpg to validation folder
Moved 77_156_2013_69cae99153c7a4.jpg to validation folder
Moved 54_188_2013_58e84bc126a3e0.jpg to validation folder
Moved 89_1786_2012_6e3581f26d7342.jpg to validation folder
Moved 11_735_2014_4405b1f050ead7.jpg to validation folder
Moved 119_1819_2014_c3df90134cc147.jpg to validation folder
Moved 111_1702_2011_a69949b7feb668.jpg to validation folder
Moved 4_512_2014_2df8a5a9d4f1ac.jpg to validation folder
Moved 108_1543_2012_873b69b6ac74f3.jpg to validation folder
Moved 111_1702_2013_ded58750a1c8b1.jpg to validation folder
Moved 97_868_2010_9800bf871a8ee3.jpg to validation folder
Moved 54_194_2013_9541190a84d589.jpg to validation folder
Moved 75_1654_2010_8cbcedb2024b5b.jpg to validation folder
Moved 38_1959_2008_d4f959e1ad546b.jpg to validation folder
Moved 70_1859_2012_dec6da2d00ba8b.jpg to validation folder
M