In [1]:
#Dependencies
from PIL import Image
import shutil, os, re
import pandas as pd

In [2]:
#Using the data split defined in the classification folder, train.txt and test.txt
# --- Inputs -----------------------------------------------------------------
# Folder holding the split definition files (train.txt / test.txt).
classification_dir = 'data/train_test_split/classification'
# Folder holding the per-image bounding-box label files (.txt).
label_dir = 'data/label'

# --- Outputs, one folder per pipeline stage ---------------------------------
output_dir = 'data/output'            # images copied into train/ and test/
cropped_dir = 'data/cropped_images'   # images cropped to their bounding box
resized_dir = 'data/resized_images'   # cropped images resized to a fixed size

In [3]:
#Function that copies images into output/test and output/train based on the classification_dir
def train_test_split(input_file):
    """Copy every image listed in a split file into ``output_dir/<split>``.

    Args:
        input_file: Name of the split file inside ``classification_dir``.
            If the name contains ``'train'`` the images go to the ``train``
            sub-folder, otherwise to ``test``.

    Each non-empty line of the split file is treated as an image path relative
    to ``data/image``; the same relative path is recreated under the
    destination folder.

    Raises:
        OSError: If the split file or a listed image cannot be read/copied.
    """
    split_type = 'train' if 'train' in input_file else 'test'

    # Construct paths
    input_path = os.path.join(classification_dir, input_file)
    output_path = os.path.join(output_dir, split_type)

    with open(input_path, 'r') as file:
        for line in file:
            relative_image_path = line.strip()
            if not relative_image_path:
                # A blank line (e.g. a trailing newline at the end of the file)
                # would otherwise resolve to the 'data/image' directory itself
                # and make shutil.copy2 fail, aborting the whole split.
                continue

            source_image_path = os.path.join('data', 'image', relative_image_path)
            dest_image_path = os.path.join(output_path, relative_image_path)

            # Ensure the destination sub-folder exists (no check-then-create race).
            os.makedirs(os.path.dirname(dest_image_path), exist_ok=True)

            # copy2 also preserves file metadata such as timestamps.
            shutil.copy2(source_image_path, dest_image_path)



In [4]:
# Materialise both splits: train first, then test.
for split_file in ('train.txt', 'test.txt'):
    train_test_split(split_file)

In [5]:
#Function for cropping the images based on the bounding box defined in the corresponding label file
def crop_image(image_path, label_path, dest_path):
    """Crop one image to the bounding box given in its label file and save it.

    The label file is read line by line: the second line (index 1) is parsed
    as the number of bounding boxes and the third line (index 2) as four
    whitespace-separated integers that are passed, in order, as the crop box.

    Args:
        image_path: Path of the image to crop.
        label_path: Path of the label text file.
        dest_path: Path the cropped image is written to; its parent folder is
            created when missing.

    Raises:
        ValueError: If the label file does not declare exactly one bounding
            box (or its numbers cannot be parsed as integers).
        IndexError: If the label file has fewer lines than expected.
    """
    # Pull the label lines into memory, stripped of surrounding whitespace.
    with open(label_path, 'r') as label_file:
        label_lines = [raw_line.strip() for raw_line in label_file]

    box_count = int(label_lines[1])
    if box_count != 1:
        raise ValueError("The number of bounding boxes is not 1")

    left, upper, right, lower = (int(token) for token in label_lines[2].split())

    # Cut the bounding box out of the source image.
    with Image.open(image_path) as source:
        region = source.crop((left, upper, right, lower))

    target_dir = os.path.dirname(dest_path)
    os.makedirs(target_dir, exist_ok=True)
    region.save(dest_path)

In [6]:
#Cropping all images based on bounding box
def crop_images(folder):
    """Crop every ``.jpg`` under ``output_dir/<folder>`` to its bounding box.

    Args:
        folder: Split sub-folder of ``output_dir`` to process
            (the notebook calls this with ``'train'`` and ``'test'``).

    For each image the matching label file is looked up in ``label_dir`` using
    the image's path *without* the leading split folder and with a ``.txt``
    extension. The cropped image is written below ``cropped_dir`` using the
    path *including* the split folder.

    Images without a label file are reported and skipped. Errors raised while
    cropping a single image are printed and do not stop the run.
    """
    src_folder = os.path.join(output_dir, folder)

    for root, _, files in os.walk(src_folder):
        for file in files:
            if not file.endswith('.jpg'):
                continue

            img_path = os.path.join(root, file)
            # Relative to output_dir, so the first component is the split folder.
            relative_path = os.path.relpath(img_path, output_dir)

            # Label files are not organised by split: drop 'train' / 'test'.
            path_components = relative_path.split(os.path.sep)[1:]
            relative_path_stripped = os.path.join(*path_components)

            label_path = os.path.join(label_dir, os.path.splitext(relative_path_stripped)[0] + '.txt')
            dest_path = os.path.join(cropped_dir, relative_path)

            if not os.path.exists(label_path):
                # Skip only this image. The previous `break` left the rest of
                # the current directory unprocessed without saying which file
                # was affected, while other directories were still handled.
                print(f"Something wrong with label file: {label_path}")
                continue

            try:
                crop_image(img_path, label_path, dest_path)
            except Exception as e:
                # Best effort: report the failing image and keep going.
                print(f"Error processing {img_path}: {str(e)}")

In [7]:
# Crop both splits: train first, then test.
for split_name in ('train', 'test'):
    crop_images(split_name)

In [8]:
#Function for resizing images to 225x225 pixels
def resize_image(image_path, dest_folder, resize_size=(225, 225), name_depth=4):
    """Resize one image and save it, flattened, under ``dest_folder/<split>``.

    Args:
        image_path: Path of the image to resize. The split is taken from the
            first directory component named exactly ``'train'`` or ``'test'``.
            If there is none, the previous heuristic applies: ``'train'`` when
            the substring occurs anywhere in the path, else ``'test'``.
        dest_folder: Root folder for the resized images.
        resize_size: ``(width, height)`` passed to ``Image.resize``.
        name_depth: How many trailing path components (file name included)
            are joined with ``'_'`` to form the flat output file name.
            Must be a positive integer.

    The output is written to ``dest_folder/<split>/<flat_name>``; the folder
    is created when missing.
    """
    path_parts = image_path.split(os.sep)

    # Decide the split from whole directory names, so a file or folder that
    # merely *contains* "train" (e.g. "train_x.jpg" in the test split) does
    # not send a test image to the train folder.
    split_type = None
    for part in path_parts[:-1]:
        if part in ('train', 'test'):
            split_type = part
            break
    if split_type is None:
        split_type = 'train' if 'train' in image_path else 'test'

    # Resizing the image to the fixed size
    with Image.open(image_path) as img:
        resized_image = img.resize(resize_size)

    # Flatten the trailing directory levels into the file name. The parts were
    # obtained by splitting on os.sep, so no separator can remain in them.
    new_name = '_'.join(path_parts[-name_depth:])

    dest_path = os.path.join(dest_folder, split_type, new_name)
    os.makedirs(os.path.dirname(dest_path), exist_ok=True)

    resized_image.save(dest_path)


In [9]:
#Resizing all images
def process_images(folder):  # Folder is train or test
    """Resize every cropped ``.jpg`` under ``cropped_dir/<folder>``.

    Args:
        folder: Split sub-folder of ``cropped_dir`` to process
            (the notebook calls this with ``'train'`` and ``'test'``).

    An image is only resized when its label file exists in ``label_dir``
    (looked up by the image path without the leading split folder, with a
    ``.txt`` extension). The resized images are written below ``resized_dir``
    by ``resize_image``.

    Images without a label file are reported and skipped. Errors raised while
    resizing a single image are printed and do not stop the run.
    """
    src_folder = os.path.join(cropped_dir, folder)  # data/cropped_images/<train|test>

    for root, _, files in os.walk(src_folder):
        for file in files:
            if not file.endswith('.jpg'):
                continue

            img_path = os.path.join(root, file)
            # Relative to cropped_dir, so the first component is the split folder.
            relative_path = os.path.relpath(img_path, cropped_dir)

            # Label files are not organised by split: drop 'train' / 'test'.
            path_components = relative_path.split(os.path.sep)[1:]
            relative_path_stripped = os.path.join(*path_components)

            label_path = os.path.join(label_dir, os.path.splitext(relative_path_stripped)[0] + '.txt')

            if not os.path.exists(label_path):
                # Skip only this image. The previous `break` left the rest of
                # the current directory unprocessed without saying which file
                # was affected, while other directories were still handled.
                print(f"Something wrong with label file: {label_path}")
                continue

            try:
                resize_image(img_path, resized_dir)
            except Exception as e:
                # Best effort: report the failing image and keep going.
                print(f"Error processing {img_path}: {str(e)}")


In [10]:
# Resize both splits: train first, then test.
for split_name in ('train', 'test'):
    process_images(split_name)

In [11]:
#Creates a validation set by moving every second image out of the test set
def create_validation_dataset(test_folder, validation_folder):
    """Move every second file of ``test_folder`` into ``validation_folder``.

    Args:
        test_folder: Folder whose regular files are split; sub-directories
            are ignored.
        validation_folder: Destination folder, created when missing.

    The files are sorted by name before the split, so the same input always
    yields the same validation set (``os.listdir`` alone returns entries in
    arbitrary order). The files at sorted positions 1, 3, 5, ... are moved.

    Note:
        The files are *moved*, so this is not idempotent: running it again
        moves every second one of the files that remained.
    """
    os.makedirs(validation_folder, exist_ok=True)

    images = sorted(
        name for name in os.listdir(test_folder)
        if os.path.isfile(os.path.join(test_folder, name))
    )

    # Every second image (odd positions in the sorted listing).
    for image in images[1::2]:
        src_path = os.path.join(test_folder, image)
        dest_path = os.path.join(validation_folder, image)

        # Move the image to the validation folder
        shutil.move(src_path, dest_path)


In [12]:
# Carve the validation set out of the resized test images.
test_folder, validation_folder = (
    './data/resized_images/test',
    './data/resized_images/validation',
)
create_validation_dataset(test_folder=test_folder, validation_folder=validation_folder)

In [13]:
#Renames the image class labels to consecutive numbers starting at 0 (0-74 for this dataset)

def rename_image_classes(directory):
    """Renumber the leading class label of every file name in ``directory``.

    File names are expected to start with ``<digits>_``; that number is the
    class label. The distinct labels found in the directory are sorted and
    mapped to ``0, 1, 2, ...``; each file is renamed so that it carries the
    new label. Files that do not match the pattern are left alone. The mapping
    is printed as ``old -> new`` lines.

    Args:
        directory: Folder containing the files to rename (not recursive).

    Note:
        NOTE(review): the mapping is built only from the labels present in
        ``directory``. Two directories that do not contain exactly the same
        set of labels get different mappings - confirm that all splits hold
        every class before relying on the labels across directories.
    """
    label_pattern = re.compile(r'^(\d+)_')

    # Collect (old_label, filename, rest-of-name-after-the-label) per match.
    labelled_files = []
    for filename in os.listdir(directory):
        match = label_pattern.match(filename)
        if match:
            labelled_files.append((int(match.group(1)), filename, filename[match.end():]))

    # Map old labels to new labels starting from 0
    sorted_labels = sorted({old_label for old_label, _, _ in labelled_files})
    label_map = {old_label: new_label for new_label, old_label in enumerate(sorted_labels)}

    for old_label, new_label in label_map.items():
        print(f"{old_label} -> {new_label}")

    # Rename in ascending order of the old label. A new label is never larger
    # than the old one, so going upwards guarantees that a target name such as
    # "1_x.jpg" (from "11_x.jpg") has already been vacated by the original
    # "1_x.jpg". In arbitrary order os.rename could silently overwrite it.
    for old_label, filename, remainder in sorted(labelled_files):
        # Build the name from the regex match instead of str.replace, so only
        # the leading label is ever touched.
        new_filename = f"{label_map[old_label]}_{remainder}"
        if new_filename == filename:
            continue
        os.rename(os.path.join(directory, filename), os.path.join(directory, new_filename))

In [14]:
# Usage
# Relabel each split folder in turn: test, train, validation.
for split_name in ('test', 'train', 'validation'):
    rename_image_classes('data/resized_images/' + split_name)

4 -> 0
11 -> 1
14 -> 2
15 -> 3
26 -> 4
27 -> 5
28 -> 6
29 -> 7
34 -> 8
35 -> 9
36 -> 10
37 -> 11
38 -> 12
39 -> 13
43 -> 14
45 -> 15
48 -> 16
50 -> 17
51 -> 18
53 -> 19
54 -> 20
55 -> 21
58 -> 22
64 -> 23
68 -> 24
69 -> 25
70 -> 26
71 -> 27
73 -> 28
75 -> 29
76 -> 30
77 -> 31
78 -> 32
80 -> 33
81 -> 34
82 -> 35
84 -> 36
87 -> 37
88 -> 38
89 -> 39
92 -> 40
94 -> 41
95 -> 42
97 -> 43
100 -> 44
102 -> 45
103 -> 46
105 -> 47
106 -> 48
108 -> 49
111 -> 50
114 -> 51
118 -> 52
119 -> 53
120 -> 54
121 -> 55
122 -> 56
128 -> 57
131 -> 58
132 -> 59
133 -> 60
134 -> 61
138 -> 62
140 -> 63
142 -> 64
148 -> 65
149 -> 66
150 -> 67
152 -> 68
155 -> 69
157 -> 70
158 -> 71
159 -> 72
160 -> 73
162 -> 74
4 -> 0
11 -> 1
14 -> 2
15 -> 3
26 -> 4
27 -> 5
28 -> 6
29 -> 7
34 -> 8
35 -> 9
36 -> 10
37 -> 11
38 -> 12
39 -> 13
43 -> 14
45 -> 15
48 -> 16
50 -> 17
51 -> 18
53 -> 19
54 -> 20
55 -> 21
58 -> 22
64 -> 23
68 -> 24
69 -> 25
70 -> 26
71 -> 27
73 -> 28
75 -> 29
76 -> 30
77 -> 31
78 -> 32
80 -> 33
81 -> 34
8