In [1]:
#Dependencies
from PIL import Image
import shutil, os
import pandas as pd


In [23]:
#Using the data split defined in the classification folder, train.txt and test.txt
train_file = "data/train_test_split/classification/train.txt"
test_file = "data/train_test_split/classification/test.txt"
classification_dir = "data/train_test_split/classification"
output_dir = "data/output"


In [24]:
#Function that copies images into output/test and output/train based on the classification_dir

def train_test_split(input_file):
    split_type = 'train' if 'train' in input_file else 'test'

    #Construct paths
    input_path = os.path.join(classification_dir, input_file)
    output_path = os.path.join(output_dir, split_type)


    with open(input_path, 'r') as file:
        for line in file:

            #Construct image source path
            relative_image_path = line.strip()
            source_image_path = os.path.join('data', 'image', relative_image_path)

            #Construct image destination path
            dest_image_path = os.path.join(output_path, relative_image_path)

            # Ensure the destination subfolder exists
            dest_folder_path = os.path.dirname(dest_image_path)
            if not os.path.exists(dest_folder_path):
                os.makedirs(dest_folder_path)
            
            #Copy the image
            shutil.copy2(source_image_path, dest_image_path)
            print(f"Copied to: {dest_image_path}")

#train_test_split('train.txt')
train_test_split('test.txt')


Copied to: data/output/test/78/1/2014/3ac218c0c6c378.jpg
Copied to: data/output/test/78/1/2010/588feea1fcc809.jpg
Copied to: data/output/test/78/1/2010/071c428717ceca.jpg
Copied to: data/output/test/78/1/2010/4c119af3fdb787.jpg
Copied to: data/output/test/78/1/2014/1453dbb876c191.jpg
Copied to: data/output/test/78/1/2013/5a9cb4ee24047d.jpg
Copied to: data/output/test/78/1/2012/106f7ea2150ff7.jpg
Copied to: data/output/test/78/1/2012/6481ae22830294.jpg
Copied to: data/output/test/78/1/2013/23bc02fcf393f8.jpg
Copied to: data/output/test/78/1/2014/84919e1b067a85.jpg
Copied to: data/output/test/78/1/2013/603cd9ca33d572.jpg
Copied to: data/output/test/78/1/2012/f5a52935ee6fc2.jpg
Copied to: data/output/test/78/1/2012/80e52b10388f9e.jpg
Copied to: data/output/test/78/1/2010/42fc7c8524392d.jpg
Copied to: data/output/test/78/1/2014/2d7c992c1d6f4b.jpg
Copied to: data/output/test/78/1/2010/f66204595e1e95.jpg
Copied to: data/output/test/78/1/2010/6f64cf12a569dc.jpg
Copied to: data/output/test/78/

In [30]:
output_dir = 'data/output'
label_dir = 'data/label'
cropped_dir = 'data/cropped_images'


In [31]:
def crop_image(image_path, label_path, dest_path):
    
    #Read the label file
    with open(label_path, 'r') as file:
        lines = file.readlines()
        num_bounding_boxes = int(lines[1].strip())
        print("num_bounding_boxes: ", num_bounding_boxes)
        # Check if there's exactly one bounding box
        if num_bounding_boxes != 1:
            raise ValueError("The number of bounding boxes is not 1")
        
        x1, y1, x2, y2 = map(int, lines[2].strip().split())

        #Cropping the image
        with Image.open(image_path) as img:
            cropped_image = img.crop((x1, y1, x2, y2))

        #Constructing destination path
        os.makedirs(os.path.dirname(dest_path), exist_ok=True)

        print('dest_path: ', dest_path)
        #Save cropped image
        cropped_image.save(dest_path)

#TESTINGTESTING
img_path = 'data/output/train/4/509/2012/4ce1a60fe6c0ed.jpg'

label_path = 'data/label/4/509/2012/4ce1a60fe6c0ed.txt'

#crop_image(img_path, label_path)


In [44]:
#Cropping all images
def process_images(folder): #Folder is train or test
    src_folder = os.path.join(output_dir, folder) #data/output/testOrTrain

    for root,_, files in os.walk(src_folder):
        for file in files:
            if file.endswith('.jpg'):
                img_path = os.path.join(root, file)
                relative_path = os.path.relpath(img_path, output_dir)
                
                #Removes /train or /test
                path_components = relative_path.split(os.path.sep)  # Use os.path.sep directly
                path_components = path_components[1:]  # Remove the first element ('train' or 'test')
                relative_path_stripped = os.path.join(*path_components)
                print("relative_path_stripped: ", relative_path_stripped)

                label_path = os.path.join(label_dir, os.path.splitext(relative_path_stripped)[0] + '.txt')
                dest_path = os.path.join(cropped_dir, relative_path)

                print("label_path: ", label_path)
                print("dest_path: ", dest_path)
                print("relative_path: ", relative_path)
        
                if os.path.exists(label_path):
                    try:
                        crop_image(img_path, label_path, dest_path)
                    except Exception as e:
                        print(f"Error processing {img_path}: {str(e)}")
                else:
                    print("Something wrong with label file")
                    break


In [45]:
#TESTING
process_images('train')

relative_path_stripped:  95/915/2013/b3971ffc3ab5f8.jpg
label_path:  data/label/95/915/2013/b3971ffc3ab5f8.txt
dest_path:  data/cropped_images/train/95/915/2013/b3971ffc3ab5f8.jpg
relative_path:  train/95/915/2013/b3971ffc3ab5f8.jpg
num_bounding_boxes:  1
dest_path:  data/cropped_images/train/95/915/2013/b3971ffc3ab5f8.jpg
relative_path_stripped:  95/915/2013/62d3a2d1195213.jpg
label_path:  data/label/95/915/2013/62d3a2d1195213.txt
dest_path:  data/cropped_images/train/95/915/2013/62d3a2d1195213.jpg
relative_path:  train/95/915/2013/62d3a2d1195213.jpg
num_bounding_boxes:  1
dest_path:  data/cropped_images/train/95/915/2013/62d3a2d1195213.jpg
relative_path_stripped:  95/915/2013/5148e912cdc977.jpg
label_path:  data/label/95/915/2013/5148e912cdc977.txt
dest_path:  data/cropped_images/train/95/915/2013/5148e912cdc977.jpg
relative_path:  train/95/915/2013/5148e912cdc977.jpg
num_bounding_boxes:  1
dest_path:  data/cropped_images/train/95/915/2013/5148e912cdc977.jpg
relative_path_stripped:  