# Train images: merge all images and captions from the different classes, then clean up

In [5]:
import os
import shutil

# Function to read captions from a file
def read_captions(file_path):
    """Return every line of a caption file with surrounding whitespace removed.

    Args:
        file_path: Path to a UTF-8 encoded text file.

    Returns:
        A list holding one stripped string per line of the file, in file
        order. Blank lines are kept and appear as empty strings.
    """
    with open(file_path, 'r', encoding='utf-8') as caption_file:
        # Iterating the handle yields the same lines as readlines().
        return [line.strip() for line in caption_file]

# Directory holding one sub-folder per class; each sub-folder contains the
# '.jpg' images and one '<image id>_captions.txt' file per image.
data_dir = 'selected_images_coco_train'

# Output folder for the merged images and the combined caption file
output_folder = 'Mergeddataset_train'
os.makedirs(output_folder, exist_ok=True)

# Output folder for images
output_image_folder = os.path.join(output_folder, 'Images')
os.makedirs(output_image_folder, exist_ok=True)

# Captions keyed by image filename.
# NOTE: an image that appears in several class folders is copied to the same
# destination each time and keeps a single entry here, while the per-class
# counts and the total below include it once per folder.
image_captions = {}
total_imagesCount = 0

# Loop through each class folder
for folder_name in os.listdir(data_dir):
    folder_path = os.path.join(data_dir, folder_name)
    if not os.path.isdir(folder_path):
        continue

    # Number of images merged from this folder
    image_count = 0

    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.jpg'):  # Assuming images have .jpg extension
            continue

        # The image id is the third underscore-separated field of the file
        # name, without extension and without leading zeros.
        name_parts = file_name.split('_')
        if len(name_parts) < 3:
            print(f"Skipping '{file_name}' in '{folder_name}': unexpected file name format.")
            continue
        image_id = name_parts[2].split('.')[0].lstrip("0")

        caption_file_path = os.path.join(folder_path, image_id + '_captions.txt')

        # Read the captions BEFORE copying the image, so an image without a
        # caption file never ends up in the merged dataset.
        try:
            captions = read_captions(caption_file_path)
        except FileNotFoundError:
            print(f"Skipping '{file_name}' in '{folder_name}': "
                  f"caption file '{caption_file_path}' not found.")
            continue

        # Always drop blank lines; otherwise they would be written out as
        # empty caption rows ("<image>, ") in the combined caption file.
        captions = [caption for caption in captions if caption.strip()]

        shutil.copy(os.path.join(folder_path, file_name), output_image_folder)
        image_captions[file_name] = captions
        image_count += 1

    # Print total count of images in the folder
    total_imagesCount += image_count
    print(f"Class '{folder_name}' contains {image_count} images.")

# Write all image filenames and their respective captions to a single text
# file, one "<image filename>, <caption>" row per caption.
print('totalimages', total_imagesCount)
output_caption_file = os.path.join(output_folder, 'image_captions.txt')
with open(output_caption_file, 'w', encoding='utf-8') as f:
    for img_name, captions in image_captions.items():
        f.writelines(f"{img_name}, {caption}\n" for caption in captions)


Class 'baseball' contains 2441 images.
Class 'bicycle' contains 973 images.
Class 'birds' contains 1576 images.
Class 'boat' contains 1561 images.
Class 'bus' contains 2127 images.
Class 'car' contains 3155 images.
Class 'cat' contains 2794 images.
Class 'dog' contains 3537 images.
Class 'elephant' contains 1126 images.
Class 'horse' contains 1518 images.
Class 'motorbikes' contains 150 images.
Class 'plane' contains 2255 images.
Class 'surfboard' contains 1720 images.
Class 'train' contains 2651 images.
Class 'truck' contains 1404 images.
Class 'zebra' contains 1405 images.
totalimages 30393
