# MARCO IMPORT
This notebook reads the MARCO TFRecord files, decodes each record's image, saves it as a JPEG in a folder named after its class label, and then resizes the saved images.

In [1]:
import os
import tensorflow as tf
from PIL import Image
import os




### Import TFRecords
Download the TFRecord files from the MARCO website and place them in the `MARCO_TFRecords` folder.

In [6]:
# Parsing schema for one serialized MARCO example. Every feature is declared as
# a scalar (shape []) FixedLenFeature of the dtype listed next to its key.
feature_description = {
    feature_key: tf.io.FixedLenFeature([], feature_dtype)
    for feature_key, feature_dtype in (
        ('image/height', tf.int64),
        ('image/width', tf.int64),
        ('image/colorspace', tf.string),
        ('image/channels', tf.int64),
        ('image/class/label', tf.int64),
        ('image/class/raw', tf.int64),
        ('image/class/source', tf.int64),
        ('image/class/text', tf.string),
        ('image/format', tf.string),
        ('image/filename', tf.string),
        ('image/id', tf.int64),
        ('image/encoded', tf.string),
    )
}

def save_images_by_label(example, output_directory):
    """
    Decode one serialized record and save its image as a JPEG in a per-class folder.

    The record is parsed with the module-level ``feature_description``. The
    image is decoded to 3 channels, re-encoded as JPEG and written to
    ``<output_directory>/<label>/<image/filename>``.

    Folder names are the integer class labels:
    0 - Clear
    1 - Crystals
    2 - Other
    3 - Precipitate

    Args:
        example: one serialized example, e.g. an element yielded by iterating
            a ``tf.data.TFRecordDataset``.
        output_directory: root folder under which the label folders are created.

    Notes:
        * ``.numpy()`` is called on the parsed tensors, so this must run eagerly.
        * The file keeps the name stored in the record. The bytes written are
          always JPEG, even when that name ends in another extension.
    """
    parsed = tf.io.parse_single_example(example, feature_description)
    image = tf.image.decode_jpeg(parsed['image/encoded'], channels=3)  # Decode JPEG image
    label = parsed['image/class/label'].numpy()  # Get label value as numpy int

    # Create the label folder; exist_ok avoids the check-then-create race and
    # matches how the resize step creates its folders.
    label_dir = os.path.join(output_directory, str(label))
    os.makedirs(label_dir, exist_ok=True)

    # Save the image to the respective label folder
    image_filename = parsed['image/filename'].numpy().decode('utf-8')  # Get filename as string
    image_path = os.path.join(label_dir, image_filename)
    tf.io.write_file(image_path, tf.image.encode_jpeg(image))


tfrecord_directory = 'MARCO_TFRecords'
# Loop-invariant destination; one sub-folder per class label is created beneath it.
output_directory = 'MARCO_IMAGES'

# os.listdir() returns entries in arbitrary order and also lists sub-directories.
# Sort for a reproducible run and keep only regular files, so that
# TFRecordDataset is never pointed at a directory. No extension filter is
# applied: any regular file in this folder is treated as a TFRecord shard.
tfrecord_files = sorted(
    entry for entry in os.listdir(tfrecord_directory)
    if os.path.isfile(os.path.join(tfrecord_directory, entry))
)

for file_name in tfrecord_files:

    # Read TFRecord file
    tfrecord_file = os.path.join(tfrecord_directory, file_name)
    dataset = tf.data.TFRecordDataset(tfrecord_file)
    print("current file:", file_name)

    # Process and save images by label
    for record in dataset:
        save_images_by_label(record, output_directory)

Current file: train-00001-of-00407
X0000039760279200409281034.png
folderID125_plateID3125_batchID18048_wellNum22_profileID1_d1_r267583_ef.jpg
X0000042171126200411151543.png
folderID281_plateID1281_batchID4174_wellNum16_profileID1_d2_r75863_ef.jpg
folderID511_plateID3511_batchID20741_wellNum58_profileID1_d1_r315192_ef.jpg
06d7_E4_CrossPolarization_9.jpg
folderID647_plateID2647_batchID13388_wellNum72_profileID1_d2_r213829_ef.jpg
folderID987_plateID987_batchID1639_wellNum74_profileID1_d2_r40461_ef.jpg
06hl_A5_ImagerDefaults_0.jpg
folderID440_plateID1440_batchID5495_wellNum68_profileID1_d1_r90498_ef.jpg
X0000043780431200412291330.png
046p_H9_ImagerDefaults_2.jpg
folderID828_plateID1828_batchID8015_wellNum89_profileID1_d2_r122259_ef.jpg
folderID241_plateID3241_batchID19740_wellNum90_profileID1_d2_r284022_ef.jpg
folderID505_plateID1505_batchID5837_wellNum26_profileID1_d1_r95274_ef.jpg
folderID242_plateID3242_batchID19196_wellNum13_profileID1_d2_r282524_ef.jpg
folderID643_plateID2643_batchID1

### Resize 
Resize the images to 608x608 and write them to the `MARCO_IMAGES_resized` folder, keeping one sub-folder per class label.

In [3]:
def resize_images_in_folders(root_folder, output_root="MARCO_IMAGES_resized", size=(608, 608)):
    """
    Resize every image found under the sub-folders of ``root_folder``.

    Each immediate sub-directory of ``root_folder`` gets a same-named folder
    under ``output_root``. Images are collected recursively with ``os.walk``
    and written flat into that folder under their original file name, so two
    files with the same name in different nested folders overwrite each other.
    The output is always JPEG-encoded, regardless of the file's extension.

    Args:
        root_folder: directory whose sub-directories (e.g. 0, 1, 2, 3) hold images.
        output_root: directory that receives the resized copies; created if missing.
        size: target ``(width, height)`` passed to ``Image.resize``.
    """
    os.makedirs(output_root, exist_ok=True)

    # Iterate through the numbered folders
    for folder_name in os.listdir(root_folder):
        folder_path = os.path.join(root_folder, folder_name)

        # Plain files sitting directly in the root folder are ignored
        if not os.path.isdir(folder_path):
            continue

        output_folder = os.path.join(output_root, folder_name)
        os.makedirs(output_folder, exist_ok=True)

        # Loop through images in subfolders and resize them
        for subdir, _, files in os.walk(folder_path):
            for filename in files:
                # Only files with a known image extension are processed
                if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif')):
                    continue

                filepath = os.path.join(subdir, filename)
                with Image.open(filepath) as img:
                    # The JPEG writer cannot store alpha or palette modes
                    # (RGBA, LA, P, ...), which PNG/GIF inputs commonly use;
                    # convert those to RGB so the save does not raise OSError.
                    source = img if img.mode in ("L", "RGB", "CMYK") else img.convert("RGB")
                    img_resized = source.resize(size)

                    # Save the resized image in the corresponding output folder
                    output_filename = os.path.join(output_folder, filename)
                    img_resized.save(output_filename, "JPEG")
                    print(f"Resized and saved: {output_filename}")

# Source tree produced by the import step: one sub-folder per class label
# (0, 1, 2, 3), each holding the images to resize.
root_folder_path = "MARCO_IMAGES"

resize_images_in_folders(root_folder=root_folder_path)

print("task finished")

Resized and saved: MARCO_IMAGES_resized\0\00k5_F9_ImagerDefaults_0.jpg
Resized and saved: MARCO_IMAGES_resized\0\01ge_F5_ImagerDefaults_0.jpg
Resized and saved: MARCO_IMAGES_resized\0\01ya_D9_ImagerDefaults_0.jpg
Resized and saved: MARCO_IMAGES_resized\0\01ya_F9_ImagerDefaults_0.jpg
Resized and saved: MARCO_IMAGES_resized\0\02nm_F1_ImagerDefaults_0.jpg
Resized and saved: MARCO_IMAGES_resized\0\02no_C8_ImagerDefaults_0.jpg
Resized and saved: MARCO_IMAGES_resized\0\02nv_E12_ImagerDefaults_0.jpg
Resized and saved: MARCO_IMAGES_resized\0\02p2_F9_ImagerDefaults_0.jpg
Resized and saved: MARCO_IMAGES_resized\0\02ua_F3_ImagerDefaults_0.jpg
Resized and saved: MARCO_IMAGES_resized\0\046l_B4_ImagerDefaults_1.jpg
Resized and saved: MARCO_IMAGES_resized\0\047d_A6_ImagerDefaults_0.jpg
Resized and saved: MARCO_IMAGES_resized\0\0494_E6_ImagerDefaults_0.jpg
Resized and saved: MARCO_IMAGES_resized\0\04pd_A6_ImagerDefaults_0.jpg
Resized and saved: MARCO_IMAGES_resized\0\04rs_B11_ImagerDefaults_0.jpg
Resi