In [7]:
import pandas as pd
import os
import cv2
import numpy as np

# Step 1: Load the image-label mapping table
# Paths are assembled with os.path.join so the notebook is not tied to the
# Windows-only "\\" separator.
mapping_table_path = os.path.join("Image_labels", "combined_tables.csv")  # Update with the path to your mapping table
mapping_table = pd.read_csv(mapping_table_path)

# Step 2: Augment images
images_dir = os.path.join("resized_images", "all_images_after_resize_224X224")  # Update with the path to your images folder
augmented_images_dir = "augmented_images_folder"  # Folder to save augmented images

# Create the output folder; exist_ok makes re-running this cell harmless
os.makedirs(augmented_images_dir, exist_ok=True)

# Function to perform data augmentation on an image
def augment_image(image, rotation_angles=(10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110)):
    """Return augmented variants of ``image``: one horizontal flip plus rotations.

    Parameters
    ----------
    image : numpy.ndarray or None
        Image array as returned by ``cv2.imread``. ``None`` (the value
        ``cv2.imread`` yields for a file it cannot read) and zero-sized
        arrays are accepted and produce no augmentations.
    rotation_angles : iterable of numbers, optional
        Angles in degrees handed to ``cv2.getRotationMatrix2D``; one rotated
        copy is produced per angle. Defaults to 10 through 110 in steps of 10.

    Returns
    -------
    list
        The horizontally flipped image followed by the rotated copies, in the
        order of ``rotation_angles``. Empty when ``image`` is ``None`` or has
        no pixels.
    """
    # Guard first: nothing below may touch OpenCV with a missing/empty image.
    if image is None or image.size == 0:
        return []

    # Flip horizontally
    augmented_images = [cv2.flip(image, 1)]

    # Rotate about the image centre, keeping the original canvas size
    height, width = image.shape[:2]
    center = (width / 2, height / 2)

    for angle in rotation_angles:
        rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1)
        # The same affine warp is applied to every channel, so the image is
        # rotated directly in its native channel order; no colour-space
        # conversion is required (and grayscale input keeps working).
        augmented_images.append(cv2.warpAffine(image, rotation_matrix, (width, height)))

    return augmented_images

# Step 3: Update the table with augmented images
augmented_mapping = []
skipped_images = []  # Names of source images that could not be read from disk

# Label columns copied unchanged from each source row to its augmented copies
label_columns = ['crema', 'color_clarity', 'presentation', 'type_of_cup',
                 'type_of_coffee', 'served_way', 'is_relevant']
image_extension = ".jpg"  # Image extension

for index, row in mapping_table.iterrows():
    image_name = str(row['image_name'])
    labels = {col: row[col] for col in label_columns}

    # Default lookup: <images_dir>/<image_name>.jpg. If that file is absent
    # but the bare name exists (the table already carries the extension),
    # use the bare name instead of failing on "<name>.jpg.jpg".
    bare_path = os.path.join(images_dir, image_name)
    image_path = bare_path + image_extension
    if not os.path.isfile(image_path) and os.path.isfile(bare_path):
        image_path = bare_path

    # cv2.imread returns None rather than raising when it cannot load a file
    image = cv2.imread(image_path)
    if image is None:
        skipped_images.append(image_name)
        continue

    # Save augmented images and update the mapping table
    for i, augmented_image in enumerate(augment_image(image), start=1):
        augmented_image_name = f"{os.path.splitext(image_name)[0]}_augmented_{i}.jpg"
        augmented_image_path = os.path.join(augmented_images_dir, augmented_image_name)
        # Only record rows for files that were actually written
        if not cv2.imwrite(augmented_image_path, augmented_image):
            print(f"Could not write {augmented_image_path}; row not added")
            continue
        # NOTE(review): augmented names include ".jpg" while the lookup above
        # appends ".jpg" to the original names -- confirm downstream loaders
        # handle both forms.
        augmented_mapping.append({'image_name': augmented_image_name, **labels})

if skipped_images:
    print(f"Skipped {len(skipped_images)} unreadable image(s): {skipped_images}")

# Convert augmented mapping to DataFrame
augmented_mapping_df = pd.DataFrame(augmented_mapping)

# Append augmented mapping to the original mapping table
updated_mapping_table = pd.concat([mapping_table, augmented_mapping_df], ignore_index=True)

# Save the updated mapping table
updated_mapping_table.to_csv("updated_mapping_table.csv", index=False)

In [1]:
import pandas as pd
import os

# Define the directory containing the Excel files.
# __file__ only exists when this code runs as a script; inside a Jupyter
# kernel it is undefined, so fall back to the current working directory.
try:
    base_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    base_dir = os.getcwd()
excel_dir = os.path.join(base_dir, 'Image_labels')

# File names for the 17 Excel tables: sample_1.xlsx ... sample_17.xlsx
file_names = [f"sample_{i}.xlsx" for i in range(1, 18)]

# Read each Excel file into its own DataFrame
tables = [pd.read_excel(os.path.join(excel_dir, file_name)) for file_name in file_names]

# Concatenate all DataFrames; ignore_index gives the combined table one
# continuous index instead of repeating each file's 0..n-1 index
combined_table = pd.concat(tables, ignore_index=True)

# Write the combined DataFrame to a new Excel file
# NOTE(review): the augmentation cell reads a combined_tables.csv from the
# Image_labels folder, while this writes combined_tables.xlsx to the working
# directory -- confirm how the CSV is produced.
combined_table.to_excel("combined_tables.xlsx", index=False)

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


FileNotFoundError: [Errno 2] No such file or directory: 'table1.xlsx'