In [1]:
import os
import shutil
import random
import csv
import numpy as np
import pandas as pd

In [2]:
#Filter images in a subfolder based on the .txt files
def filter_and_save_images(folder_path, txt_file_path, output_folder):
    """Copy the files listed in a text file from ``folder_path`` to ``output_folder``.

    The text file holds one name per line, without extension. A file in
    ``folder_path`` is selected when its name without extension matches one of
    those lines, ignoring case and surrounding whitespace.

    Args:
        folder_path: Directory that contains the candidate files.
        txt_file_path: Text file with one name (no extension) per line.
        output_folder: Destination directory; created if it does not exist.

    Returns:
        None. One "Copying from" line and one "Saved" line are printed per file.
    """
    os.makedirs(output_folder, exist_ok=True)

    # Build the lookup once as a set so each membership test is O(1) instead of
    # re-scanning the whole name list for every file in the folder.
    with open(txt_file_path, 'r') as file:
        wanted_names = {line.strip().lower() for line in file}
    # Blank lines in the list file are not names.
    wanted_names.discard('')

    for entry in os.listdir(folder_path):
        source_path = os.path.join(folder_path, entry)

        # Sub-directories can share a listed name; copy2 only handles files.
        if not os.path.isfile(source_path):
            continue
        if os.path.splitext(entry)[0].strip().lower() not in wanted_names:
            continue

        destination_path = os.path.join(output_folder, entry)

        # copy2 also preserves file metadata such as modification times.
        print(f"Copying from {source_path} to {destination_path}")
        shutil.copy2(source_path, destination_path)
        print(f"Saved: {destination_path}")

In [3]:
def train_val_split(input_folder, output_folder_valid, split, seed=None):
    """Move a random fraction of the ``.jpg`` files into a validation folder.

    Args:
        input_folder: Directory holding the images; the selected files are
            moved out of it.
        output_folder_valid: Destination directory; created if missing.
        split: Fraction (0 to 1) of the ``.jpg`` files to move. The count is
            truncated towards zero.
        seed: Optional seed. When given, the same folder contents always
            produce the same selection. When ``None`` (default) the global
            ``random`` state is used, as before.

    Raises:
        ValueError: If ``split`` is outside the range [0, 1].
    """
    if not 0 <= split <= 1:
        raise ValueError(f"split must be between 0 and 1, got {split!r}")

    os.makedirs(output_folder_valid, exist_ok=True)

    # os.listdir order is arbitrary, so sort to make a seeded draw repeatable.
    # The extension check ignores case, like the '.csv' check used elsewhere.
    image_files = sorted(
        f for f in os.listdir(input_folder) if f.lower().endswith('.jpg')
    )

    # Calculate the number of images to move to the validation set
    num_validation = int(len(image_files) * split)

    # A private generator keeps a seeded split from disturbing global state.
    sampler = random if seed is None else random.Random(seed)
    validation_files = sampler.sample(image_files, num_validation)

    # Move the validation set to the 'valid' folder
    for file in validation_files:
        src_path = os.path.join(input_folder, file)
        dst_path = os.path.join(output_folder_valid, file)
        shutil.move(src_path, dst_path)

    print("Validation set moved to the 'valid' folder.")

In [4]:
def move_csv_files(csv_folder, image_folder, output_folder):
    """Move the CSV files that share a base name with an image.

    A CSV file in ``csv_folder`` is moved to ``output_folder`` when its name
    without extension equals the name without extension of a ``.jpg`` file in
    ``image_folder``. Extensions are matched case-insensitively; base names
    are compared exactly.

    Args:
        csv_folder: Directory holding the CSV files; matches are moved out.
        image_folder: Directory whose ``.jpg`` files define the wanted names.
        output_folder: Destination directory; created if it does not exist.

    Returns:
        None. One "Moved" line is printed per file.
    """
    os.makedirs(output_folder, exist_ok=True)

    # Get the list of CSV files in the csv_folder
    csv_files = [f for f in os.listdir(csv_folder) if f.lower().endswith('.csv')]

    # Use a set so each lookup below is O(1) instead of scanning a list.
    image_names = {
        os.path.splitext(f)[0]
        for f in os.listdir(image_folder)
        if f.lower().endswith('.jpg')
    }

    # Move CSV files to the output folder based on image names
    for csv_file in csv_files:
        if os.path.splitext(csv_file)[0] not in image_names:
            continue
        src_path = os.path.join(csv_folder, csv_file)
        dst_path = os.path.join(output_folder, csv_file)
        shutil.move(src_path, dst_path)
        print(f"Moved {csv_file} to {output_folder}")

In [5]:
#Extract bbox coordinates and classes (first 5 elements separated by comma)
def extract_values_from_csv(csv_folder):
    """Collect the first five fields of every row of every CSV file in a folder.

    Rows with fewer than five fields (including blank lines) are skipped.

    Args:
        csv_folder: Directory to scan; only files ending in ``.csv``
            (case-insensitive) are read.

    Returns:
        A list of dicts, one per kept row, each with the keys ``'name'`` (the
        CSV file name without extension) and ``'values'`` (list of the first
        five fields, as strings).
    """
    # Initialize a list to store names and values
    name_value_list = []

    # Iterate through CSV files in the folder
    for csv_file in os.listdir(csv_folder):
        if not csv_file.lower().endswith('.csv'):
            continue

        csv_path = os.path.join(csv_folder, csv_file)
        name = os.path.splitext(csv_file)[0]

        # newline='' is what the csv module expects for files it reads.
        with open(csv_path, 'r', newline='') as csv_input:
            for row in csv.reader(csv_input):
                # Append only inside the length check: a short or blank row
                # must not re-emit the values of the previous row.
                if len(row) >= 5:
                    name_value_list.append({'name': name, 'values': row[:5]})

        print(f"Extracted values from {csv_file}.")

    return name_value_list

In [6]:
#Put the value of my list into a dataframe (Name, Left, Top, Right, Bottom, Class)
def list_to_dataframe(name_value_list):
    """Turn a list of ``{'name', 'values'}`` dicts into a DataFrame.

    Args:
        name_value_list: Iterable of dicts with a ``'name'`` entry and a
            ``'values'`` sequence. Entries with fewer than five values are
            skipped.

    Returns:
        A DataFrame with the columns ``Name, Left, Top, Right, Bottom, Class``.
        The first four values are converted to ``float``; the fifth is stored
        unchanged as ``Class``. The columns are present even when no rows are
        produced.

    Raises:
        ValueError: If one of the first four values cannot be converted to
            ``float``.
    """
    columns = ['Name', 'Left', 'Top', 'Right', 'Bottom', 'Class']

    data_list = [
        {
            'Name': item['name'],
            'Left': float(item['values'][0]),
            'Top': float(item['values'][1]),
            'Right': float(item['values'][2]),
            'Bottom': float(item['values'][3]),
            'Class': item['values'][4],
        }
        for item in name_value_list
        if len(item['values']) >= 5
    ]

    # Passing the columns explicitly keeps the schema stable for empty input,
    # so code that indexes the columns still works.
    return pd.DataFrame(data_list, columns=columns)

In [19]:
def xyxy_normalization(df, img_width=1920, img_height=1088):
    """Scale pixel box corners by the image size.

    Columns are read by position: 0 = name, 1 = left, 2 = top, 3 = right,
    4 = bottom, 5 = class. Left and right are divided by ``img_width``; top
    and bottom are divided by ``img_height``.

    Args:
        df: Input DataFrame laid out as described above.
        img_width: Image width in pixels used as the horizontal divisor.
        img_height: Image height in pixels used as the vertical divisor.

    Returns:
        A new DataFrame with a fresh 0..n-1 index and the columns ``Name,
        Class, Normalized_Left, Normalized_Top, Normalized_Right,
        Normalized_Bottom``. An empty input gives an empty frame with those
        columns.

    Raises:
        ValueError: If ``img_width`` or ``img_height`` is not positive.
    """
    out_columns = [
        'Name',
        'Class',
        'Normalized_Left',
        'Normalized_Top',
        'Normalized_Right',
        'Normalized_Bottom',
    ]

    if img_width <= 0 or img_height <= 0:
        raise ValueError("img_width and img_height must be positive")

    # An empty frame may have no columns at all, so do not index into it.
    if df.empty:
        return pd.DataFrame(columns=out_columns)

    # Whole-column arithmetic replaces the per-row pd.concat loop, which
    # copied the growing result on every iteration. to_numpy() drops the
    # input index so the result gets a fresh 0..n-1 index.
    return pd.DataFrame({
        'Name': df.iloc[:, 0].to_numpy(),
        'Class': df.iloc[:, 5].to_numpy(),
        'Normalized_Left': df.iloc[:, 1].to_numpy(dtype=float) / img_width,
        'Normalized_Top': df.iloc[:, 2].to_numpy(dtype=float) / img_height,
        'Normalized_Right': df.iloc[:, 3].to_numpy(dtype=float) / img_width,
        'Normalized_Bottom': df.iloc[:, 4].to_numpy(dtype=float) / img_height,
    }, columns=out_columns)

In [22]:
def rows_as_txt(dataframe, output_directory):
    """Write the rows of a DataFrame to one text file per ``Name`` value.

    Each row becomes one line made of every column after the first, joined by
    single spaces. Rows that share a ``Name`` go to the same ``<Name>.txt``
    file, in their DataFrame order.

    Every file is written from scratch on each call, so calling the function
    again with the same data gives the same files instead of appending
    duplicate lines to them.

    Args:
        dataframe: DataFrame with a ``Name`` column in first position.
        output_directory: Directory for the text files; created if missing.

    Returns:
        None. A confirmation line is printed at the end.
    """
    os.makedirs(output_directory, exist_ok=True)

    # Collect all lines per file first so each file is opened exactly once.
    lines_by_name = {}
    for _, row in dataframe.iterrows():
        name = str(row['Name'])
        values = ' '.join(map(str, row.iloc[1:]))
        lines_by_name.setdefault(name, []).append(f"{values}\n")

    for name, lines in lines_by_name.items():
        file_path = os.path.join(output_directory, f"{name}.txt")
        # 'w' truncates output left by an earlier run.
        with open(file_path, 'w') as file:
            file.writelines(lines)

    print("Files saved in the output directory.")

In [8]:
# Copy the images named in train_split.txt and test_split.txt from the shared
# image folder into separate train and test folders.
# NOTE(review): these paths are absolute and machine-specific; adjust them
# before running the notebook elsewhere.
folder_path = '/Users/matteocoletta/Desktop/Proj/MultiObjDect/images'
txt_file_path1 = '/Users/matteocoletta/Desktop/Proj/MultiObjDect/train_split.txt'
output_folder1 = '/Users/matteocoletta/Desktop/Proj/MultiObjDect/train'
txt_file_path2 = '/Users/matteocoletta/Desktop/Proj/MultiObjDect/test_split.txt'
output_folder2 = '/Users/matteocoletta/Desktop/Proj/MultiObjDect/test'

filter_and_save_images(folder_path, txt_file_path1, output_folder1)
filter_and_save_images(folder_path, txt_file_path2, output_folder2)

Copying from /Users/matteocoletta/Desktop/Proj/MultiObjDect/images/ave-0117-0013.jpg to /Users/matteocoletta/Desktop/Proj/MultiObjDect/train/ave-0117-0013.jpg
Saved: /Users/matteocoletta/Desktop/Proj/MultiObjDect/train/ave-0117-0013.jpg
Copying from /Users/matteocoletta/Desktop/Proj/MultiObjDect/images/ave-0358-0014.jpg to /Users/matteocoletta/Desktop/Proj/MultiObjDect/train/ave-0358-0014.jpg
Saved: /Users/matteocoletta/Desktop/Proj/MultiObjDect/train/ave-0358-0014.jpg
Copying from /Users/matteocoletta/Desktop/Proj/MultiObjDect/images/vwg-0746-0005.jpg to /Users/matteocoletta/Desktop/Proj/MultiObjDect/train/vwg-0746-0005.jpg
Saved: /Users/matteocoletta/Desktop/Proj/MultiObjDect/train/vwg-0746-0005.jpg
Copying from /Users/matteocoletta/Desktop/Proj/MultiObjDect/images/vwg-1244-0015.jpg to /Users/matteocoletta/Desktop/Proj/MultiObjDect/train/vwg-1244-0015.jpg
Saved: /Users/matteocoletta/Desktop/Proj/MultiObjDect/train/vwg-1244-0015.jpg
Copying from /Users/matteocoletta/Desktop/Proj/Multi

In [9]:
# Move a random 30% of the .jpg files out of the train folder into 'valid'.
# The files are moved, not copied, so running this cell again takes a further
# 30% of whatever is left in the train folder.
input_folder = output_folder1
output_folder_valid = '/Users/matteocoletta/Desktop/Proj/MultiObjDect/valid'
train_val_split(input_folder,output_folder_valid, split=0.3)

Validation set moved to the 'valid' folder.


In [10]:
# Sort the annotation CSVs into one folder per split by matching each CSV's
# base name against the .jpg files of that split.
# move_csv_files moves the files, so csv_folder loses the matched CSVs after
# each call.
csv_folder = '/Users/matteocoletta/Desktop/Proj/MultiObjDect/CropOrWeed2'
image_folder1 = output_folder1
train_csv_folder = '/Users/matteocoletta/Desktop/Proj/MultiObjDect/train_csv'
move_csv_files(csv_folder, image_folder1, train_csv_folder)

image_folder2 = output_folder_valid
valid_csv_folder = '/Users/matteocoletta/Desktop/Proj/MultiObjDect/valid_csv'
move_csv_files(csv_folder, image_folder2, valid_csv_folder)

image_folder3 = output_folder2
test_csv_folder = '/Users/matteocoletta/Desktop/Proj/MultiObjDect/test_csv'
move_csv_files(csv_folder, image_folder3, test_csv_folder)

Moved vwg-0008-0018.csv to /Users/matteocoletta/Desktop/Proj/MultiObjDect/train_csv
Moved vwg-0688-0037.csv to /Users/matteocoletta/Desktop/Proj/MultiObjDect/train_csv
Moved vwg-0636-0012.csv to /Users/matteocoletta/Desktop/Proj/MultiObjDect/train_csv
Moved vwg-0799-0004.csv to /Users/matteocoletta/Desktop/Proj/MultiObjDect/train_csv
Moved ave-0067-0004.csv to /Users/matteocoletta/Desktop/Proj/MultiObjDect/train_csv
Moved ave-0465-0002.csv to /Users/matteocoletta/Desktop/Proj/MultiObjDect/train_csv
Moved ave-0176-0023.csv to /Users/matteocoletta/Desktop/Proj/MultiObjDect/train_csv
Moved vwg-0798-0004.csv to /Users/matteocoletta/Desktop/Proj/MultiObjDect/train_csv
Moved ave-0464-0016.csv to /Users/matteocoletta/Desktop/Proj/MultiObjDect/train_csv
Moved ave-0083-0005.csv to /Users/matteocoletta/Desktop/Proj/MultiObjDect/train_csv
Moved ave-0507-0006.csv to /Users/matteocoletta/Desktop/Proj/MultiObjDect/train_csv
Moved vwg-0856-0018.csv to /Users/matteocoletta/Desktop/Proj/MultiObjDect/tr

In [11]:
# Read the first five fields of every CSV row for each split; each list entry
# is a dict with the keys 'name' and 'values'.
train_value_list = extract_values_from_csv(train_csv_folder)
valid_value_list = extract_values_from_csv(valid_csv_folder)
test_value_list = extract_values_from_csv(test_csv_folder)

Extracted values from vwg-0008-0018.csv.
Extracted values from vwg-0688-0037.csv.
Extracted values from vwg-0636-0012.csv.
Extracted values from vwg-0799-0004.csv.
Extracted values from ave-0067-0004.csv.
Extracted values from ave-0465-0002.csv.
Extracted values from ave-0176-0023.csv.
Extracted values from vwg-0798-0004.csv.
Extracted values from ave-0464-0016.csv.
Extracted values from ave-0083-0005.csv.
Extracted values from ave-0507-0006.csv.
Extracted values from vwg-0856-0018.csv.
Extracted values from vwg-1256-0012.csv.
Extracted values from vwg-0357-0004.csv.
Extracted values from ave-0506-0006.csv.
Extracted values from vwg-1256-0006.csv.
Extracted values from vwg-0119-0003.csv.
Extracted values from vwg-0229-0005.csv.
Extracted values from ave-0246-0019.csv.
Extracted values from vwg-0785-0015.csv.
Extracted values from vwg-1287-0005.csv.
Extracted values from vwg-0785-0001.csv.
Extracted values from ave-0479-0013.csv.
Extracted values from vwg-1328-0007.csv.
Extracted values

In [12]:
# Build one DataFrame per split with the columns
# Name, Left, Top, Right, Bottom, Class.
train_df = list_to_dataframe(train_value_list)
valid_df = list_to_dataframe(valid_value_list)
test_df = list_to_dataframe(test_value_list)

In [13]:
# Preview the first rows of the training boxes (pixel coordinates).
print(train_df.head())

            Name    Left    Top   Right  Bottom Class
0  vwg-0008-0018   997.0  864.0  1093.0   945.0     1
1  vwg-0008-0018     0.0  926.0   176.0  1088.0     0
2  vwg-0008-0018    42.0  863.0    71.0   890.0     1
3  vwg-0008-0018   446.0  371.0   576.0   471.0     1
4  vwg-0008-0018  1038.0  966.0  1086.0  1027.0     1


In [20]:
# Divide the box corners by the image size, using the function's default
# dimensions (1920 x 1088).
norm_train_df = xyxy_normalization(train_df)
norm_valid_df = xyxy_normalization(valid_df)
norm_test_df = xyxy_normalization(test_df)

In [21]:
# Preview the first rows of the normalized training boxes.
print(norm_train_df.head())

            Name Class  Normalized_Left  Normalized_Top  Normalized_Right  \
0  vwg-0008-0018     1         0.519271        0.794118          0.569271   
1  vwg-0008-0018     0         0.000000        0.851103          0.091667   
2  vwg-0008-0018     1         0.021875        0.793199          0.036979   
3  vwg-0008-0018     1         0.232292        0.340993          0.300000   
4  vwg-0008-0018     1         0.540625        0.887868          0.565625   

   Normalized_Bottom  
0           0.868566  
1           1.000000  
2           0.818015  
3           0.432904  
4           0.943934  


In [23]:
# Write one <Name>.txt file per image into the folder that holds the images
# of the same split (train, valid, test).
rows_as_txt(norm_train_df, output_folder1)
rows_as_txt(norm_valid_df, output_folder_valid)
rows_as_txt(norm_test_df, output_folder2)

Files saved in the output directory.
Files saved in the output directory.
Files saved in the output directory.
