In [1]:
import numpy as np
import csv
import os
from pathlib import Path
import supervision as sv

# Define the paths
depth_maps_folder = Path(r"C:\Users\minht\Downloads\2180_pics_nutrition5k\output")
depth_5_folder = Path(r"C:\Users\minht\Downloads\Food.v21-final-version.yolov5pytorch")
coin_to_plate = 25

# CSV file setup
csv_file_path = r'C:\Users\minht\Downloads\food_2180_nutrition5k.csv'
with open(csv_file_path, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['image_name', 'food_ID', 'image_area', 'volume', 'mass'])

# Function to find the corresponding label file
def find_label_file(depth_map_name):
    depth_map_prefix = depth_map_name[:15]
    for subset in ['test', 'train', 'valid']:
        labels_path = depth_5_folder / subset / 'labels'
        for label_file in labels_path.glob('*.txt'):
            if label_file.stem[:15] == depth_map_prefix:
                return label_file
    return None

# Process each depth map
for depth_map_file in depth_maps_folder.glob('*.npy'):
    depth_map = np.load(depth_map_file)
    mean_first_row = np.mean(depth_map[0, :])
    
    depth_map = depth_map * 100
    print(depth_map)
    label_file = find_label_file(depth_map_file.name)
    
    if label_file is None:
        print(f"No matching label file found for {depth_map_file.name}")
        continue

    mm_to_pixel = 0
    cam_plane_to_coin = 0

    with open(label_file, 'r') as file, open(csv_file_path, 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for line in file:
            if int(line[0]) == 4:  # Assuming '0' indicates a coin
                print('Detected coin label')
                parts = line.strip().split()
                parts_float = [float(coord) for coord in parts]
                points = parts_float[1:]
                num_vertices = len(points) // 2
                polygon = np.array(points).reshape(num_vertices, 2)

                

                mask = sv.polygon_to_mask((polygon * 640).astype(int), (640, 640))
                image_area = np.count_nonzero(mask)
                mm_to_pixel = 13 / np.sqrt(image_area / np.pi)

                masked_depth_map = np.where(mask, depth_map, 0)
                non_zero_depths = masked_depth_map[masked_depth_map != 0]

                if non_zero_depths.size > 0:
                    cam_plane_to_coin = int(np.mean(non_zero_depths))
                    print(f'Mask Centroid calculated.')

        file.seek(0)  # Reset file read position for the second pass

        for line in file:
            if int(line[0]) != 4:  # Skip coin, process other objects
                parts = line.strip().split()
                food_ID = parts[0]
                parts_float = [float(coord) for coord in parts[1:]]
                num_vertices = len(parts_float) // 2
                polygon = np.array(parts_float).reshape(num_vertices, 2)

                # bounding_box = sv.polygon_to_xyxy(polygon)
                # print(bounding_box)

                mask = sv.polygon_to_mask((polygon * 640).astype(int), (640, 640))
                print(np.count_nonzero(mask))
                image_area = np.count_nonzero(mask)
                masked_depth_map = np.where(mask, depth_map, 0)
                non_zero_depths = masked_depth_map[masked_depth_map != 0]
                print(cam_plane_to_coin)
                print(non_zero_depths)

                if non_zero_depths.size > 0:
                    # Applying the proper corrections for calculations
                    food_depths = np.maximum(0, cam_plane_to_coin - non_zero_depths)
                    print(np.count_nonzero(food_depths))
                    print(np.min(food_depths))
                    print(food_depths)
                    food_depths = np.maximum(0, food_depths - coin_to_plate)
                    # print(len(food_depths))
                    print(np.count_nonzero(food_depths))
                    volume = np.sum(food_depths) * (mm_to_pixel**2)  # Calculate the volume
                else:
                    volume = 0
                
                # Write to CSV
                writer.writerow([depth_map_file.stem, food_ID, image_area, volume, '', ])  # Mass is left empty

print("Processing complete and data written to CSV.")


[[405.04614 401.14664 399.6164  ... 413.07184 421.17056 455.68594]
 [405.04614 401.14664 399.6164  ... 413.07184 421.17056 455.68594]
 [405.4841  399.63757 397.98264 ... 391.42798 391.8733  419.78928]
 ...
 [407.84644 406.82205 409.79453 ... 400.1625  402.3098  409.76318]
 [441.893   406.4459  407.18408 ... 412.57104 415.16043 445.78857]
 [441.893   406.4459  407.18408 ... 412.57104 415.16043 445.78857]]
Detected coin label
Mask Centroid calculated.
33923
405
[341.53723 341.60425 341.64243 ... 344.8787  344.8787  345.28314]
33923
46.061035
[63.46277  63.395752 63.357574 ... 60.121307 60.121307 59.716858]
33923
[[406.6012  399.77155 397.90015 ... 414.78033 423.17816 459.57928]
 [406.6012  399.77155 397.90015 ... 414.78033 423.17816 459.57928]
 [403.60974 398.5924  396.56107 ... 398.44373 398.77203 421.02   ]
 ...
 [402.41956 401.3495  404.55475 ... 400.25568 402.03525 398.74377]
 [430.8754  400.87732 401.8282  ... 398.37314 396.8027  420.62216]
 [430.8754  400.87732 401.8282  ... 398.37

In [2]:
import pandas as pd

# Load the first CSV file
df1 = pd.read_csv(r"C:\Users\minht\Downloads\output.csv")

# Load the second CSV file
df2 = pd.read_csv(r"C:\Users\minht\Downloads\food_2180_nutrition5k.csv")

# Process the 'image_name' in df2 to extract `{a}` before `_jpeg` or `_jpg`
df2['processed_image_name'] = df2['image_name'].str.extract(r'(.+?)_(?:jpeg|jpg)')

# Rename columns in df2 for easier comparison
df2.rename(columns={'food_ID': 'object_id'}, inplace=True)

# Create a dictionary from df2 for looking up 'volume' based on 'processed_image_name' and 'object_id'
volume_lookup = df2.set_index(['processed_image_name', 'object_id'])['volume'].to_dict()

# Define a function to apply to each row in df1 to look up the 'volume'
def get_volume(row):
    key = (row['image_name'], row['object_id'])
    return volume_lookup.get(key, None) if volume_lookup.get(key, 0) != 0 else None

# Apply the function to df1 to create the 'volume' column
df1['volume'] = df1.apply(get_volume, axis=1)

# Save the updated DataFrame back to CSV
df1.to_csv(r"C:\Users\minht\Downloads\output_with_volume.csv", index=False)


In [3]:
import pandas as pd

# Load the data from a CSV file
file_path = r"C:\Users\minht\Downloads\output_with_volume.csv"  # Adjust the path to your CSV file
data = pd.read_csv(file_path)

# Calculate the volume/mass ratio
data['volume_over_mass'] = data['volume'] / data['mass']

# Function to remove outliers based on a fixed threshold from the median
def remove_outliers(df, column_name):
    median_value = df[column_name].median()
    threshold = median_value/3
    # Calculate the acceptable range around the median
    lower_bound = median_value - threshold
    upper_bound = median_value + threshold
    # Filter out the outliers
    return df[(df[column_name] >= lower_bound) & (df[column_name] <= upper_bound)]

# Apply the function to each group of food_ID and concatenate the results
cleaned_data = data.groupby('object_id').apply(lambda x: remove_outliers(x, 'volume_over_mass')).reset_index(drop=True)
cleaned_data.drop('volume_over_mass', axis=1, inplace=True)

# Optionally, save the cleaned data back to a CSV
cleaned_data.to_csv(r"C:\Users\minht\Downloads\output_with_volume_3.csv", index=False)

print("Cleaned data has been saved and contains {} rows compared to the original {} rows.".format(len(cleaned_data), len(data)))


Cleaned data has been saved and contains 4267 rows compared to the original 6160 rows.
