In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the source CSV into a DataFrame
df = pd.read_csv('your_file.csv')

# Report how many rows were loaded, then preview the first few
print(f"Total rows: {df.shape[0]}")
df.head()

In [None]:
# Keep rows that have an image_id but nothing in any other column.

# Every column name apart from 'image_id'
other_columns = [name for name in df.columns if name != 'image_id']

# Row has a usable image_id: neither null nor the empty string
has_image_id = df['image_id'].notna() & df['image_id'].ne('')

# Row is null in every one of the remaining columns
others_all_blank = df[other_columns].isna().all(axis=1)

filtered_df = df[has_image_id & others_all_blank]

print(f"Filtered rows: {len(filtered_df)}")
filtered_df

In [None]:
# Variant of the filter above in which empty strings in the other columns
# count as blank as well.

# Row has a usable image_id: neither null nor the empty string
image_ids = df['image_id']
id_present = image_ids.notna() & image_ids.ne('')

# Turn '' into NaN first so a single isna() check covers both kinds of blank
others_normalized = df[other_columns].replace('', np.nan)
others_empty = others_normalized.isna().all(axis=1)

filtered_df_alt = df[id_present & others_empty]

print(f"Filtered rows (alternative): {len(filtered_df_alt)}")
filtered_df_alt

In [None]:
# Display the image referenced by an image_id
from pathlib import Path
from PIL import Image
from IPython.display import display

# Take the image_id from the first filtered row (or assign one manually here)
if len(filtered_df) > 0:
    # Read the value column-first: building a whole-row Series (iloc[0][...])
    # can upcast an integer id to float when the other columns are float.
    # Then normalise to str, because `Path / <non-str>` raises TypeError when
    # the CSV column was parsed as a numeric dtype.
    # NOTE(review): if the image_id column itself was parsed as float
    # (e.g. 123.0), confirm the resulting name matches the file on disk.
    image_id = str(filtered_df['image_id'].iloc[0])

    # Build the image path (adjust the base directory as needed)
    image_path = Path('evaluation_data') / image_id

    # Alternative: if images are elsewhere, adjust the path
    # image_path = Path('path/to/images') / image_id

    print(f"Displaying image: {image_id}")
    print(f"Full path: {image_path}")

    # is_file() rather than exists(): a directory at this path cannot be
    # opened as an image and is reported as not found instead.
    if image_path.is_file():
        # The context manager closes the underlying file handle once the
        # image has been rendered.
        with Image.open(image_path) as img:
            display(img)
    else:
        print(f"⚠️ Image not found at: {image_path}")
else:
    print("No filtered rows to display")

In [None]:
# Re-read the CSV and mark every empty cell with the 'NOT_FOUND' sentinel
df_cleaned = (
    pd.read_csv('your_file.csv')
    .fillna('NOT_FOUND')        # NaN / null values
    .replace('', 'NOT_FOUND')   # empty strings
)

print(f"Cleaned {len(df_cleaned)} rows")
print("Sample of cleaned data:")
df_cleaned.head()