In [27]:
pip install opencv-python

Note: you may need to restart the kernel to use updated packages.


In [1]:
import json
import os
import shutil
import zipfile

import cv2
import numpy as np
import pandas as pd
from skimage.feature import hog

In [2]:
# Directory (relative to the notebook's working directory) whose files are
# listed and read by the feature-extraction cell below.
image_folder = 'illinois_doc_dataset/side/side'

In [7]:
def extract_hog_features(image_path, size=(128, 128)):
    """Return the HOG descriptor of the image stored at ``image_path``.

    The image is read with OpenCV, resized to ``size``, converted to
    grayscale and described with 8x8-pixel cells grouped in 2x2-cell blocks.

    Returns:
        The 1-D feature vector produced by ``skimage.feature.hog``, or
        ``None`` when OpenCV cannot read/decode the file.
    """
    img = cv2.imread(image_path)
    if img is None:
        return None

    img_resized = cv2.resize(img, size)
    img_gray = cv2.cvtColor(img_resized, cv2.COLOR_BGR2GRAY)

    # Only the descriptor is used downstream, so the (costly) HOG
    # visualisation image is not requested.
    return hog(img_gray, pixels_per_cell=(8, 8), cells_per_block=(2, 2))


# image ID (file name without extension) -> HOG feature vector
image_features = {}
# File names that OpenCV could not read; kept so they can be inspected later.
failed_images = []

# Sorted so the processing order (and the progress output) is reproducible.
image_files = sorted(os.listdir(image_folder))

# Total number of directory entries (includes files that fail to load)
total_images = len(image_files)

for idx, image_name in enumerate(image_files):
    # splitext strips only the final extension, so IDs containing dots survive.
    image_id = os.path.splitext(image_name)[0]

    features = extract_hog_features(os.path.join(image_folder, image_name))
    if features is None:
        print(f"Error loading image: {image_name}")
        failed_images.append(image_name)
        continue

    image_features[image_id] = features

    # Progress is rewritten in place on a single line via the carriage return.
    progress = (idx + 1) / total_images * 100
    print(f"Progress: {progress:.2f}% ({idx + 1}/{total_images})", end='\r')

print("\nImage loading and feature extraction complete!")
print(f"Extracted features for {len(image_features)} of {total_images} files; "
      f"{len(failed_images)} could not be read.")

Error loading image: A00220.jpg
Error loading image: A15763.jpg
Error loading image: A56106.jpg
Error loading image: A61982.jpg
Error loading image: A70618.jpg
Error loading image: A81652.jpg
Error loading image: A81850.jpg
Error loading image: A82134.jpg
Error loading image: A82875.jpg
Error loading image: A86277.jpg
Error loading image: A90976.jpg
Error loading image: A91256.jpg
Error loading image: A92207.jpg
Error loading image: A93184.jpg
Error loading image: A93211.jpg
Error loading image: B00366.jpg
Error loading image: B00857.jpg
Error loading image: B01107.jpg
Error loading image: B01791.jpg
Error loading image: B04630.jpg
Error loading image: B06088.jpg
Error loading image: B09213.jpg
Error loading image: B09724.jpg
Error loading image: B12719.jpg
Error loading image: B13502.jpg
Error loading image: B14500.jpg
Error loading image: B16775.jpg
Error loading image: B18498.jpg
Error loading image: B18634.jpg
Error loading image: B20284.jpg
Error loading image: B20741.jpg
Error lo

In [12]:

# Load the CSV file into a DataFrame.
# 'id' is parsed as text from the start so identifiers are never coerced to
# numbers (which would drop leading zeros before any later cast to str).
csv_file = 'persons_processed.csv'
df = pd.read_csv(csv_file, dtype={'id': str})



In [15]:
# The keys of image_features are strings, so the 'id' column is cast to str
# to make the dictionary lookup match.
df = df.assign(id=df['id'].astype(str))



In [17]:
from tqdm import tqdm


In [19]:
def get_image_features(image_id):
    """Look up the stored feature vector for ``image_id``.

    Returns the value held in the module-level ``image_features`` dictionary,
    or ``None`` when no entry exists for that ID.
    """
    return image_features.get(image_id)

# Register tqdm's progress_apply on pandas objects, with a label for the bar.
tqdm.pandas(desc="Applying image features")

# Look up each row's id; rows without a matching image get None.
features_by_row = df['id'].progress_apply(get_image_features)
df['image_features'] = features_by_row


Applying image features: 100%|███████████████████████████████████████████████| 61110/61110 [00:00<00:00, 385704.75it/s]


In [21]:

def serialize_features(features):
    """Return ``features`` as a JSON list string, or ``None`` if there are none.

    A JSON list keeps every value and can be restored with ``json.loads``.
    """
    if features is None:
        return None
    return json.dumps(np.asarray(features).tolist())


# Save the updated DataFrame to a new CSV file.
# The feature vectors are NumPy arrays; written as-is, to_csv would store
# str(array), which NumPy abbreviates with "..." once an array exceeds its
# print threshold (1000 elements by default), silently discarding most of the
# features. The export copy therefore holds JSON strings, while `df` itself
# keeps the original arrays for further use in this notebook.
df_export = df.assign(image_features=df['image_features'].map(serialize_features))
df_export.to_csv('updated_file.csv', index=False)

# Optionally, print the updated DataFrame
print(df.head())

       id  weight  height   sex        BMI  \
0  A00147   185.0    67.0  Male  28.971931   
1  A00220   155.0    73.0  Male  20.447551   
2  A00360   167.0    69.0  Male  24.658895   
3  A00367   245.0    72.0  Male  33.224344   
4  A01054   166.0    67.0  Male  25.996436   

                                      image_features  
0  [0.22856919751862967, 0.04150920729006487, 0.0...  
1                                               None  
2  [0.23262199139383705, 0.10198317857575802, 0.0...  
3  [0.19465186607192328, 0.1105202892829526, 0.06...  
4  [0.24357151117732567, 0.21394401765673984, 0.2...  


In [23]:
# Re-load the saved table and give the feature column a view-specific name.
# 'id' is read as text so identifiers are written back exactly as stored.
df = pd.read_csv('updated_file.csv', dtype={'id': str})
df = df.rename(columns={'image_features': 'side_image_features'})
# index=False matches the earlier save; without it every run would prepend
# another unnamed index column to the file.
df.to_csv('updated_file.csv', index=False)