In [1]:
import os 
import cv2
import pandas as pd
import torch
from ultralytics import YOLO
import numpy as np
from glob import glob
from tqdm import tqdm
from sklearn.ensemble import RandomForestRegressor #type: ignore
from sklearn.model_selection import train_test_split#type: ignore
from sklearn.metrics import mean_squared_error      #type: ignore   

In [2]:
train_df = pd.read_csv('Train.csv')
test_df = pd.read_csv('Test.csv')   

In [3]:
train_path = 'data/train'
test_path = 'data/test'
model_path = 'Models/Models'

In [4]:
model = YOLO(os.path.join(model_path, 'best_full.pt'))

In [7]:
# Function to get image paths
def get_image_paths(folder):
    return sorted(glob(os.path.join(folder, '*.jpg')))  

In [6]:
def extract_features(model, image_path):
    image = cv2.imread(image_path)
    results = model(image)

    masks = results[0].masks.xy if results[0].masks else []
    segmented_image = np.zeros_like(image)


    total_area = 0
    for mask in masks:
        points = np.array(mask, dtype=np.int32)
        cv2.fillPoly(segmented_image, [points], (255, 255, 255))
        total_area += cv2.contourArea(points)

    return total_area

In [8]:
train_features = []
train_labels = []

for _, row in tqdm(train_df.iterrows(), total=len(train_df)):
    folder = os.path.join(train_path, row['FolderName'])
    side = row['Side']
    images = get_image_paths(folder)
    images = [img for img in images if f'_{side}']

    best_area = 0 
    for img_path in images:
        area = extract_features(model, img_path)
        best_area = max(best_area, area)

    train_features.append(best_area)
    train_labels.append(row['RootVolume'])

train_features = np.array(train_features).reshape(-1, 1)
train_labels = np.array(train_labels)

100%|██████████| 386/386 [00:03<00:00, 117.24it/s]


In [9]:
X_train, X_val, y_train, y_val = train_test_split(train_features, train_labels, test_size=0.2, random_state=42)
regressor = RandomForestRegressor(n_estimators=100, random_state=42)
regressor.fit(X_train, y_train)

y_pred = regressor.predict(X_val)
rmse = np.sqrt(mean_squared_error(y_val, y_pred))
print(f'Validation RMSE: {rmse}')

Validation RMSE: 1.3679645003081706


In [13]:
test_features = []
test_ids = []

for _, row in tqdm(test_df.iterrows(), total=len(test_df)):
    folder = os.path.join(test_path, row['FolderName'])
    side = row['Side']
    images = get_image_paths(folder)
    images = [img for img in images if f'_{side}_' in img]

    best_area = 0 
    for img_path in images:
        area = extract_features(model, img_path)
        best_area = max(best_area, area)

    test_features.append(best_area)
    test_ids.append(row['FolderName'])

test_features = np.array(test_features).reshape(-1, 1)
predictions = regressor.predict(test_features)

100%|██████████| 130/130 [00:00<00:00, 172.41it/s]


In [14]:
submission_df = pd.DataFrame({"ID": test_ids, "RootVolume": predictions})

In [16]:
submission_df['RootVolume'].unique

<bound method Series.unique of 0      2.088643
1      2.088643
2      2.088643
3      2.088643
4      2.088643
         ...   
125    2.088643
126    2.088643
127    2.088643
128    2.088643
129    2.088643
Name: RootVolume, Length: 130, dtype: float64>

In [17]:
#converting to csv
submission_df.to_csv('submission201.csv', index=False)

In [None]:
def display_sample_images(dir_path, sample=5):
    dir_path = Path(dir_path) if isinstance(dir_path, str) else dir_path

    image_list = []
    # Sort the images to ensure they are processed in order
    images = sorted(dir_path.glob("*.jpg"))
    if not images:
        return None

    # Iterate over the first 'sample' images
    for img_path in images[:sample]:
        img = read_image(str(img_path))
        resize_transform = transforms.Resize((240, 240))
        img = resize_transform(img)
        image_list.append(img)

    # Organize the grid to have 'sample' images per row
    Grid = make_grid(image_list, nrow=5)
    # Convert the tensor grid to a PIL Image for display
    img = torchvision.transforms.ToPILImage()(Grid)
    return img

(130, 2)