In [None]:
import os
import re
import csv
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
from torchvision.models import ResNet101_Weights
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Exponentiation, RationalQuadratic
from sklearn.metrics import mean_absolute_error
from google.colab import drive

# Mount Google Drive so the image folders and CSV files under /content/drive are reachable.
drive.mount('/content/drive')

# Seed NumPy and PyTorch before any model is built or any DataLoader shuffles, so that
# the randomly initialised head layers and the batch order are repeatable across
# "Restart & Run All" (GPU kernels may still introduce small nondeterminism).
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Run on the first CUDA device when one is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


# Image folder for each split; BMIDataset and process_dataset list their files from here.
DATASET_PATHS = dict(
    train="/content/drive/MyDrive/Image_train/",
    val="/content/drive/MyDrive/Image_val/",
)
# Combined feature CSV for each split: written by process_dataset, read back for the GPR step.
CSV_PATHS = dict(
    train="/content/drive/MyDrive/Image_train_features_new.csv",
    val="/content/drive/MyDrive/Image_val_features_new.csv",
)

# Alternative dataset variants. To use one, comment out the pair above and
# uncomment exactly one DATASET_PATHS / CSV_PATHS pair below.
# -- gaussian_15 --
# DATASET_PATHS = {"train": "/content/drive/MyDrive/Image_train_gaussian_15/", "val": "/content/drive/MyDrive/Image_val_gaussian_15/"}
# CSV_PATHS = {"train": "/content/drive/MyDrive/Image_train_features_new_gaussian_15.csv", "val": "/content/drive/MyDrive/Image_val_features_new_gaussian_15.csv"}
# -- gaussian_30 --
# DATASET_PATHS = {"train": "/content/drive/MyDrive/Image_train_gaussian_30/", "val": "/content/drive/MyDrive/Image_val_gaussian_30/"}
# CSV_PATHS = {"train": "/content/drive/MyDrive/Image_train_features_new_gaussian_30.csv", "val": "/content/drive/MyDrive/Image_val_features_new_gaussian_30.csv"}
# -- rotate_10 --
# DATASET_PATHS = {"train": "/content/drive/MyDrive/Image_train_rotate_10/", "val": "/content/drive/MyDrive/Image_val_rotate_10/"}
# CSV_PATHS = {"train": "/content/drive/MyDrive/Image_train_features_new_rotate_10.csv", "val": "/content/drive/MyDrive/Image_val_features_new_rotate_10.csv"}
# -- rotate_20 --
# DATASET_PATHS = {"train": "/content/drive/MyDrive/Image_train_rotate_20/", "val": "/content/drive/MyDrive/Image_val_rotate_20/"}
# CSV_PATHS = {"train": "/content/drive/MyDrive/Image_train_features_new_rotate_20.csv", "val": "/content/drive/MyDrive/Image_val_features_new_rotate_20.csv"}




# Shared preprocessing for every image fed to the network: resize to 224x224,
# convert to a float tensor, then normalise each RGB channel with the mean/std
# values conventionally used with ImageNet-pretrained torchvision models.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

def extract_bmi_from_filename(filename):
    """Parse height, weight and BMI from an image filename.

    The name must match ``<digits>_<F|M>_<digits>_<height>_<weight><rest>``, where
    the sex letter may be upper or lower case and at least one character must
    follow the weight digits (normally the file extension).

    Args:
        filename: Bare file name (no directory part).

    Returns:
        A ``(height, weight, bmi)`` tuple, where height and weight are the
        integer fields divided by 100000 and ``bmi = weight / height ** 2``
        (presumably metres and kilograms -- confirm against the dataset).
        ``(None, None, None)`` is returned, after printing a message, when the
        name does not match the pattern or the values cannot produce a BMI
        (for example a height field of zero).
    """
    match = re.match(r"\d+?_([FMfm])_(\d+?)_(\d+?)_(\d+).+", filename)
    if not match:
        print(f"Skipping invalid filename: {filename}")
        return None, None, None
    try:
        height = int(match.group(3)) / 100000
        weight = int(match.group(4)) / 100000
        bmi = weight / (height ** 2)
    except (ValueError, ZeroDivisionError):
        # A zero height field would otherwise escape as ZeroDivisionError and
        # abort the caller; treat it like any other unusable filename.
        print(f"Error parsing BMI from filename: {filename}")
        return None, None, None
    return height, weight, bmi

class BMIDataset(Dataset):
    """Image dataset whose BMI label is parsed from each file name.

    Only ``.png`` / ``.jpg`` / ``.jpeg`` files (case-insensitive) whose name
    yields a BMI via ``extract_bmi_from_filename`` are kept. Files with
    unparseable names are dropped when the dataset is built instead of being
    served with a placeholder label of 0.0, which would corrupt the regression
    targets.

    Args:
        folder: Directory containing the images.
        transform: Optional callable applied to the RGB PIL image.
    """

    def __init__(self, folder, transform=None):
        self.folder = folder
        self.transform = transform
        # Parallel lists: image_files[i] carries the label bmi_labels[i].
        self.image_files = []
        self.bmi_labels = []
        # Sort the listing so the sample order does not depend on the
        # filesystem's arbitrary directory order.
        for name in sorted(os.listdir(folder)):
            if not name.lower().endswith(('.png', '.jpg', '.jpeg')):
                continue
            _, _, bmi = extract_bmi_from_filename(name)
            if bmi is None:
                continue
            self.image_files.append(name)
            self.bmi_labels.append(float(bmi))

    def __len__(self):
        """Return the number of usable images."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return ``(image, bmi)`` for sample ``idx``; the image is transformed when a transform is set."""
        img_path = os.path.join(self.folder, self.image_files[idx])
        # convert() loads the pixel data and returns a new image, so the file
        # handle can be released as soon as the with-block exits.
        with Image.open(img_path) as raw:
            image = raw.convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, self.bmi_labels[idx]

# One dataset per split, both using the same preprocessing pipeline.
train_dataset = BMIDataset(folder=DATASET_PATHS["train"], transform=transform)
val_dataset = BMIDataset(folder=DATASET_PATHS["val"], transform=transform)

# Batches of 16 loaded by two worker processes; only the training split is shuffled.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=2,
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=16,
    shuffle=False,
    num_workers=2,
)

class DFNet(nn.Module):
    """ResNet-101 backbone followed by a small regression head.

    The backbone is created with the ``IMAGENET1K_V1`` weights and its ``fc``
    layer is replaced by ``nn.Identity``. Its output goes through a linear
    layer of width ``num_features`` with ReLU, and a final linear layer maps
    those features to a single value.

    Args:
        num_features: Width of the intermediate feature layer.
    """

    def __init__(self, num_features=32):
        super().__init__()
        backbone = models.resnet101(weights=ResNet101_Weights.IMAGENET1K_V1)
        backbone_width = backbone.fc.in_features
        # Drop the classifier so the backbone hands back what used to feed it.
        backbone.fc = nn.Identity()
        self.resnet = backbone
        self.fc1 = nn.Linear(backbone_width, num_features)
        self.relu = nn.ReLU()
        self.regressor = nn.Linear(num_features, 1)

    def forward(self, x):
        """Return ``(prediction, features)`` for the input batch ``x``."""
        pooled = self.resnet(x)
        features = self.relu(self.fc1(pooled))
        return self.regressor(features), features
# Build the network on the selected device.
model_nn = DFNet(num_features=32).to(DEVICE)

# Mean-squared-error regression on BMI, optimised with Adam over every parameter.
criterion = nn.MSELoss()
optimizer = optim.Adam(model_nn.parameters(), lr=1e-4)
num_epochs = 10

for epoch in range(num_epochs):
    model_nn.train()
    loss_sum = 0.0
    for batch_x, batch_y in train_loader:
        batch_x = batch_x.to(DEVICE)
        # Labels become a float32 column so they line up with the model output.
        batch_y = batch_y.to(device=DEVICE, dtype=torch.float32).unsqueeze(1)

        optimizer.zero_grad()
        predictions, _ = model_nn(batch_x)
        loss = criterion(predictions, batch_y)
        loss.backward()
        optimizer.step()

        # Weight the batch mean by the batch size so the epoch figure is a per-sample mean.
        loss_sum += loss.item() * batch_x.size(0)

    mean_loss = loss_sum / len(train_loader.dataset)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {mean_loss:.4f}")

# Put the trained network into evaluation mode; the feature-extraction cell
# below runs it under torch.no_grad() and expects inference behaviour.
model_nn.eval()
# Disabled: optional direct evaluation of the network's own BMI predictions on
# the validation loader (reports MAE). Uncomment to run it.
# all_preds = []
# all_targets = []
# with torch.no_grad():
#     for images, targets in val_loader:
#         images = images.to(DEVICE)
#         targets = torch.as_tensor(targets, dtype=torch.float32, device=DEVICE).unsqueeze(1)
#         outputs, _ = model_nn(images)
#         all_preds.extend(outputs.cpu().numpy().flatten().tolist())
#         all_targets.extend(targets.cpu().numpy().flatten().tolist())
# val_mae = mean_absolute_error(all_targets, all_preds)
# print(f"DFNet Validation MAE: {val_mae:.4f}")



In [None]:
def load_human_features(csv_path):
    """Read the per-image anthropometric features from a CSV file.

    The CSV must provide a ``Filename`` column and columns ``Anthro_1`` through
    ``Anthro_7``; any other columns are ignored.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        A dict mapping each ``Filename`` value to a list of its seven
        ``Anthro_*`` values as floats. If a filename occurs more than once,
        the last row wins.
    """
    import pandas as pd

    anthro_columns = [f"Anthro_{k}" for k in range(1, 8)]
    table = pd.read_csv(csv_path)
    return {
        record["Filename"]: [float(record[column]) for column in anthro_columns]
        for _, record in table.iterrows()
    }

def extract_features(image_path, model, human_features_dict):
    """Build the combined feature vector for one image.

    Args:
        image_path: Path of the image file to read.
        model: Callable returning ``(output, features)`` for a batched image
            tensor; only ``features`` is used.
        human_features_dict: Mapping from file name (basename) to a list of
            seven anthropometric values.

    Returns:
        A list holding the seven anthropometric values followed by the deep
        features of the image, or ``None`` when the image cannot be read.
        When the file name has no entry in ``human_features_dict`` a warning is
        printed and seven zeros are used in its place.
    """
    img_e = cv2.imread(image_path)
    if img_e is None:
        print(f"Failed to read image: {image_path}")
        return None
    filename = os.path.basename(image_path)
    anthro_feats = human_features_dict.get(filename)
    if anthro_feats is None:
        # Keep the zero fallback, but make it visible: a silent fallback hides
        # filename mismatches between the image folder and the features CSV.
        print(f"No anthropometric features for {filename}; using zeros")
        anthro_feats = [0.0] * 7
    # OpenCV decodes to BGR; convert to RGB before handing the image to the PIL-based transform.
    pil_img = Image.fromarray(cv2.cvtColor(img_e, cv2.COLOR_BGR2RGB))
    img_tensor = transform(pil_img).unsqueeze(0).to(DEVICE)
    with torch.no_grad():
        _, deep_feats = model(img_tensor)
    # Remove only the batch dimension. A blanket squeeze would also collapse a
    # width-1 feature vector to a scalar and break the list concatenation below.
    deep_feats = deep_feats.squeeze(0).cpu().tolist()
    # No per-image torch.cuda.empty_cache(): it only hands cached blocks back to
    # the driver, which forces slow re-allocation on the next image.
    return list(anthro_feats) + deep_feats
BATCH_SIZE = 10  # images handled per chunk; rows are written and progress is printed once per chunk


def process_dataset(dataset_type, model, human_features_dict):
    """Extract features for every image of a split and write them to its CSV.

    Images are listed from ``DATASET_PATHS[dataset_type]`` and the result is
    written to ``CSV_PATHS[dataset_type]``, overwriting any existing file. Each
    row holds the file name, the height/weight/BMI parsed from the name, seven
    anthropometric values and the deep features returned by
    ``extract_features``. The header assumes 32 deep features, matching a model
    built with ``num_features=32``. Images whose name cannot be parsed or that
    cannot be read are skipped.

    The model is switched to evaluation mode for the duration of the call and
    its previous training/eval mode is restored afterwards, so the features do
    not depend on which notebook cells happened to run earlier.

    Args:
        dataset_type: Key into ``DATASET_PATHS`` / ``CSV_PATHS``.
        model: The network passed on to ``extract_features``.
        human_features_dict: Mapping from file name to anthropometric values.

    Returns:
        None. When the folder contains no images a message is printed and no
        CSV is written.
    """
    image_folder = DATASET_PATHS[dataset_type]
    output_csv = CSV_PATHS[dataset_type]
    # Sorted so the CSV row order is deterministic rather than filesystem-dependent.
    image_files = sorted(
        f for f in os.listdir(image_folder)
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    )
    total_images = len(image_files)
    if total_images == 0:
        print(f"No images found in {image_folder}")
        return
    header = ["Filename", "Height", "Weight", "BMI"] + \
             [f"Anthro_{i+1}" for i in range(7)] + [f"DeepFeature_{i+1}" for i in range(32)]
    was_training = model.training
    model.eval()
    try:
        with open(output_csv, mode="w", newline="") as file:
            writer = csv.writer(file)
            writer.writerow(header)
            for batch_start in range(0, total_images, BATCH_SIZE):
                batch_files = image_files[batch_start: batch_start+BATCH_SIZE]
                batch_results = []
                for img_name in batch_files:
                    img_path = os.path.join(image_folder, img_name)
                    height, weight, bmi = extract_bmi_from_filename(img_name)
                    if height is None:
                        continue
                    feats = extract_features(img_path, model, human_features_dict)
                    if feats is None:
                        continue
                    batch_results.append([img_name, height, weight, bmi] + feats)
                writer.writerows(batch_results)
                torch.cuda.empty_cache()
                progress = min(batch_start+BATCH_SIZE, total_images)
                print(f"Processed {progress}/{total_images} images in {dataset_type} ({progress/total_images:.0%})")
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)
    print(f"Feature extraction complete for {dataset_type}! Saved to {output_csv}")

# Per-image anthropometric features for each split, keyed by file name.
train_human_features = load_human_features("/content/drive/MyDrive/Image_train_features.csv")
val_human_features = load_human_features("/content/drive/MyDrive/Image_val_features.csv")
# Alternative dataset variants: uncomment exactly one pair (and comment out the
# pair above) to match the DATASET_PATHS / CSV_PATHS variant in use.
# train_human_features = load_human_features("/content/drive/MyDrive/Image_train_gaussian_15_features.csv")
# val_human_features = load_human_features("/content/drive/MyDrive/Image_val_gaussian_15_features.csv")
# train_human_features = load_human_features("/content/drive/MyDrive/Image_train_gaussian_30_features.csv")
# val_human_features = load_human_features("/content/drive/MyDrive/Image_val_gaussian_30_features.csv")
# train_human_features = load_human_features("/content/drive/MyDrive/Image_train_rotate_10_features.csv")
# val_human_features = load_human_features("/content/drive/MyDrive/Image_val_rotate_10_features.csv")
# train_human_features = load_human_features("/content/drive/MyDrive/Image_train_rotate_20_features.csv")
# val_human_features = load_human_features("/content/drive/MyDrive/Image_val_rotate_20_features.csv")

# Write the combined feature CSV for the training split first, then validation.
for _split, _split_features in (
    ("train", train_human_features),
    ("val", val_human_features),
):
    process_dataset(_split, model_nn, _split_features)


# Load every column except the leading Filename column. After that drop the
# layout is: 0 = Height, 1 = Weight, 2 = BMI, 3..9 = Anthro_1..7, 10.. = deep features.
# ndmin=2 keeps the arrays two-dimensional even when a CSV holds a single data row.
train_data = np.loadtxt(CSV_PATHS["train"], delimiter=",", skiprows=1, usecols=range(1, 4+7+32), ndmin=2)
val_data = np.loadtxt(CSV_PATHS["val"], delimiter=",", skiprows=1, usecols=range(1, 4+7+32), ndmin=2)
y_train = train_data[:, 2]
x_7f_train = train_data[:, 3:10]
x_df_train = train_data[:, 10:]
y_val = val_data[:, 2]
x_7f_val = val_data[:, 3:10]
x_df_val = val_data[:, 10:]

# Standardise the anthropometric features with training-set statistics only;
# the deep features are used as they are.
Mean = np.mean(x_7f_train, axis=0)
Std = np.std(x_7f_train, axis=0)
# A constant column has zero spread; dividing by it would fill the inputs with
# NaN/inf and make the GPR fit fail. Use 1 so such a column is just centred to 0.
Std[Std == 0] = 1.0
x_7f_train_norm = (x_7f_train - Mean) / Std
x_7f_val_norm = (x_7f_val - Mean) / Std
x_train = np.concatenate([x_7f_train_norm, x_df_train], axis=1)
x_val = np.concatenate([x_7f_val_norm, x_df_val], axis=1)

# Gaussian-process regression on BMI with a squared rational-quadratic kernel.
kernel = Exponentiation(RationalQuadratic(), exponent=2)
gpr = GaussianProcessRegressor(kernel=kernel, alpha=1e-3)
gpr.fit(x_train, y_train)
y_pred = gpr.predict(x_val)
mae = mean_absolute_error(y_val, y_pred)
print(f"GPR Validation MAE: {mae:.4f}")
