In [8]:
import os
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms
from PIL import Image
from sklearn.preprocessing import MinMaxScaler
import timm
import wandb
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
wandb.login(key='0f806ca646c884d0d0105d882367d470d2d664e4')

DATASET_PTH = '/Users/sudhanshurai/Desktop/Dilip sir/dfi prediction/dataset'

[34m[1mwandb[0m: Currently logged in as: [33mstatboffin[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /Users/sudhanshurai/.netrc


In [21]:
class CustomImageDataset(Dataset):
	def __init__(self, img_dir, df, transform=None):
		self.img_dir = img_dir
		self.img_name = df['file_name'].values
		self.img_labels = df['label'].values.astype(float)
		self.transform = transform

	def __len__(self):
		return len(self.img_name)

	def __getitem__(self, idx):
		img_path = os.path.join(self.img_dir, self.img_name[idx])
		image = Image.open(img_path).convert("RGB")
		label = self.img_labels[idx]
		if self.transform:
			image = self.transform(image)
		return image, label
	
def get_augmentations(phase):
	if phase == 'train':
		return transforms.Compose([
			transforms.Resize((224,224)),
			transforms.RandomHorizontalFlip(),
			transforms.RandomVerticalFlip(),
			transforms.RandomRotation(90),
			transforms.ToTensor(),
			transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ,

		])
	else:
		return transforms.Compose([
			transforms.Resize((224,224)),
			transforms.ToTensor(),
			transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ,
		])

In [22]:
def get_train_test_dataset(val_numb: int):
	img_dir = DATASET_PTH
	train_transform = get_augmentations('train')
	test_transform = get_augmentations('test')
	train_val_df = pd.read_csv(os.path.join(img_dir, 'annotations.csv'))
	
	train_df = train_val_df[train_val_df['donnor'] != val_numb]
	test_df = train_val_df[train_val_df['donnor'] == val_numb]
	
	train_dataset = CustomImageDataset(img_dir, train_df, train_transform)
	test_dataset = CustomImageDataset(img_dir, test_df, test_transform)

	return train_dataset, test_dataset

In [23]:
import torch
import torchvision.models as models
import timm

# Function to create SEResNext model with enhanced classifier and dropout
def create_seresnext_model(dropout_prob=0.5):
	model = timm.create_model('seresnext50_32x4d', pretrained=True)
	num_ftrs = model.fc.in_features
	model.fc = torch.nn.Sequential(
		torch.nn.Linear(num_ftrs, 512),
		torch.nn.ReLU(),
		torch.nn.Dropout(dropout_prob),  # Dropout layer added
		torch.nn.Linear(512, 256),
		torch.nn.ReLU(),
		torch.nn.Dropout(dropout_prob),  # Dropout layer added
		torch.nn.Linear(256, 1)  # Adjust for regression task
	)
	return model


In [27]:
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from scipy.stats import pearsonr
from tqdm import tqdm

def calculate_metrics(labels, outputs):
	mean_absolute_error_value = mean_absolute_error(labels, outputs)
	mean_squared_error_value = mean_squared_error(labels, outputs)
	pearson_correlation, _ = pearsonr(labels, outputs)  # Use pearsonr to calculate Pearson correlation
	
	return mean_absolute_error_value, mean_squared_error_value, pearson_correlation

def train_model(model, train_dataloader, val_dataloader, criterion, optimizer, logger=None, num_epochs=25):
	
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	model.to(device)
	
	for epoch in range(num_epochs):
		model.train()  # Set model to training mode
		train_running_loss = 0.0
		train_all_labels = []
		train_all_outputs = []
		
		for train_inputs, train_labels in tqdm(train_dataloader, desc="Training"):
			train_inputs = train_inputs.to(device)
			train_labels = train_labels.to(device)

			optimizer.zero_grad()

			train_outputs = model(train_inputs)
			train_loss = criterion(train_outputs.squeeze(), train_labels.float())
						
			train_loss.backward()
			optimizer.step()

			train_running_loss += train_loss.item()
			train_all_labels.append(train_labels.cpu().numpy())
			train_all_outputs.append(train_outputs.squeeze().cpu().detach().numpy())
		
		train_epoch_loss = train_running_loss / len(train_dataloader)
		train_all_labels = np.concatenate(train_all_labels)
		train_all_outputs = np.concatenate(train_all_outputs)

		train_mean_absolute_error, train_mean_squared_error, train_pearson_correlation = calculate_metrics(train_all_labels, train_all_outputs)

		print(f'Training Loss: {train_epoch_loss:.4f}')
		print(f'Training Pearson Correlation: {train_pearson_correlation:.4f}')

		# Validation phase
		val_running_loss = 0.0
		val_all_labels = []
		val_all_outputs = []
		model.eval()  # Set model to evaluate mode

		with torch.no_grad():
			for val_inputs, val_labels in tqdm(val_dataloader, desc="Validation"):
				val_inputs = val_inputs.to(device)
				val_labels = val_labels.to(device)
				# Forward pass
				val_outputs = model(val_inputs)
				val_loss = criterion(val_outputs.squeeze(), val_labels.float())

				# Statistics
				val_running_loss += val_loss.item()
				val_all_labels.append(val_labels.cpu().numpy())
				val_all_outputs.append(val_outputs.squeeze().cpu().numpy())
		
		val_epoch_loss = val_running_loss / len(val_dataloader)
		val_all_labels = np.concatenate(val_all_labels)
		val_all_outputs = np.concatenate(val_all_outputs)

		# Calculate validation metrics
		val_mean_absolute_error, val_mean_squared_error, val_pearson_correlation = calculate_metrics(val_all_labels, val_all_outputs)

		# Logging
		if logger is not None:
			logger.log({
				'train_loss': train_epoch_loss,
				'val_loss': val_epoch_loss,
				'val_mean_absolute_error': val_mean_absolute_error,
				'val_mean_squared_error': val_mean_squared_error,
				'val_pearson_correlation': val_pearson_correlation,
				'epoch_number': epoch + 1  # Epoch number starting from 1
			})

		print(f'Validation Loss: {val_epoch_loss:.4f}')
		print(f'Validation Pearson Correlation: {val_pearson_correlation:.4f}')

	print('Training and Validation complete')


In [32]:
def RunExperiment(set_numb, model_name, numb_epochs, lr=0.0001):
	model_dict = {
		'seresnext': create_seresnext_model,
		# 'resnet': create_resnet_model,
		# 'inception': create_inception_model,
		# 'densenet': create_densenet_model,
		# 'efficientnet': create_efficientnet_model,
		# 'vit_base_patch16_224': create_vit_model,
		# 'mobilenetv3large': create_mobilenetv3large_model,
		# 'vgg19': create_vgg19_model
	}

	# Create model
	model = model_dict[model_name](0.0001)
	
	# Load datasets
	train_dataset, val_dataset = get_train_test_dataset(set_numb)
	
	# Create data loaders
	train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=1)
	val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=1)
	
	# Define loss function and optimizer
	criterion = nn.MSELoss()
	optimizer = optim.Adam(model.parameters(), lr=lr)
	
	run = wandb.init(
		project="DFI_prediction",
		name=f"tp_{model_name}_main_kaggle_set_numb_{set_numb}",
		config={
			'model_name': model_name,
			'num_epochs': numb_epochs,
			'learning_rate': lr,
			'batch_size': 32,
			'dataset_number': set_numb
		}
	)
	
	# Train and test the model
	train_model(model, train_dataloader, val_dataloader, criterion, optimizer, num_epochs=numb_epochs, logger=run)
	
	
	# Finish the WandB run
	if run is not None:
		run.finish()

In [33]:
RunExperiment(2, 'seresnext', 2)

Training:   0%|          | 0/29 [00:00<?, ?it/s]
A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.0 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<string>", line 1, in <module>
  File "/Users/sudhanshurai/.pyenv/versions/3.9.10/lib/python3.9/multiprocessing/spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "/Users/sudhanshurai/.pyenv/versions/3.9.10/lib/python3.9/multiprocessing/spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
  File "/Users/sudhanshurai/Desktop/Dilip sir/dfi prediction/myenv/lib/python3.9/site-packages/torch/__init__.py", line 1477, 

RuntimeError: DataLoader worker (pid(s) 6592) exited unexpectedly