In [2]:
import torch
from torch.utils.data import DataLoader
import os
import sys

def find_project_root(start, marker="src"):
    """Return the directory that should be used as the project root.

    Args:
        start: Directory the kernel is currently running in.
        marker: Name of a sub-directory that identifies the project root.
            Defaults to ``"src"`` because the notebook imports ``src.*``.

    Returns:
        ``start`` itself (as an absolute path) when it already contains
        ``marker``; otherwise the parent of ``start``, which is where the
        notebook originally moved to unconditionally.
    """
    start = os.path.abspath(start)
    if os.path.isdir(os.path.join(start, marker)):
        # Already at the root: do not climb again when the cell is re-run.
        return start
    return os.path.abspath(os.path.join(start, os.pardir))


# Re-running this cell must not walk further up the tree, so the parent
# directory is only used when the current directory lacks the `src` package.
project_root = find_project_root(os.getcwd())
os.chdir(project_root)  # Relative data/model paths below resolve from here

# Make the `src` package importable even if the kernel's import path does not
# track the working directory.
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# Verify current working directory
print("Current working directory:", os.getcwd())
# Now try importing
from src.data_processing.patch_dataset import PatchDataset
from src.data_processing.simclr_transforms import SimCLRTransform

Current working directory: /blue/vabfmc/data/working/d.uriartediaz/francokrepel/project-root


In [4]:
# Initialize dataset and dataloader.
# SimCLRTransform is applied to every patch; the training loop unpacks each
# batch as a pair (xi, xj).
transform = SimCLRTransform(size=224)
dataset = PatchDataset(root_dir='data/patches/BCC', transform=transform)
batch_size = 256  # Adjust based on your GPU memory

# The loss in the training cell is constructed with this same fixed
# batch_size, so a smaller trailing batch is dropped to keep every batch at
# exactly `batch_size` samples.
# NOTE(review): presumably NTXentLoss sizes its internals from batch_size —
# confirm against src/models/nt_xent_loss.py.
dataloader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
    drop_last=True,
)

# Fail here, with a clear message, rather than with a ZeroDivisionError when
# the training loop averages the loss over zero batches.
if len(dataloader) == 0:
    raise ValueError(
        f"Dataset has {len(dataset)} samples, fewer than batch_size={batch_size}; "
        "no full batch can be formed."
    )




In [None]:
#Implement the training loop for SimCLR.

import torch
import torch.optim as optim
from src.models.simclr_model import SimCLRModel
from src.models.nt_xent_loss import NTXentLoss

# Use the GPU when PyTorch can see one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Build the SimCLR model (base_model='resnet18', out_dim=128) and place it on
# the selected device in one step.
model = SimCLRModel(base_model='resnet18', out_dim=128).to(device)

# Adam over all model parameters, paired with the NT-Xent criterion that is
# configured with the dataloader's batch size.
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = NTXentLoss(batch_size=batch_size, temperature=0.5)

def train_one_epoch(net, loader, opt, loss_fn, dev):
    """Run one optimisation pass over ``loader``.

    Args:
        net: Model called on each view; it must return a 2-tuple whose second
            element is passed to ``loss_fn``.
        loader: Iterable yielding ``(view_a, view_b)`` batches.
        opt: Optimizer stepping ``net``'s parameters.
        loss_fn: Callable taking the two model outputs and returning a loss.
        dev: Device the input batches are moved to.

    Returns:
        Sum of the per-batch loss values divided by ``len(loader)``.
    """
    net.train()
    running = 0
    for view_a, view_b in loader:
        view_a = view_a.to(dev)
        view_b = view_b.to(dev)

        opt.zero_grad()

        # Each view goes through the model separately; the first returned
        # element is not used during training.
        _, out_a = net(view_a)
        _, out_b = net(view_b)

        batch_loss = loss_fn(out_a, out_b)
        batch_loss.backward()
        opt.step()

        running += batch_loss.item()

    return running / len(loader)


# Training loop
num_epochs = 100  # Adjust based on your needs
for epoch in range(num_epochs):
    avg_loss = train_one_epoch(model, dataloader, optimizer, criterion, device)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')
    
    
# Save only the encoder part of the model for later feature extraction.
encoder_path = 'models/simclr_encoder.pth'
# torch.save does not create missing parent directories; create the folder
# first so a finished training run is not lost to a FileNotFoundError.
os.makedirs(os.path.dirname(encoder_path), exist_ok=True)
torch.save(model.encoder.state_dict(), encoder_path)