# 1: Setup and Imports

In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import auc, roc_auc_score
import warnings
import matplotlib.pyplot as plt

# Set device to CPU or GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
warnings.filterwarnings("ignore")

# Function to seed everything
def seed_everything(seed):
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

# 2: Load Dataset

In [2]:
# Load the dataset
# Update the path to your dataset
file_path = "/kaggle/input/dataset/marking.csv"
data = pd.read_csv(file_path)

# Preview the dataset
print(data.head())

# Replace this with the actual test dataset
test = data.copy()

   patient_id      image_id  target  source     sex  age_approx  \
0  IP_7279968  ISIC_2637011       0  ISIC20    male        45.0   
1  IP_3075186  ISIC_0015719       0  ISIC20  female        45.0   
2  IP_2842074  ISIC_0052212       0  ISIC20  female        50.0   
3  IP_6890425  ISIC_0068279       0  ISIC20  female        45.0   
4  IP_8723313  ISIC_0074268       0  ISIC20  female        55.0   

  anatom_site_general_challenge  
0                     head/neck  
1               upper extremity  
2               lower extremity  
3                     head/neck  
4               upper extremity  


# 3: Custom Dataset Class

In [3]:
class CustomDataset(Dataset):
    def __init__(self, dataframe, transforms=None):
        self.dataframe = dataframe
        self.transforms = transforms

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, index):
        # Get image path and target
        row = self.dataframe.iloc[index]
        image_id = row['image_id']  # Reference the correct column
        target = row['target']  # Assuming this is the target column
        
        # Load image (adjust path if needed)
        image_path = f"/kaggle/input/dataset/512x512-test/512x512-test/ISIC_0052060.jpg"
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = Image.fromarray(image)

        # Apply transformations
        if self.transforms:
            image = self.transforms(image)

        return image, target

# 4: Define Transformations

In [4]:
# Define transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Create dataset and dataloader
dataset = CustomDataset(dataframe=test, transforms=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=False)

# 5: Load Model

In [5]:
# Load a pre-trained model
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 1)  # Adjust output layer for binary classification
model.to(device)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 160MB/s]


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

# 6: Inference Loop

In [6]:
# Dummy inference loop
model.eval()
predictions = []
with torch.no_grad():
    for images, _ in tqdm(dataloader):
        images = images.to(device)
        outputs = model(images)
        predictions.extend(outputs.cpu().numpy())

100%|██████████| 1891/1891 [1:01:47<00:00,  1.96s/it]


# 7: Save Predictions

In [7]:
# Save predictions
test['predictions'] = predictions
test.to_csv("predictions.csv", index=False)

print("Predictions saved to predictions.csv")

Predictions saved to predictions.csv


In [11]:
import pandas as pd

# Load the predictions CSV file
predictions = pd.read_csv("predictions.csv")

# Display the first few rows of the file
print(predictions.head())

   patient_id      image_id  target  source     sex  age_approx  \
0  IP_7279968  ISIC_2637011       0  ISIC20    male        45.0   
1  IP_3075186  ISIC_0015719       0  ISIC20  female        45.0   
2  IP_2842074  ISIC_0052212       0  ISIC20  female        50.0   
3  IP_6890425  ISIC_0068279       0  ISIC20  female        45.0   
4  IP_8723313  ISIC_0074268       0  ISIC20  female        55.0   

  anatom_site_general_challenge   predictions  
0                     head/neck  [-0.6931233]  
1               upper extremity  [-0.6931233]  
2               lower extremity  [-0.6931233]  
3                     head/neck  [-0.6931233]  
4               upper extremity  [-0.6931233]  
