In [24]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.models import resnet50, ResNet50_Weights
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import pandas as pd
from PIL import Image

**TODO:** Add a reference (name and link) to the Kaggle dataset used in this notebook.

In [25]:
# Read the metadata tables from the local data directory.
# The training CSV's first column becomes the row index; the test CSV is
# read with pandas' default index.
test_df = pd.read_csv("./data/test.csv")
train_df = pd.read_csv("./data/train.csv", index_col=0)

In [26]:
# Print the schema summary of the training table (columns, non-null counts, dtypes).
train_df.info()
# Bare last expression so the notebook renders the frame itself below the summary.
train_df

<class 'pandas.core.frame.DataFrame'>
Index: 79950 entries, 0 to 79949
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   file_name  79950 non-null  object
 1   label      79950 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 1.8+ MB


Unnamed: 0,file_name,label
0,train_data/a6dcb93f596a43249135678dfcfc17ea.jpg,1
1,train_data/041be3153810433ab146bc97d5af505c.jpg,0
2,train_data/615df26ce9494e5db2f70e57ce7a3a4f.jpg,1
3,train_data/8542fe161d9147be8e835e50c0de39cd.jpg,0
4,train_data/5d81fa12bc3b4cea8c94a6700a477cf2.jpg,1
...,...,...
79945,train_data/9283b107f6274279b6f15bbe77c523aa.jpg,0
79946,train_data/4c6b17fe6dd743428a45773135a10508.jpg,1
79947,train_data/1ccbf96d04e342fd9f629ad55466b29e.jpg,0
79948,train_data/ff960b55f296445abb3c5f304b52e104.jpg,1


In [27]:
# Print the schema summary of the test table (columns, non-null counts, dtypes).
test_df.info()
# Bare last expression so the notebook renders the frame itself below the summary.
test_df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5540 entries, 0 to 5539
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   id      5540 non-null   object
dtypes: object(1)
memory usage: 43.4+ KB


Unnamed: 0,id
0,test_data_v2/1a2d9fd3e21b4266aea1f66b30aed157.jpg
1,test_data_v2/ab5df8f441fe4fbf9dc9c6baae699dc7.jpg
2,test_data_v2/eb364dd2dfe34feda0e52466b7ce7956.jpg
3,test_data_v2/f76c2580e9644d85a741a42c6f6b39c0.jpg
4,test_data_v2/a16495c578b7494683805484ca27cf9f.jpg
...,...
5535,test_data_v2/483412064ff74d9d9472d606b65976d9.jpg
5536,test_data_v2/c0b49ba4081a4197b422dac7c15aea7f.jpg
5537,test_data_v2/01454aaedec140c0a3ca1f48028c41cf.jpg
5538,test_data_v2/e9adfea8b67e4791968c4c2bdd8ec343.jpg


In [28]:
# Hold out 25% of the labelled rows for validation. Stratifying on `label`
# keeps the class proportions the same in both splits, and the fixed seed
# makes the shuffle reproducible.
train_split, val_split = train_test_split(
    train_df,
    test_size=0.25,
    random_state=42,
    shuffle=True,
    stratify=train_df['label'],
)

# Show the schema of each split, separated by blank lines.
train_split.info()
print('\n')
val_split.info()

<class 'pandas.core.frame.DataFrame'>
Index: 59962 entries, 28034 to 24089
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   file_name  59962 non-null  object
 1   label      59962 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 1.4+ MB


<class 'pandas.core.frame.DataFrame'>
Index: 19988 entries, 59408 to 26706
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   file_name  19988 non-null  object
 1   label      19988 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 468.5+ KB


In [29]:
# Report how many rows of each class ended up in each split.
# One value_counts() per split replaces four boolean-mask filters, and the
# f-strings use double quotes outside / single quotes inside so the cell also
# parses on Python versions older than 3.12 (quote reuse inside an f-string
# replacement field is a SyntaxError there).
train_label_counts = train_split['label'].value_counts()
val_label_counts = val_split['label'].value_counts()

# .get(label, 0) keeps the original "0 when the class is absent" behaviour.
print(f"Label 0 in training set: {train_label_counts.get(0, 0)}\n"
      f"Label 1 in training set: {train_label_counts.get(1, 0)}\n"
      f"Label 0 in validation set: {val_label_counts.get(0, 0)}\n"
      f"Label 1 in validation set: {val_label_counts.get(1, 0)}")

Label 0 in training set: 29981
Label 1 in training set: 29981
Label 0 in validation set: 9994
Label 1 in validation set: 9994


**TODO:** Add a reference to the PyTorch documentation on writing a custom `Dataset`, which the class below follows.

In [49]:
class CustomImageDataset(Dataset):
    """Dataset that loads the images listed in a DataFrame from ``./data/``.

    Column 0 of the frame holds each image path relative to ``./data/``.
    Unless ``test`` is set, column 1 holds the label that is returned
    alongside the image.

    Args:
        dataframe: Table with one row per image.
        transform: Optional callable applied to the RGB image after loading.
        test: When truthy, ``__getitem__`` returns only the image and the
            label column is never read.
    """

    def __init__(self, dataframe, transform=None, test=False):
        self.dataframe = dataframe
        self.transform = transform
        self.test = test

    def __len__(self):
        """Return the number of rows (images) in the underlying frame."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load the image at positional row ``idx``.

        Returns:
            ``image`` when ``self.test`` is truthy, otherwise the pair
            ``(image, label)``.
        """
        img_path = "./data/" + self.dataframe.iloc[idx, 0]
        # Image.open keeps the underlying file open; convert() produces an
        # independent, fully loaded copy, so the handle can be released
        # immediately instead of waiting for garbage collection.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')

        # Compare against None rather than relying on the callable's truthiness.
        if self.transform is not None:
            image = self.transform(image)

        if self.test:
            return image

        label = self.dataframe.iloc[idx, 1]
        return image, label

In [50]:
# Preprocessing applied to every image before it reaches the network:
# resize to a fixed 256x256, convert to a float tensor in [0, 1], then
# normalise each channel.
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    # Use the ImageNet channel statistics: the pretrained ResNet-50 backbone
    # (whose early stages stay frozen in this notebook) was trained on inputs
    # normalised this way, so 0.5/0.5 would feed it a shifted distribution.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [51]:
# Wrap each table in a dataset sharing the same preprocessing pipeline.
# The test table has no label column, so it is built in test mode and
# yields images only.
train_dataset = CustomImageDataset(dataframe=train_split, transform=transform)
val_dataset = CustomImageDataset(dataframe=val_split, transform=transform)
test_dataset = CustomImageDataset(dataframe=test_df, transform=transform, test=True)

In [52]:
# Number of samples per mini-batch, shared by all three loaders.
batch_size = 128

# Only the training loader reshuffles its samples each epoch.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Validation and test batches are served in dataset order.
val_loader, test_loader = (
    DataLoader(dataset=eval_dataset, batch_size=batch_size, shuffle=False)
    for eval_dataset in (val_dataset, test_dataset)
)

**TODO:** Add a reference for the use of ResNet-50, an image of its architecture, and an explanation of why training is restricted to the final layers (`layer4` and `fc`).

In [53]:
# Start from torchvision's ResNet-50 with its default pretrained weights.
model = resnet50(weights=ResNet50_Weights.DEFAULT)

# Replace the classification head with a freshly initialised linear layer
# that keeps the same input width but emits 2 logits (one per label).
head_in_features = model.fc.in_features
model.fc = nn.Linear(in_features=head_in_features, out_features=2)

In [54]:
# Freeze every parameter of the network first ...
model.requires_grad_(False)

# ... then re-enable gradients only for the parts being fine-tuned:
# the `layer4` stage and the new `fc` head.
for trainable_part in (model.layer4, model.fc):
    trainable_part.requires_grad_(True)

In [55]:
# Use the current accelerator's device type when one is available,
# otherwise fall back to the CPU.
if torch.accelerator.is_available():
    device = torch.accelerator.current_accelerator().type
else:
    device = "cpu"

# Move the model's parameters and buffers onto the chosen device.
model.to(device)
print(f"Device: {device}")

Device: cuda


In [56]:
# Cross-entropy over the two output logits.
loss_fn = nn.CrossEntropyLoss()

# Plain SGD over all model parameters; parameters that never receive a
# gradient are left untouched by the update step.
optimizer = optim.SGD(params=model.parameters(), lr=0.001)

# Halve the learning rate after every 2 calls to scheduler.step().
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=2, gamma=0.5)

In [None]:
# Fine-tune for 10 epochs. Each epoch is one optimisation pass over the
# training loader followed by an accuracy check on the validation loader,
# after which the learning-rate scheduler advances one step.
# NOTE(review): model.train() switches every submodule to training mode, so
# BatchNorm layers in the frozen stages presumably keep updating their running
# statistics even though their parameters are frozen. Confirm this is intended.
for epoch in range(10):
    model.train()
    total_loss = 0.0
    train_correct = 0
    train_count = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        pred = model(images)
        loss = loss_fn(pred, labels)
        loss.backward()
        optimizer.step()

        batch_len = labels.size(0)
        # The loss is a per-batch mean; weight it by the batch size so the
        # (usually smaller) final batch is not over-represented in the average.
        total_loss += loss.item() * batch_len

        # argmax over the class dimension gives the predicted label without
        # going through the discouraged `.data` accessor.
        predicted = pred.argmax(dim=1)
        train_count += batch_len
        train_correct += (predicted == labels).sum().item()

    train_acc = 100 * train_correct/train_count
    # Per-sample average of the training loss over the whole epoch.
    avg_train_loss = total_loss/train_count

    model.eval()
    val_correct = 0
    val_count = 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)

            pred = model(images)
            predicted = pred.argmax(dim=1)
            val_count += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    val_acc = 100 * val_correct/val_count

    print(f'---------------[ Epoch {epoch} ]---------------\n'
          f'Training Accuracy: {train_acc}%\n'
          f'Average Training Loss: {avg_train_loss}\n'
          f'Validation Accuracy: {val_acc}%')

    scheduler.step()