In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from glob import glob
 
from sklearn.model_selection import train_test_split
from sklearn import metrics
import time
import cv2
import gc
import os 

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import warnings
warnings.filterwarnings('ignore')


In [2]:
dataset_path = '/kaggle/input/lung-and-colon-cancer-histopathological-images/lung_colon_image_set/lung_image_sets/'
print(os.listdir(dataset_path))

['lung_aca', 'lung_scc', 'lung_n']


In [3]:
classes = os.listdir(dataset_path)
classes

['lung_aca', 'lung_scc', 'lung_n']

In [4]:
IMG_SIZE = 256
SPLIT = 0.2
BATCH_SIZE = 64
learning_rate = 0.003
NODE = np.int_(IMG_SIZE / 4)

In [5]:
from torchvision import datasets
from torch.utils.data import DataLoader

# Define transformations (e.g., resizing, normalization)
transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),  # Resize images to 224x224
    transforms.ToTensor(),  # Convert PIL image to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize
])

# Load dataset from local directory
data_dir = 'C:/Users/abdun/Lung_cancer/lung_colon_image_set/lung_image_sets'
dataset = datasets.ImageFolder(root=dataset_path, transform=transform)

# Create DataLoader
#dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
# Load dataset


generator1 = torch.Generator().manual_seed(42)
train_set, val_set = torch.utils.data.random_split(dataset, [0.8, 0.2], generator=generator1 )

# Create DataLoader
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=8, pin_memory=True)
test_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=True,)

print(len(train_set))
img, label = dataset[10]
print(img.shape)


12000
torch.Size([3, 256, 256])


In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [7]:
# Creating a ResBlock
class ResBlock(nn.Module):
   def __init__(self, n_chans):
       super(ResBlock, self).__init__()
       self.conv = nn.Conv2d(n_chans, n_chans, kernel_size=3,
                                             padding=1, bias=False)
       self.batch_norm = nn.BatchNorm2d(num_features=n_chans)
       torch.nn.init.kaiming_normal_(self.conv.weight,
                                           nonlinearity='relu')
       torch.nn.init.constant_(self.batch_norm.weight, 0.5)
       torch.nn.init.zeros_(self.batch_norm.bias)

   def forward(self, x):
       out = self.conv(x)
       out = self.batch_norm(out)
       out = torch.relu(out)
       return out + x
 

In [8]:
# Creating a Deep ResNet
class NetResDeep(nn.Module):
    def __init__(self, n_chans1=32, n_blocks=40):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.resblocks = nn.Sequential(
                  *(n_blocks * [ResBlock(n_chans=n_chans1)]))
        self.fc1 = nn.Linear(n_chans1 * NODE * NODE , 64)
        self.fc3 = nn.Linear(64, 64)
        self.fc4 = nn.Linear(64, 32)
        self.fc2 = nn.Linear(32, 3)
    def forward(self, x):
        out = F.max_pool2d(torch.relu(self.conv1(x)), 2)
        out = self.resblocks(out)
        out = F.max_pool2d(out, 2)
        out = out.view(out.size(0), -1)
        out = torch.relu(self.fc1(out))
        out = torch.relu(self.fc3(out))
        out = torch.relu(self.fc4(out))
        out = self.fc2(out)
        return out

In [9]:
model = NetResDeep()
model = nn.DataParallel(model)
model = model.to(device=device)

# Set Loss function with criterion
loss_fn = nn.CrossEntropyLoss()

# Set optimizer with optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 0.01, momentum = 0.5)  

total_step = len(train_loader)

In [10]:
epoch_list = []
time_list = []
train_lost_list = []
train_acc_list = []
val_acc_list =[]

In [11]:
start_time = time.time()
def training_loop(n_epochs, optimizer, model, loss_fn, train_loader, inter_test, dump_csv):
   
  model.train()
  for epoch in range(1, n_epochs + 1):
      loss_train = 0.0
      for imgs, labels in train_loader:
            imgs = imgs.to(device=device)
            labels = labels.to(device=device)
            #model = model.to(device=device)
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_train += loss.item()

      epoch_time = time.time() - start_time
      epoch_time = f"{epoch_time:.2f}"
      if epoch == 1 or epoch  != 0:
         print('{} Epoch {}, Training loss {}'.format(
             epoch_time, epoch,
             loss_train / len(train_loader)))
      if(inter_test == 1 or epoch > 20 or epoch % 5 ==0):  
        acc_train, acc_val = validate(model, train_loader, test_loader)
      if(dump_csv == 1):
        epoch_list.append(epoch)
        time_list.append(epoch_time)
        train_lost_list.append(loss_train / len(train_loader))
        train_acc_list.append(acc_train)
        val_acc_list.append(acc_val)


            
            


In [12]:
def validate(model, train_loader, val_loader):
     model.eval()
     for name, loader in [("train", train_loader), ("val", val_loader)]:
             correct = 0
             total = 0
             with torch.no_grad():
                for imgs, labels in loader:
                    imgs = imgs.to(device=device)
                    labels = labels.to(device=device)
                    model = model.to(device=device)
                    outputs = model(imgs)
                    _, predicted = torch.max(outputs, dim=1)
                    total += labels.shape[0]
                    correct += int((predicted == labels).sum())

             print("Accuracy {}: {:.2f}".format(name , correct / total))
             if(name == "train"):
                acc_train = correct / total
             if(name == "val"):
                acc_val = correct / total
     return f"{acc_train:.2f}", f"{acc_val:.2f}" 

In [13]:
inter_test = 1
dump_csv = 1
training_loop(n_epochs = 40, optimizer = optimizer, model = model, loss_fn = loss_fn, train_loader = train_loader, 
                inter_test = inter_test, dump_csv = dump_csv)

results = {
    "Epoch": epoch_list,
    "Time": time_list,
    "Train_Accuracy": train_acc_list,
    "Val_Accuracy":val_acc_list
}

df = pd.DataFrame(results)
df.to_csv('/kaggle/working/DeepRsnt_results_test.csv', index=False)

print(f"Training results successfully exported to 'DeepRsnt_results_test.csv'")

128.58 Epoch 1, Training loss 1.1230983868082787
Accuracy train: 0.33
Accuracy val: 0.35
346.94 Epoch 2, Training loss 0.8597571528021325
Accuracy train: 0.34
Accuracy val: 0.37
551.41 Epoch 3, Training loss 0.45793444187717236
Accuracy train: 0.84
Accuracy val: 0.84
755.54 Epoch 4, Training loss 0.3587310200834528
Accuracy train: 0.83
Accuracy val: 0.82
961.06 Epoch 5, Training loss 0.31932079990176443
Accuracy train: 0.84
Accuracy val: 0.83
1168.12 Epoch 6, Training loss 0.3104345309449003
Accuracy train: 0.87
Accuracy val: 0.86
1375.48 Epoch 7, Training loss 0.2840918056825374
Accuracy train: 0.89
Accuracy val: 0.88
1580.55 Epoch 8, Training loss 0.2755908043460643
Accuracy train: 0.88
Accuracy val: 0.87
1787.60 Epoch 9, Training loss 0.2649485721708612
Accuracy train: 0.87
Accuracy val: 0.85
1993.63 Epoch 10, Training loss 0.252078961541361
Accuracy train: 0.91
Accuracy val: 0.89
2199.92 Epoch 11, Training loss 0.24387127454293536
Accuracy train: 0.88
Accuracy val: 0.87
2404.88 Epo

NameError: name 'csv_file' is not defined

In [14]:
validate(model, train_loader, test_loader)


Accuracy train: 0.99
Accuracy val: 0.93


('0.99', '0.93')

In [None]:
#20 Blocks
#Accuracy train: 0.99
#Accuracy val: 0.94


In [15]:
!lscpu

Architecture:                         x86_64
CPU op-mode(s):                       32-bit, 64-bit
Byte Order:                           Little Endian
Address sizes:                        46 bits physical, 48 bits virtual
CPU(s):                               4
On-line CPU(s) list:                  0-3
Thread(s) per core:                   2
Core(s) per socket:                   2
Socket(s):                            1
NUMA node(s):                         1
Vendor ID:                            GenuineIntel
CPU family:                           6
Model:                                85
Model name:                           Intel(R) Xeon(R) CPU @ 2.00GHz
Stepping:                             3
CPU MHz:                              2000.156
BogoMIPS:                             4000.31
Hypervisor vendor:                    KVM
Virtualization type:                  full
L1d cache:                            64 KiB
L1i cache:                            64 KiB
L2 cache:                  

In [None]:
#torch.save(model.state_dict(), data_dir + 'Lung_cancer.pt')
