In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from glob import glob
 
from sklearn.model_selection import train_test_split
from sklearn import metrics
 
import cv2
import gc
import os 
import time
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import warnings
warnings.filterwarnings('ignore')


In [2]:
dataset_path = '/kaggle/input/lung-and-colon-cancer-histopathological-images/lung_colon_image_set/lung_image_sets/'
print(os.listdir(dataset_path))

['lung_aca', 'lung_scc', 'lung_n']


In [3]:
classes = os.listdir(dataset_path)
classes

['lung_aca', 'lung_scc', 'lung_n']

In [4]:
IMG_SIZE = 256
SPLIT = 0.2
EPOCHS = 10
BATCH_SIZE = 64
learning_rate = 0.001
num_epochs = 20
NODE = np.int_(IMG_SIZE / 4)

In [5]:
from torchvision import datasets
from torch.utils.data import DataLoader

# Define transformations (e.g., resizing, normalization)
transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),  # Resize images to 224x224
    transforms.ToTensor(),  # Convert PIL image to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize
])

# Load dataset from local directory
data_dir = 'C:/Users/abdun/Lung_cancer/lung_colon_image_set/lung_image_sets'
dataset = datasets.ImageFolder(root=dataset_path, transform=transform)

# Create DataLoader
#dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
# Load dataset


generator1 = torch.Generator().manual_seed(42)
train_set, val_set = torch.utils.data.random_split(dataset, [0.8, 0.2], generator=generator1 )

# Create DataLoader
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=8, pin_memory=True)
test_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=True,)

print(len(train_set))
img, label = dataset[10]
print(img.shape)


12000
torch.Size([3, 256, 256])


In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [7]:
# Creating a CNN class
class NetBatchNorm(nn.Module):
  def __init__(self, n_chanl = 32):
    super().__init__()
    self.conv1 = nn.Conv2d(3, n_chanl, kernel_size=3, padding=1)
    self.conv1_batchnorm = nn.BatchNorm2d(num_features=n_chanl)
    self.act1 = nn.Tanh()
    self.pool1 = nn.MaxPool2d(2)
    self.conv2 = nn.Conv2d(n_chanl, n_chanl // 2, kernel_size=3, padding=1)
    self.conv2_batchnorm = nn.BatchNorm2d(num_features=n_chanl // 2)
    self.act2 = nn.Tanh()
    self.pool2 = nn.MaxPool2d(2)
    self.fc1 = nn.Linear(n_chanl * NODE * NODE // 2, 64) # NODE is (IMG_SIZE)devided by (2*2) from the max pool.
    self.act3 = nn.ReLU()
    self.fc3 = nn.Linear(64, 64)
    self.fc4 = nn.Linear(64, 32)
    self.fc2 = nn.Linear(32, 3)
  def forward(self, x):
    out = self.conv1_batchnorm(self.conv1(x))
    out = self.pool1(self.act3(out))
    out = self.conv2_batchnorm(self.conv2(out))
    out = self.pool2(self.act3(out))
    out = out.view(out.size(0), -1)
    out = self.act3(self.fc1(out))
    out = self.act3(self.fc3(out))
    out = self.act3(self.fc4(out))
    out = self.fc2(out)
    return out


In [8]:
model = NetBatchNorm()
model = nn.DataParallel(model)
model = model.to(device=device)

# Set Loss function with criterion
loss_fn = nn.CrossEntropyLoss()

# Set optimizer with optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 0.01, momentum = 0.5)  

total_step = len(train_loader)

In [9]:
epoch_list = []
time_list = []
train_lost_list = []
train_acc_list = []
val_acc_list =[]

In [10]:
start_time = time.time()
def training_loop(n_epochs, optimizer, model, loss_fn, train_loader, inter_test, dump_csv):
   
  model.train()
  for epoch in range(1, n_epochs + 1):
      loss_train = 0.0
      for imgs, labels in train_loader:
            imgs = imgs.to(device=device)
            labels = labels.to(device=device)
            #model = model.to(device=device)
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_train += loss.item()

      epoch_time = time.time() - start_time
      epoch_time = f"{epoch_time:.2f}"
      if epoch == 1 or epoch  != 0:
         print('{} Epoch {}, Training loss {}'.format(
             epoch_time, epoch,
             loss_train / len(train_loader)))
      if(inter_test == 1 or epoch > 20 or epoch % 5 ==0):  
        acc_train, acc_val = validate(model, train_loader, test_loader)
      if(dump_csv == 1):
        epoch_list.append(epoch)
        time_list.append(epoch_time)
        train_lost_list.append(loss_train / len(train_loader))
        train_acc_list.append(acc_train)
        val_acc_list.append(acc_val)


            
            



            
            


In [11]:
def validate(model, train_loader, val_loader):
     model.eval()
     for name, loader in [("train", train_loader), ("val", val_loader)]:
             correct = 0
             total = 0
             with torch.no_grad():
                for imgs, labels in loader:
                    imgs = imgs.to(device=device)
                    labels = labels.to(device=device)
                    model = model.to(device=device)
                    outputs = model(imgs)
                    _, predicted = torch.max(outputs, dim=1)
                    total += labels.shape[0]
                    correct += int((predicted == labels).sum())

             print("Accuracy {}: {:.2f}".format(name , correct / total))
             if(name == "train"):
                acc_train = correct / total
             if(name == "val"):
                acc_val = correct / total
     return f"{acc_train:.2f}", f"{acc_val:.2f}" 

In [13]:
inter_test = 1
dump_csv = 1
training_loop(n_epochs = 40, optimizer = optimizer, model = model, loss_fn = loss_fn, train_loader = train_loader, 
                inter_test = inter_test, dump_csv = dump_csv)

results = {
    "Epoch": epoch_list,
    "Time": time_list,
    "Train_Accuracy": train_acc_list,
    "Val_Accuracy":val_acc_list
}

df = pd.DataFrame(results)
df.to_csv('/kaggle/working/Batchnorm_results.csv', index=False)

print(f"Training results successfully exported to csv")


 

127.62 Epoch 1, Training loss 0.6456619735727919
Accuracy train: 0.86
Accuracy val: 0.86
260.23 Epoch 2, Training loss 0.3094023400323188
Accuracy train: 0.88
Accuracy val: 0.87
378.78 Epoch 3, Training loss 0.2504438913645262
Accuracy train: 0.87
Accuracy val: 0.86
495.89 Epoch 4, Training loss 0.23058709264435667
Accuracy train: 0.93
Accuracy val: 0.91
614.00 Epoch 5, Training loss 0.20470053742223598
Accuracy train: 0.89
Accuracy val: 0.88
732.73 Epoch 6, Training loss 0.19365041017373827
Accuracy train: 0.94
Accuracy val: 0.92
849.80 Epoch 7, Training loss 0.17527003245467834
Accuracy train: 0.95
Accuracy val: 0.92
966.81 Epoch 8, Training loss 0.1619719416299399
Accuracy train: 0.96
Accuracy val: 0.92
1083.82 Epoch 9, Training loss 0.14180149009173854
Accuracy train: 0.96
Accuracy val: 0.92
1199.71 Epoch 10, Training loss 0.1268670677029072
Accuracy train: 0.96
Accuracy val: 0.93
1316.71 Epoch 11, Training loss 0.11111917239712908
Accuracy train: 0.94
Accuracy val: 0.91
1436.01 Ep

In [None]:
model.eval()
validate(model, train_loader, test_loader)


In [None]:
#Accuracy train: 0.91
#Accuracy val: 0.89


In [None]:
!lscpu

In [None]:
#torch.save(model.state_dict(), data_dir + 'Lung_cancer.pt')
