In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from glob import glob
 
from sklearn.model_selection import train_test_split
from sklearn import metrics
import time 
import cv2
import gc
import os 

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import warnings
warnings.filterwarnings('ignore')


In [2]:
dataset_path = '/kaggle/input/lung-and-colon-cancer-histopathological-images/lung_colon_image_set/lung_image_sets/'
print(os.listdir(dataset_path))

['lung_aca', 'lung_scc', 'lung_n']


In [3]:
classes = os.listdir(dataset_path)
classes

['lung_aca', 'lung_scc', 'lung_n']

In [4]:
IMG_SIZE = 256
SPLIT = 0.2
EPOCHS = 10
BATCH_SIZE = 64
learning_rate = 0.001
num_epochs = 20
NODE = np.int_(IMG_SIZE / 4)

In [5]:
from torchvision import datasets
from torch.utils.data import DataLoader

# Define transformations (e.g., resizing, normalization)
transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),  # Resize images to 224x224
    transforms.ToTensor(),  # Convert PIL image to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize
])

# Load dataset from local directory
data_dir = 'C:/Users/abdun/Lung_cancer/lung_colon_image_set/lung_image_sets'
dataset = datasets.ImageFolder(root=dataset_path, transform=transform)

# Create DataLoader
#dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
# Load dataset


generator1 = torch.Generator().manual_seed(42)
train_set, val_set = torch.utils.data.random_split(dataset, [0.8, 0.2], generator=generator1 )

# Create DataLoader
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=8, pin_memory=True)
test_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=True,)

print(len(train_set))
img, label = dataset[10]
print(img.shape)


12000
torch.Size([3, 256, 256])


In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [7]:
# Creating a CNN class
class NetWidth(nn.Module):
  def __init__(self, n_chanl = 32):
    super().__init__()
    self.conv1 = nn.Conv2d(3, n_chanl, kernel_size=3, padding=1)
    self.act1 = nn.Tanh()
    self.pool1 = nn.MaxPool2d(2)
    self.conv2 = nn.Conv2d(n_chanl, n_chanl // 2, kernel_size=3, padding=1)
    self.act2 = nn.Tanh()
    self.pool2 = nn.MaxPool2d(2)
    self.fc1 = nn.Linear(n_chanl * NODE * NODE // 2, 64) # NODE is (IMG_SIZE)devided by (2*2) from the max pool.
    self.act3 = nn.ReLU()
    self.fc3 = nn.Linear(64, 64)
    self.fc4 = nn.Linear(64, 32)
    self.fc2 = nn.Linear(32, 3)
  def forward(self, x):
    out = self.pool1(self.act3(self.conv1(x)))
    out = self.pool2(self.act3(self.conv2(out)))
    out = out.view(out.size(0), -1)
    out = self.act3(self.fc1(out))
    out = self.act3(self.fc3(out))
    out = self.act3(self.fc4(out))
    out = self.fc2(out)
    return out


In [8]:
model = NetWidth()
model = nn.DataParallel(model)
model = model.to(device=device)

# Set Loss function with criterion
loss_fn = nn.CrossEntropyLoss()

# Set optimizer with optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 0.01, momentum = 0.5)  

total_step = len(train_loader)

In [9]:
epoch_list = []
time_list = []
train_lost_list = []
train_acc_list = []
val_acc_list =[]

In [10]:
start_time = time.time()
def training_loop(n_epochs, optimizer, model, loss_fn, train_loader, inter_test, dump_csv):
   
  model.train()
  for epoch in range(1, n_epochs + 1):
      loss_train = 0.0
      for imgs, labels in train_loader:
            imgs = imgs.to(device=device)
            labels = labels.to(device=device)
            #model = model.to(device=device)
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_train += loss.item()

      epoch_time = time.time() - start_time
      epoch_time = f"{epoch_time:.2f}"
      if epoch == 1 or epoch  != 0:
         print('{} Epoch {}, Training loss {}'.format(
             epoch_time, epoch,
             loss_train / len(train_loader)))
      if(inter_test == 1 or epoch > 20 or epoch % 5 ==0):  
        acc_train, acc_val = validate(model, train_loader, test_loader)
      if(dump_csv == 1):
        epoch_list.append(epoch)
        time_list.append(epoch_time)
        train_lost_list.append(loss_train / len(train_loader))
        train_acc_list.append(acc_train)
        val_acc_list.append(acc_val)


            
            


In [11]:


def validate(model, train_loader, val_loader):
     model.eval()
     for name, loader in [("train", train_loader), ("val", val_loader)]:
             correct = 0
             total = 0
             with torch.no_grad():
                for imgs, labels in loader:
                    imgs = imgs.to(device=device)
                    labels = labels.to(device=device)
                    model = model.to(device=device)
                    outputs = model(imgs)
                    _, predicted = torch.max(outputs, dim=1)
                    total += labels.shape[0]
                    correct += int((predicted == labels).sum())

             print("Accuracy {}: {:.2f}".format(name , correct / total))
             if(name == "train"):
                acc_train = correct / total
             if(name == "val"):
                acc_val = correct / total
     return f"{acc_train:.2f}", f"{acc_val:.2f}" 



In [12]:
inter_test = 1
dump_csv = 1
training_loop(n_epochs = 40, optimizer = optimizer, model = model, loss_fn = loss_fn, train_loader = train_loader, 
                inter_test = inter_test, dump_csv = dump_csv)

results = {
    "Epoch": epoch_list,
    "Time": time_list,
    "Train_Accuracy": train_acc_list,
    "Val_Accuracy":val_acc_list
}

df = pd.DataFrame(results)
csv_file = "CNN_Width_results.csv"
df.to_csv('/kaggle/working/DeepRsnt_results.csv', index=False)

print(f"Training results successfully exported to csv")



48.45 Epoch 1, Training loss 0.8373139586220396
Accuracy train: 0.75
Accuracy val: 0.74
184.47 Epoch 2, Training loss 0.47278600772644613
Accuracy train: 0.77
Accuracy val: 0.76
300.21 Epoch 3, Training loss 0.4254218303459756
Accuracy train: 0.82
Accuracy val: 0.81
418.03 Epoch 4, Training loss 0.37948159881411714
Accuracy train: 0.83
Accuracy val: 0.82
536.38 Epoch 5, Training loss 0.35260819667514337
Accuracy train: 0.86
Accuracy val: 0.85
655.00 Epoch 6, Training loss 0.33347507574139756
Accuracy train: 0.84
Accuracy val: 0.83
772.11 Epoch 7, Training loss 0.32026555032489146
Accuracy train: 0.86
Accuracy val: 0.85
889.03 Epoch 8, Training loss 0.3109186295657716
Accuracy train: 0.86
Accuracy val: 0.85
1005.19 Epoch 9, Training loss 0.2969325399620736
Accuracy train: 0.89
Accuracy val: 0.88
1120.44 Epoch 10, Training loss 0.2872200665321756
Accuracy train: 0.88
Accuracy val: 0.86
1239.28 Epoch 11, Training loss 0.2768015183825442
Accuracy train: 0.89
Accuracy val: 0.87
1357.12 Epoc

In [13]:
validate(model, train_loader, test_loader)


Accuracy train: 0.93
Accuracy val: 0.90


('0.93', '0.90')

In [23]:
df.to_csv('/kaggle/working/DeepRsnt_results.csv', index=False)
print(results)

{'Epoch': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], 'Time': ['48.45', '184.47', '300.21', '418.03', '536.38', '655.00', '772.11', '889.03', '1005.19', '1120.44', '1239.28', '1357.12', '1474.52', '1593.32', '1710.52', '1827.45', '1945.25', '2060.74', '2179.61', '2295.78', '2413.17', '2532.02', '2646.67', '2765.67', '2883.08', '3000.71', '3117.72', '3235.37', '3349.24', '3466.49', '3582.36', '3697.53', '3812.58', '3929.30', '4046.52', '4160.22', '4274.93', '4389.07', '4505.86', '4620.12'], 'Train_Accuracy': ['0.75', '0.77', '0.82', '0.83', '0.86', '0.84', '0.86', '0.86', '0.89', '0.88', '0.89', '0.89', '0.90', '0.87', '0.91', '0.90', '0.91', '0.86', '0.90', '0.88', '0.91', '0.87', '0.93', '0.91', '0.93', '0.89', '0.93', '0.93', '0.94', '0.94', '0.95', '0.93', '0.95', '0.91', '0.95', '0.91', '0.95', '0.95', '0.93', '0.93'], 'Val_Accuracy': ['0.74', '0.76', '0.81', '0.82', '0.85',

In [14]:
#Accuracy train: 0.91
#Accuracy val: 0.89


In [15]:
!lscpu

Architecture:                         x86_64
CPU op-mode(s):                       32-bit, 64-bit
Byte Order:                           Little Endian
Address sizes:                        46 bits physical, 48 bits virtual
CPU(s):                               4
On-line CPU(s) list:                  0-3
Thread(s) per core:                   2
Core(s) per socket:                   2
Socket(s):                            1
NUMA node(s):                         1
Vendor ID:                            GenuineIntel
CPU family:                           6
Model:                                85
Model name:                           Intel(R) Xeon(R) CPU @ 2.00GHz
Stepping:                             3
CPU MHz:                              2000.160
BogoMIPS:                             4000.32
Hypervisor vendor:                    KVM
Virtualization type:                  full
L1d cache:                            64 KiB
L1i cache:                            64 KiB
L2 cache:                  

In [16]:
#torch.save(model.state_dict(), data_dir + 'Lung_cancer.pt')
