In [1]:
import numpy as np
import pandas as pd
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        pass
        # print(os.path.join(dirname, filename))

import time
import datetime
timestamp_exec_start = time.time()


files = []
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        if (filename[-3:] == 'png'):
            files.append(os.path.join(dirname, filename))
print("Processed ",len(files),"files")


labels_dict = {"B_A-":0,"B_F-":1,"B_TA":2,"B_PT":3,"M_DC":4,"M_LC":5,"M_MC":6,"M_PC":7}  # for 8 class problem
labels_dict_simple = {"B":0,"M":1}                                                       # for 2 class problem
REDUCED_CLASSES = False

X = []
Y = []
for f in files:
    x = f.split("/") # break up the path
    x = x[-1:][0]    # extract the file name
    X.append(str(f))
    if REDUCED_CLASSES:
        Y.append(int(labels_dict_simple[x[4]]))
    else:
        Y.append(int(labels_dict[x[4:8]]))

data = {"images":X,"labels":Y}
images_df = pd.DataFrame(data, columns = ['images','labels'])
images_df.groupby("labels")["labels"].count()

Processed  7909 files


labels
0     444
1    1014
2     569
3     453
4    3451
5     626
6     792
7     560
Name: labels, dtype: int64

In [2]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import TensorDataset, DataLoader, Dataset


In [3]:
train, val = train_test_split(images_df, stratify=images_df.labels, test_size=0.2)
len(train), len(val)

(6327, 1582)

In [4]:
class MyDataset(Dataset):
    def __init__(self, df_data,transform=None):
        super().__init__()
        self.df = df_data.values
        
        self.transform = transform

    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, index):
        img_path,label = self.df[index]
        
        image = cv2.imread(img_path)
        image = cv2.resize(image, (224,224))
        if self.transform is not None:
            image = self.transform(image)
        return image, label

In [5]:
## Parameters for model

# Hyper parameters
num_epochs = 200
num_classes = 8
batch_size = 16
learning_rate = 0.0002

# Device configuration
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [6]:
# Utility function for saving model
# During training, the loss values are stored in a list.
# We check the last two values to see if the loss has reduced.
def save_checkpoint(state, loss):
    global best_loss
    """Save checkpoint if a new best is achieved"""
    if best_loss>=loss:        
        print ("=> Loss reduced by:\t",best_loss - loss)
        print("   Saving model state")
        torch.save(state, "state_dict.dct")  # save checkpoint
        best_loss = loss

In [7]:
trans_train = transforms.Compose([transforms.ToPILImage(),
                                  transforms.Pad(64, padding_mode='reflect'),
                                  transforms.RandomHorizontalFlip(), 
                                  transforms.RandomVerticalFlip(),
                                  transforms.RandomRotation(20), 
                                  transforms.Resize(224, interpolation = 2),
                                  transforms.ToTensor(),
                                  transforms.Normalize(mean=[0.5, 0.5, 0.5],std=[0.5, 0.5, 0.5])])

trans_valid = transforms.Compose([transforms.ToPILImage(),                    
                                  transforms.Pad(64, padding_mode='reflect'),
                                  transforms.Resize(224, interpolation = 2),
                                  transforms.ToTensor(),
                                  transforms.Normalize(mean=[0.5, 0.5, 0.5],std=[0.5, 0.5, 0.5])])

dataset_train = MyDataset(df_data=train, transform=trans_train)
dataset_valid = MyDataset(df_data=val,transform=trans_valid)

loader_train = DataLoader(dataset = dataset_train, batch_size=batch_size, shuffle=True, num_workers=0)
loader_valid = DataLoader(dataset = dataset_valid, batch_size=batch_size//2, shuffle=False, num_workers=0)

Now, we create the model and train it.

In [8]:
import torch.nn as nn
from torchvision import models


model =  models.shufflenet_v2_x1_0(pretrained=True)
model.fc = nn.Linear(in_features=1024, out_features=num_classes, bias=True)
model = model.to(device)
print(model)

Downloading: "https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth" to /root/.cache/torch/checkpoints/shufflenetv2_x1-5666bf0f80.pth


HBox(children=(FloatProgress(value=0.0, max=9218294.0), HTML(value='')))


ShuffleNetV2(
  (conv1): Sequential(
    (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (stage2): Sequential(
    (0): InvertedResidual(
      (branch1): Sequential(
        (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=24, bias=False)
        (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Conv2d(24, 58, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (3): BatchNorm2d(58, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (4): ReLU(inplace=True)
      )
      (branch2): Sequential(
        (0): Conv2d(24, 58, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(58, eps=1e-05, momentum=0.1, affine=True, track_running

In [9]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adamax(model.parameters(), lr=learning_rate)


import os
import urllib.request
url = "https://onedrive.live.com/download?cid=E5569BBAB912B6A5&resid=E5569BBAB912B6A5%21532997&authkey=AIIbJpY85FBd61c"
file_name = "download.dct"
print("Downloading pre-trained model state...")
urllib.request.urlretrieve(url, file_name)
print("Done!")
model.load_state_dict(torch.load(file_name))
import os.path
if os.path.exists(file_name):
    try:
        model.load_state_dict(torch.load(file_name))
        print("Downloaded model state loaded successfully.")
    except:
        print("Error loading state dict.")
        


In [10]:
# Train the model
timestamp_train_start = time.time()

loss_hist = []
best_loss = 9
total_step = len(loader_train)

for epoch in range(num_epochs):
    timestamp_epoch_start = time.time()
    print("Epoch ", epoch+1," started...")
    for i, (images, labels) in enumerate(loader_train):
        images = images.to(device)
        labels = labels.to(device)
                
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
       
        
        if (i+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))    
             # Checkpointing
            loss_hist.append(float(loss.item())) #add current loss value.
            save_checkpoint(model.state_dict(),float(loss.item()))
                    
    timestamp_epoch_end = time.time()
    print("Epoch done in ",str(datetime.timedelta(seconds=(timestamp_epoch_end - timestamp_epoch_start))))
    
timestamp_train_end = time.time()
print("Training done in ",str(datetime.timedelta(seconds=(timestamp_train_end - timestamp_train_start))))

Epoch  1  started...
Epoch [1/200], Step [100/396], Loss: 1.9477
=> Loss reduced by:	 7.052331805229187
   Saving model state
Epoch [1/200], Step [200/396], Loss: 1.6581
=> Loss reduced by:	 0.2896103858947754
   Saving model state
Epoch [1/200], Step [300/396], Loss: 1.8876
Epoch done in  0:02:27.466482
Epoch  2  started...
Epoch [2/200], Step [100/396], Loss: 1.1990
=> Loss reduced by:	 0.4590599536895752
   Saving model state
Epoch [2/200], Step [200/396], Loss: 1.3895
Epoch [2/200], Step [300/396], Loss: 1.3788
Epoch done in  0:01:51.481608
Epoch  3  started...
Epoch [3/200], Step [100/396], Loss: 1.1901
=> Loss reduced by:	 0.008939504623413086
   Saving model state
Epoch [3/200], Step [200/396], Loss: 1.3007
Epoch [3/200], Step [300/396], Loss: 1.2669
Epoch done in  0:01:48.734219
Epoch  4  started...
Epoch [4/200], Step [100/396], Loss: 1.3042
Epoch [4/200], Step [200/396], Loss: 1.5882
Epoch [4/200], Step [300/396], Loss: 1.0486
=> Loss reduced by:	 0.14150667190551758
   Savin

In [11]:
#load the best model
model.load_state_dict(torch.load("state_dict.dct"))

model.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in loader_valid:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
          
    print('Test Accuracy of the model on the test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
torch.save(model.state_dict(), 'final_state.dct')

Test Accuracy of the model on the test images: 93.36283185840708 %


In [12]:
files = []
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        if (filename[-3:] == 'png'):
            files.append(os.path.join(dirname, filename))
print("Processed ",len(files),"files")

labels_dict = {"B_A-":0,"B_F-":1,"B_TA":2,"B_PT":3,"M_DC":4,"M_LC":5,"M_MC":6,"M_PC":7}  # for 8 class problem
labels_dict_simple = {"B":0,"M":1}                                                       # for 2 class problem
REDUCED_CLASSES = False

X = []
Y = []
M = []
for f in files:
    x = f.split("/") # break up the path
    x = x[-1:][0]    # extract the file name
    X.append(str(f))
    if REDUCED_CLASSES:
        Y.append(int(labels_dict_simple[x[4]]))
    else:
        Y.append(int(labels_dict[x[4:8]]))
    m = f.split("-")
    m = m[-2]
    M.append(m)

data = {"images":X,"labels":Y,"magnification":M }
images_df_t = pd.DataFrame(data, columns = ['images','labels'])
images_df = pd.DataFrame(data, columns = ['images','labels','magnification'])
images_df_40 = images_df.loc[images_df['magnification']=='40'].drop(['magnification'], axis=1)
images_df_100 = images_df.loc[images_df['magnification']=='100'].drop(['magnification'], axis=1)
images_df_200 = images_df.loc[images_df['magnification']=='200'].drop(['magnification'], axis=1)
images_df_400 = images_df.loc[images_df['magnification']=='400'].drop(['magnification'], axis=1)

dataset_img40 = MyDataset(df_data=images_df_40, transform=trans_valid)
loader_img40 = DataLoader(dataset = dataset_img40, batch_size=batch_size//2, shuffle=False, num_workers=0)

dataset_img100 = MyDataset(df_data=images_df_100, transform=trans_valid)
loader_img100 = DataLoader(dataset = dataset_img100, batch_size=batch_size//2, shuffle=False, num_workers=0)

dataset_img200 = MyDataset(df_data=images_df_200, transform=trans_valid)
loader_img200 = DataLoader(dataset = dataset_img200, batch_size=batch_size//2, shuffle=False, num_workers=0)

dataset_img400 = MyDataset(df_data=images_df_400, transform=trans_valid)
loader_img400 = DataLoader(dataset = dataset_img400, batch_size=batch_size//2, shuffle=False, num_workers=0)
l_list = [loader_img40,loader_img100,loader_img200,loader_img400]

Processed  7909 files


In [13]:
model.load_state_dict(torch.load('final_state.dct'))
model.eval()
count = 0
for l in l_list:
    count+=1
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in l:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _,predicted = torch.max(outputs.data,1)
            total+=labels.size(0)
            correct+= (predicted==labels).sum().item()
        print('Test Accuracy of model on {} is {} %'.format(count,100*(correct/total)))

Test Accuracy of model on 1 is 97.89473684210527 %
Test Accuracy of model on 2 is 97.6453628063431 %
Test Accuracy of model on 3 is 97.66517635370094 %
Test Accuracy of model on 4 is 97.14285714285714 %


In [14]:
timestamp_exec_end = time.time()
print("Total execution time: ",str(datetime.timedelta(seconds=(timestamp_exec_end - timestamp_exec_start))))

Total execution time:  6:17:51.805480
