<a href="https://colab.research.google.com/github/kamilo116/KNN/blob/master/KNN3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np 
import pandas as pd
import os
import csv
import sys
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.pyplot import imshow
%matplotlib inline
from sklearn.model_selection import train_test_split

import torch
from torch.utils.data import TensorDataset, DataLoader,Dataset
from torch.utils.data.sampler import SubsetRandomSampler

import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.utils.data import sampler
import torchvision
import torchvision.datasets as dset
import torchvision.transforms as T
import torchvision.transforms as transforms
from torchvision import models
import timeit

np.random.seed(4) 
torch.manual_seed(4) 
torch.cuda.manual_seed(4)

In [5]:
from google.colab import drive, files
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
! git clone https://github.com/wang-chen/kervolution.git 

sys.path.append("kervolution/")
from kervolution import Kerv2d


Cloning into 'kervolution'...
remote: Enumerating objects: 6, done.[K
remote: Counting objects: 100% (6/6), done.[K
remote: Compressing objects: 100% (6/6), done.[K
remote: Total 53 (delta 2), reused 0 (delta 0), pack-reused 47[K
Unpacking objects: 100% (53/53), done.


In [7]:
MALIGNANT_DATASET = '/content/drive/My Drive/Colab_data/malignant/malignant/'
BENIGN_DATASET = '/content/drive/My Drive/Colab_data/benign/benign/'
DATA_FOLDER = '/content/drive/My Drive/Colab_data/'
benign_file_list = os.listdir(BENIGN_DATASET)
malignant_file_list = os.listdir(MALIGNANT_DATASET)

print(f"Number of benign {len(benign_file_list)} images")
print(f"Number of malignant {len(malignant_file_list)} images")

data_transforms = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])



Number of benign 1480 images
Number of malignant 1293 images


In [0]:
LIMIT_IMAGES_NUM = 900
benign_dict = {filename: 0 for filename in benign_file_list[:LIMIT_IMAGES_NUM]}
malignant_dict = {filename: 1 for filename in malignant_file_list[:LIMIT_IMAGES_NUM]}
img_class_dict = {**benign_dict , **malignant_dict}
labeled_data = pd.Series(img_class_dict)

In [0]:
class IsicDataset(Dataset):
    def __init__(self, data_folder, labeled_data, 
                 transform=transforms.Compose([transforms.ToTensor()])):
        self.labeled_data = labeled_data
        self.transform = transform
        self.data_folder = data_folder
        
        
    def __len__(self):
        return len(self.labeled_data)

    def __getitem__(self, index):
        label = self.labeled_data[index]
        if label == 0:
          image = Image.open(os.path.join(self.data_folder, "benign", "benign", index ))
        else:
          image = Image.open(os.path.join(self.data_folder, "malignant", "malignant", index ))
        image = self.transform(image)
        return image, label

    @property
    def labels(self):
      return self.labeled_data


In [10]:
 
dataset = IsicDataset(DATA_FOLDER, labeled_data, transform=data_transforms)
#test_set = ... todo
print(dataset.labels)

X_train, X_test = train_test_split(dataset.labels, test_size=0.2)
print("number of training data: ",len(X_train))
print("number of testing  data: ",len(X_test))

train_sampler = SubsetRandomSampler(list(X_train.index))
valid_sampler = SubsetRandomSampler(list(X_test.index))
batch_size = 64
num_workers = 0

train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=train_sampler, num_workers=num_workers)
valid_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=valid_sampler, num_workers=num_workers)



ISIC_0000612.jpeg    0
ISIC_0000706.jpeg    0
ISIC_0000704.jpeg    0
ISIC_0000679.jpeg    0
ISIC_0000652.jpeg    0
                    ..
ISIC_0025409.jpg     1
ISIC_0025394.jpg     1
ISIC_0024729.jpg     1
ISIC_0025248.jpg     1
ISIC_0025391.jpg     1
Length: 1800, dtype: int64
number of training data:  1440
number of testing  data:  360


In [0]:
avg_loss_list = []
acc_list = []

def train(model, train_loader ,loss_fn, optimizer, num_epochs = 1):
    total_loss =0

    for epoch in range(num_epochs):
        print('Starting epoch %d / %d' % (epoch + 1, num_epochs))
        model.train()

        for t, (x, y) in enumerate(train_loader):
            x_var = Variable(x.type(gpu_dtype))
            y_var = Variable(y.type(gpu_dtype).long())
            scores = model(x_var)
            loss = loss_fn(scores, y_var)
            total_loss += loss.data
            
            if (t + 1) % print_every == 0:
                avg_loss = total_loss/print_every
                print('t = %d, avg_loss = %.4f' % (t + 1, avg_loss) )
                avg_loss_list.append(avg_loss)
                total_loss = 0
                

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        acc = check_accuracy(fixed_model_gpu, valid_loader)
        print('acc = %f' %(acc))
            
def check_accuracy(model, loader):
    print('Checking accuracy on test set')   
    num_correct = 0
    num_samples = 0
    model.eval() 
    for x, y in loader:
        x_var = Variable(x.type(gpu_dtype))

        scores = model(x_var)
        _, preds = scores.data.cpu().max(1)
        num_correct += (preds == y).sum()
        num_samples += preds.size(0)
    acc = float(num_correct) / num_samples
    acc_list.append(acc)
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
    return acc
    

In [0]:
class Flatten(nn.Module):
    def forward(self, x):
        N, C, H, W = x.size()
        return x.view(N, -1)  

In [0]:
print_every = 1
gpu_dtype = torch.cuda.FloatTensor

out_1 = 64
#out_2 = 48
out_3 = 32
#out_4 = 24
out_5 = 16

k_size_1 = 3
padding_1 = 1
in_channels = 3

num_epochs = 50

'''
Kerv2d
kervolution with following options:
kernel_type: [linear, polynomial, gaussian, etc.]
default is convolution:
          kernel_type --> linear,
balance, power, gamma is valid only when the kernel_type is specified
if learnable_kernel = True,  they just be the initial value of learable parameters
if learnable_kernel = False, they are the value of kernel_type's parameter
the parameter [power] cannot be learned due to integer limitation
balance: 0, 1
power: 3, 4, 5
gamma:
'''
fixed_model_base = nn.Sequential( 
                nn.Kerv2d(in_channels, out_1, padding= padding_1, kernel_size=k_size_1, 
                          stride=1, kernel_type='gaussian', learnable_kernel=True,
                          kernel_regularizer=True, balance=0, power=3, gamma=1), 
                nn.BatchNorm2d(out_1),
                nn.AvgPool2d(2, stride=2),
                
                
                nn.Conv2d(out_1 , out_3, padding= padding_1, kernel_size=k_size_1, stride=1), 
                #nn.BatchNorm2d(out_2),
                #nn.Conv2d(out_2 , out_3, padding= padding_1, kernel_size=k_size_1, stride=1), 
                nn.BatchNorm2d(out_3),
                nn.AvgPool2d(2, stride=2),
                nn.Conv2d(out_3 , out_5, padding= padding_1, kernel_size=k_size_1, stride=1), 
                #nn.BatchNorm2d(out_4),
                #nn.Conv2d(out_4 , out_5, padding= padding_1, kernel_size=k_size_1, stride=1), 
                nn.BatchNorm2d(out_5),
                nn.Dropout(0.5),
                Flatten(),
                nn.Linear(4096,64),
                nn.Linear(64,10),
                nn.Linear(10,2)
            )
fixed_model_gpu = fixed_model_base.type(gpu_dtype)
print(fixed_model_gpu)
loss_fn = nn.modules.loss.CrossEntropyLoss()
optimizer = optim.Adam(fixed_model_gpu.parameters(), lr = 0.0001) 


train(fixed_model_gpu, train_loader ,loss_fn, optimizer, num_epochs=num_epochs)
check_accuracy(fixed_model_gpu, valid_loader)

Sequential(
  (0): Kerv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (3): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (5): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (6): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (8): Dropout(p=0.5, inplace=False)
  (9): Flatten()
  (10): Linear(in_features=4096, out_features=64, bias=True)
  (11): Linear(in_features=64, out_features=10, bias=True)
  (12): Linear(in_features=10, out_features=2, bias=True)
)
Starting epoch 1 / 50
t = 1, avg_loss = 0.6798
t = 2, avg_loss = 0.7957
t = 3, avg_loss = 0.6398
t = 4, avg_loss = 0.5630
t = 5, avg_loss = 0.5710
t = 6, avg_loss = 0.73