In [2]:
# Directory used for the Kaggle download: the download cell checks it for an
# existing archive and passes it as the download target.
download_path='../data/'

In [8]:
import os
import zipfile

from kaggle.api.kaggle_api_extended import KaggleApi
# Using Kaggle API to download data (skipped when the archive is already on disk)

api = KaggleApi()
api.authenticate()
dataset = 'paultimothymooney/chest-xray-pneumonia'

# The dataset id has the form "<owner>/<slug>", but the archive on disk is named
# after the slug only (the extraction step opens "chest-xray-pneumonia.zip").
# Comparing the full id against directory entries could never match, so the
# download ran on every execution.
dataset_zip = os.path.join(download_path, dataset.split('/')[-1] + ".zip")

# os.path.isfile also copes with a download directory that does not exist yet,
# where os.listdir would raise FileNotFoundError.
if os.path.isfile(dataset_zip):
    print("***Dataset present at local path. Skipping download***")
else:
    print("Downloading dataset...")
    os.makedirs(download_path, exist_ok=True)
    api.dataset_download_files(dataset,path=download_path)
    print("Done")

Downloading dataset...
Done


In [10]:
print("Extracting training files...")
# Extract into ../data/ because the following cells read the images from
# ../data/chest_xray/{train,test}; extracting into ../data/x-ray/ could never
# produce those paths.
# NOTE(review): assumes the archive's top-level folder is `chest_xray/` - confirm.
# The context manager closes the archive handle once extraction finishes;
# `dataset.filename` remains readable afterwards.
with zipfile.ZipFile("../data/chest-xray-pneumonia.zip", 'r') as dataset:
    dataset.extractall(path='../data/')
print("Done")


# Cleaning up
# os.remove(dataset.filename) if you'd like to remove the original zip file from Kaggle as well

Extracting training files...


In [None]:
# Image folders handed to ImageFolder for the training and test splits.
train_dir, test_dir = (
    '../data/chest_xray/train',
    '../data/chest_xray/test',
)

In [None]:
# Per-channel mean / std passed to Normalize in both pipelines.
# NOTE(review): these look like the usual ImageNet statistics - confirm.
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]

# Evaluation pipeline: resize -> tensor -> normalize.
transforms_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
])

# Training pipeline: same steps plus a random horizontal flip before ToTensor.
transforms_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
])

# Datasets are read from the split folders with their respective pipeline.
train_data = ImageFolder(root=train_dir, transform=transforms_train)
test_data = ImageFolder(root=test_dir, transform=transforms_test)

# Both loaders serve shuffled batches of 32 samples.
train_loader = DataLoader(train_data, shuffle=True, batch_size=32)
test_loader = DataLoader(test_data, shuffle=True, batch_size=32)

In [None]:
import torch
import torch.nn as nn
import torchvision

class TeacherNet(nn.Module):
    """Teacher classifier: a pretrained ResNet-18 with a new 2-way output layer.

    All parameters of the loaded ResNet-18 are frozen; only the replacement
    ``fc`` layer (created afterwards, hence trainable by default) receives
    gradients.
    """

    def __init__(self):
        super().__init__()
        self.model = torchvision.models.resnet18(pretrained=True)

        # Freeze the backbone. `requires_grad` is the flag autograd reads;
        # `requires_grad_` is a *method*, so assigning False to it only shadows
        # the method on the tensor and leaves every parameter trainable.
        for param in self.model.parameters():
            param.requires_grad = False

        # Swap the final layer for a fresh 2-output linear head with the same
        # input width as the layer it replaces.
        n_filters = self.model.fc.in_features
        self.model.fc = nn.Linear(n_filters, 2)

    def forward(self, x):
        """Return the wrapped network's output for the batch ``x``."""
        return self.model(x)

In [None]:
def train(model, train_loader, test_loader, optimizer, criterion, device, num_epochs=30):
    """Run alternating training / validation passes and print per-epoch metrics.

    Args:
        model: Module called as ``model(inputs)``; switched between ``train()``
            and ``eval()`` mode per phase.
        train_loader: Iterable of ``(inputs, labels)`` batches with a
            ``.dataset`` attribute; used for the ``'train'`` phase.
        test_loader: Same protocol as ``train_loader``; used for the ``'val'``
            phase (no backward pass, no optimizer step).
        optimizer: Optimizer whose ``zero_grad()`` / ``step()`` are called.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        device: Device the batches are moved to before the forward pass.
        num_epochs: Number of epochs to run. Defaults to 30, the value that was
            previously hard-coded in the loop.

    Returns:
        None. Loss and accuracy for each phase are printed once per epoch.
    """
    dataloaders = {'train': train_loader, 'val': test_loader}

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        for phase in ['train', 'val']:
            is_training = phase == 'train'
            if is_training:
                model.train()
            else:
                model.eval()

            # Plain Python accumulators: they stay valid even when the loader
            # yields no batches (a tensor-only `.double()` call would not).
            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in tqdm.tqdm(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Gradients are only tracked during the training phase.
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    preds = outputs.argmax(dim=1)

                    if is_training:
                        loss.backward()
                        optimizer.step()

                # The batch loss is weighted by batch size so the epoch figure
                # is a per-sample average even if the last batch is smaller.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()

            # max(..., 1) avoids a ZeroDivisionError for an empty dataset.
            n_samples = max(len(dataloaders[phase].dataset), 1)
            epoch_loss = running_loss / n_samples
            epoch_acc = running_corrects / n_samples

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

In [None]:
class StudentNet(nn.Module):
    """Compact student classifier: one conv block followed by a linear layer.

    The conv block is Conv2d(3 -> 4, 3x3, padding 1) -> BatchNorm2d -> ReLU ->
    2x2 max-pool with stride 2. The linear layer expects 4 * 112 * 112
    flattened features and produces 2 outputs.
    """

    def __init__(self):
        super().__init__()
        conv_block = [
            nn.Conv2d(3, 4, kernel_size=3, padding=1),
            nn.BatchNorm2d(4),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.layer1 = nn.Sequential(*conv_block)
        self.fc = nn.Linear(4 * 112 * 112, 2)

    def forward(self, x):
        """Apply the conv block, flatten each sample, and return the 2 logits."""
        features = self.layer1(x)
        flattened = features.view(len(features), -1)
        return self.fc(flattened)

In [None]:
class DistillationLoss:
    """Blend a hard-label loss with a soft-target (teacher) distillation term.

    The result is
    ``(1 - alpha) * student_target_loss + alpha * T**2 * KL(teacher || student)``,
    where both distributions are softmaxes of the logits divided by the
    temperature ``T`` and the KL term is averaged per sample.

    Args:
        temperature: Softmax temperature ``T`` applied to both sets of logits.
            Defaults to 1.
        alpha: Weight of the distillation term; ``1 - alpha`` weights the
            hard-label loss. Defaults to 0.25.
    """

    def __init__(self, temperature=1, alpha=0.25):
        # Not used by __call__ (which receives the hard-label loss precomputed),
        # but kept as a public attribute.
        self.student_loss = nn.CrossEntropyLoss()
        # 'batchmean' sums over classes and averages over the batch, which is
        # the KL divergence per sample; the default 'mean' would additionally
        # divide by the number of classes.
        self.distillation_loss = nn.KLDivLoss(reduction='batchmean')
        self.temperature = temperature
        self.alpha = alpha

    def __call__(self, student_logits, student_target_loss, teacher_logits):
        """Return the combined loss.

        Args:
            student_logits: Student outputs; softmax is taken over ``dim=1``.
            student_target_loss: Already-computed hard-label loss of the student.
            teacher_logits: Teacher outputs, same layout as ``student_logits``.
        """
        temperature = self.temperature
        # KLDivLoss expects log-probabilities as input and probabilities as target.
        student_log_probs = nn.functional.log_softmax(student_logits / temperature, dim=1)
        teacher_probs = nn.functional.softmax(teacher_logits / temperature, dim=1)

        # Scaling by T**2 keeps the soft-target term on a comparable scale to
        # the hard-label term when T != 1 (a no-op at the default T = 1).
        distillation_loss = self.distillation_loss(student_log_probs, teacher_probs) * temperature ** 2

        return (1 - self.alpha) * student_target_loss + self.alpha * distillation_loss