In [0]:
from google.colab import drive
#mount your drive.  Complete Oauth to authenticate
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
#unzip image folder
!unzip -uq "/content/gdrive/My Drive/jpegs.zip" -d "/content/gdrive/My Drive/"

In [0]:
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
import numpy as np
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import os

# Training set values (same for all data)
data_means = [0.6786, 0.6413, 0.6605]
data_stds = [0.2012, 0.2080, 0.1997]

transformations = transforms.Compose([
#    transforms.Resize(255),
#    transforms.CenterCrop(224),
    transforms.ToTensor(),  # Transforms channels from 0- 255 -> 0-1.
    transforms.Normalize(mean=data_means, std=data_stds)])

full_train_set = datasets.ImageFolder("/content/gdrive/My Drive/TRAIN", transform=transformations)
full_train_set, temp = torch.utils.data.random_split(full_train_set, [int(len(full_train_set) / 20), len(full_train_set) - int(len(full_train_set) / 20)])
full_train_loader = torch.utils.data.DataLoader(full_train_set, batch_size=25, shuffle=True)
print("Train set size: ", len(full_train_set))

#train_set_E = datasets.ImageFolder("/content/gdrive/My Drive/TRAIN/EOSINOPHIL", transform=transformations)
#train_set_L = datasets.ImageFolder("/content/gdrive/My Drive/TRAIN/LYMPHOCYTE", transform=transformations)
#train_set_M = datasets.ImageFolder("/content/gdrive/My Drive/TRAIN/MONOCYTE", transform=transformations)
#train_set_N = datasets.ImageFolder("/content/gdrive/My Drive/TRAIN/NEUTROPHIL", transform=transformations)

test_set = datasets.ImageFolder("/content/gdrive/My Drive/TEST", transform=transformations)
test_loader = torch.utils.data.DataLoader(test_set, batch_size =25, shuffle=True)

# Options: MOST GENERALIZED - 121, 169, 201, 161 - MOST ACCURATE
# https://pytorch.org/hub/pytorch_vision_densenet/
model = models.densenet161(pretrained=True)


for param in model.parameters():
    param.requires_grad = False
classifier_input = model.classifier.in_features
num_labels = 4
classifier = nn.Sequential(nn.Linear(classifier_input, 1024),
                           nn.ReLU(),
                           nn.Linear(1024, 512),
                           nn.ReLU(),
                           nn.Linear(512, num_labels),
                           nn.LogSoftmax(dim=1))


model.classifier = classifier

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model.to(device)

# Can choose various loss functions to use.
criterion = nn.NLLLoss()
# Set the optimizer function using torch.optim as optim library
optimizer = optim.Adam(model.classifier.parameters())

epochs = 5
for epoch in range(epochs):
    ep_train_loss = 0
    ep_val_loss = 0
    ep_accuracy = 0

    for i in range(num_folds):

        train_loss = 0
        val_loss = 0
        accuracy = 0

        # Constructing training and validation sets
        num_folds = 5
        folds = []
        items = full_train_set
        for j in range(num_folds):
            fold_length = int(len(items) / (1.*num_folds - j))
            items, new_fold = torch.utils.data.random_split(items, [len(items) - fold_length, fold_length])
            folds.append(new_fold)

        train_set = -1
        for j in range(num_folds):
            if j != i:
                if train_set == -1:
                    train_set = folds[j]
                else:
                    train_set = torch.utils.data.ConcatDataset([train_set, folds[j]])
        val_set = folds[i]
        train_loader = torch.utils.data.DataLoader(train_set, batch_size=25, shuffle=True)
        val_loader = torch.utils.data.DataLoader(val_set, batch_size=25, shuffle=True)

        # Training the model
        model.train()
        counter = 0
        for inputs, labels in train_loader:
            # Move to device
            inputs, labels = inputs.to(device), labels.to(device)
            # Clear optimizers
            optimizer.zero_grad()
            # Forward pass
            output = model.forward(inputs)
            # Loss
            loss = criterion(output, labels)
            # Calculate gradients (backpropogation)
            loss.backward()
            # Adjust parameters based on gradients
            optimizer.step()
            # Add the loss to the training set's rnning loss
            train_loss += loss.item()*inputs.size(0)

            # Print the progress of our training
            counter += 1
            print(counter, "/", len(train_loader))

        # Evaluating the model
        model.eval()
        counter = 0
        # Tell torch not to calculate gradients
        with torch.no_grad():
            for inputs, labels in val_loader:
                # Move to device
                inputs, labels = inputs.to(device), labels.to(device)
                # Forward pass
                output = model.forward(inputs)
                # Calculate Loss
                valloss = criterion(output, labels)
                # Add loss to the validation set's running loss
                val_loss += valloss.item()*inputs.size(0)

                # Since our model outputs a LogSoftmax, find the real
                # percentages by reversing the log function
                output = torch.exp(output)
                # Get the top class of the output
                top_p, top_class = output.topk(1, dim=1)
                # See how many of the classes were correct?
                equals = top_class == labels.view(*top_class.shape)
                # Calculate the mean (get the accuracy for this batch)
                # and add it to the running accuracy for this epoch
                accuracy += torch.mean(equals.type(torch.FloatTensor)).item()

                # Print the progress of our evaluation
                counter += 1
                print(counter, "/", len(val_loader))

        # Get the average loss for the entire fold
        train_loss = train_loss/len(train_loader.dataset)
        val_loss = val_loss/len(val_loader.dataset)
        accuracy = accuracy/len(val_loader)
        # Add to epoch's running total for avg losses
        ep_train_loss += train_loss
        ep_val_loss += val_loss
        ep_accuracy += accuracy
        # Print out the information
        print('\nFold Accuracy: ', accuracy)
        print('Epoch: {} \tFold: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f} \n'.format(epoch, i, train_loss, val_loss))

    # Get the average loss for the entire fold
    ep_train_loss = ep_train_loss/num_folds
    ep_val_loss = ep_val_loss/num_folds
    ep_accuracy = ep_accuracy/num_folds
    # Print out the information
    print('\n\t*** Epoch Accuracy: ', ep_accuracy)
    print('\t*** Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f} \n'.format(epoch, ep_train_loss, ep_val_loss))


full_train_loss = 0
test_loss = 0
test_accuracy = 0

# Training the model one final time on the full dataset
model.train()
counter = 0
for inputs, labels in full_train_loader:
    # Move to device
    inputs, labels = inputs.to(device), labels.to(device)
    # Clear optimizers
    optimizer.zero_grad()
    # Forward pass
    output = model.forward(inputs)
    # Loss
    loss = criterion(output, labels)
    # Calculate gradients (backpropogation)
    loss.backward()
    # Adjust parameters based on gradients
    optimizer.step()
    # Add the loss to the training set's rnning loss
    full_train_loss += loss.item()*inputs.size(0)

    # Print the progress of our training
    counter += 1
    print(counter, "/", len(full_train_loader))

# Saving the model
torch.save(model, "./blood_model.py")


model.eval()
counter = 0
# Tell torch not to calculate gradients
with torch.no_grad():
    for inputs, labels in test_loader:
        # Move to device
        inputs, labels = inputs.to(device), labels.to(device)
        # Forward pass
        output = model.forward(inputs)
        # Calculate Loss
        testloss = criterion(output, labels)
        # Add loss to the validation set's running loss
        test_loss += testloss.item()*inputs.size(0)

        # Since our model outputs a LogSoftmax, find the real
        # percentages by reversing the log function
        output = torch.exp(output)
        # Get the top class of the output
        top_p, top_class = output.topk(1, dim=1)
        # See how many of the classes were correct?
        equals = top_class == labels.view(*top_class.shape)
        # Calculate the mean (get the accuracy for this batch)
        # and add it to the running accuracy for this epoch
        test_accuracy += torch.mean(equals.type(torch.FloatTensor)).item()

        # Print the progress of our evaluation
        counter += 1
        print(counter, "/", len(test_loader))

# Get the average loss for the entire fold
full_train_loss = full_train_loss/len(full_train_loader.dataset)
test_loss = test_loss/len(test_loader.dataset)
test_accuracy = test_accuracy/len(test_loader)
# Print out the information
print('Test Set Accuracy: ', test_accuracy)
print('Training Loss: {:.6f} \tTesting Loss: {:.6f} \n'.format(full_train_loss, test_loss))


# Process our image
def process_image(image_path):
    # Load Image
    img = Image.open(image_path)

    # Get the dimensions of the image
    width, height = img.size

    # Resize by keeping the aspect ratio, but changing the dimension
    # so the shortest size is 255px
    # img = img.resize((255, int(255*(height/width))) if width < height else (int(255*(width/height)), 255))

    # Get the dimensions of the new image size
    width, height = img.size

    # Set the coordinates to do a center crop of 224 x 224
    #left = (width - 224)/2
    #top = (height - 224)/2
    #right = (width + 224)/2
    #bottom = (height + 224)/2
    #img = img.crop((left, top, right, bottom))

    # Turn image into numpy array
    img = np.array(img)

    # Make the color channel dimension first instead of last
    img = img.transpose((2, 0, 1))

    # Make all values between 0 and 1
    img = img/255

    # Normalize based on the preset mean and standard deviation
    img[0] = (img[0] - data_means[0])/data_stds[0]
    img[1] = (img[1] - data_means[1])/data_stds[1]
    img[2] = (img[2] - data_means[2])/data_stds[2]

    # Add a fourth dimension to the beginning to indicate batch size
    img = img[np.newaxis,:]

    # Turn into a torch tensor
    image = torch.from_numpy(img)
    image = image.float()
    return image

# Using our model to predict the label
def predict(image, model):
    # Pass the image through our model
    output = model.forward(image)

    # Reverse the log function in our output
    output = torch.exp(output)

    # Get the top predicted class, and the output percentage for
    # that class
    probs, classes = output.topk(1, dim=1)
    return probs.item(), classes.item()

# Show Image
def show_image(image):
    # Convert image to numpy
    image = image.numpy()

    # Un-normalize the image with avg std and mean
    image[0] = image[0] * 0.2030 + 0.6601

    # Print the image
    fig = plt.figure(figsize=(25, 4))
    plt.imshow(np.transpose(image[0], (1, 2, 0)))



Train set size:  497
1 / 16
2 / 16
3 / 16
4 / 16
5 / 16
6 / 16
7 / 16
8 / 16
9 / 16
10 / 16
11 / 16
12 / 16
13 / 16
14 / 16
15 / 16
16 / 16
1 / 4
2 / 4
3 / 4
4 / 4

Fold Accuracy:  0.25000000186264515
Epoch: 0 	Fold: 4 	Training Loss: 1.385595 	Validation Loss: 1.382909 

1 / 16
2 / 16
3 / 16
4 / 16
5 / 16
6 / 16
7 / 16
8 / 16
9 / 16
10 / 16
11 / 16
12 / 16
13 / 16
14 / 16
15 / 16
16 / 16
1 / 4
2 / 4
3 / 4
4 / 4

Fold Accuracy:  0.3500000014901161
Epoch: 0 	Fold: 4 	Training Loss: 1.334669 	Validation Loss: 1.252008 

1 / 16
2 / 16
3 / 16
4 / 16
5 / 16
6 / 16
7 / 16
8 / 16
9 / 16
10 / 16
11 / 16
12 / 16
13 / 16
14 / 16
15 / 16
16 / 16
1 / 4
2 / 4
3 / 4
4 / 4

Fold Accuracy:  0.5500000044703484
Epoch: 0 	Fold: 4 	Training Loss: 1.152502 	Validation Loss: 0.969666 

1 / 16
2 / 16
3 / 16
4 / 16
5 / 16
6 / 16
7 / 16
8 / 16
9 / 16
10 / 16
11 / 16
12 / 16
13 / 16
14 / 16
15 / 16
16 / 16
1 / 4
2 / 4
3 / 4
4 / 4

Fold Accuracy:  0.4399999976158142
Epoch: 0 	Fold: 4 	Training Loss: 1.101131 	Va

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


1 / 100
2 / 100
3 / 100
4 / 100
5 / 100
6 / 100
7 / 100
8 / 100
9 / 100
10 / 100
11 / 100
12 / 100
13 / 100
14 / 100
15 / 100
16 / 100
17 / 100
18 / 100
19 / 100
20 / 100
21 / 100
22 / 100
23 / 100
24 / 100
25 / 100
26 / 100
27 / 100
28 / 100
29 / 100
30 / 100
31 / 100
32 / 100
33 / 100
34 / 100
35 / 100
36 / 100
37 / 100
38 / 100
39 / 100
40 / 100
41 / 100
42 / 100
43 / 100
44 / 100
45 / 100
46 / 100
47 / 100
48 / 100
49 / 100
50 / 100
51 / 100
52 / 100
53 / 100
54 / 100
55 / 100
56 / 100
57 / 100
58 / 100
59 / 100
60 / 100
61 / 100
62 / 100
63 / 100
64 / 100
65 / 100
66 / 100
67 / 100
68 / 100
69 / 100
70 / 100
71 / 100
72 / 100
73 / 100
74 / 100
75 / 100
76 / 100
77 / 100
78 / 100
79 / 100
80 / 100
81 / 100
82 / 100
83 / 100
84 / 100
85 / 100
86 / 100
87 / 100
88 / 100
89 / 100
90 / 100
91 / 100
92 / 100
93 / 100
94 / 100
95 / 100
96 / 100
97 / 100
98 / 100
99 / 100
100 / 100

Overall Training Set Accuracy:  0.0
Test Set Accuracy:  0.5566333338618279
Training Loss: 0.293932 	Testing