In [0]:
from google.colab import drive
# Mount Google Drive at /content/gdrive so the dataset paths used below resolve.
# Complete the OAuth prompt to authenticate when asked.
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
# Unzip the image archive stored in Drive into the Drive root, which is where the
# dataset paths used later in this notebook point.
# Flags: -u only extracts new/updated files, -q is quiet, -d sets the destination.
!unzip -uq "/content/gdrive/My Drive/jpegs.zip" -d "/content/gdrive/My Drive/"

In [0]:
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
import numpy as np
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import os

# Training-set channel statistics (RGB order); the same values are applied to all data.
data_means = [0.6786, 0.6413, 0.6605]
data_stds = [0.2012, 0.2080, 0.1997]

# Seed torch's global RNG so the random fold assignment below is reproducible
# across "Restart & Run All" executions.
torch.manual_seed(0)

# No resize/crop is applied: images are only converted to tensors and normalized.
transformations = transforms.Compose([
    transforms.ToTensor(),  # Transforms channels from 0-255 -> 0-1.
    transforms.Normalize(mean=data_means, std=data_stds)])

train_set = datasets.ImageFolder("/content/gdrive/My Drive/TRAIN", transform=transformations)
print("Train set size: ", len(train_set))

# Split the training data into `num_folds` disjoint, near-equal folds.
# Each iteration carves one fold off whatever is still unassigned; dividing by the
# number of folds left to create keeps the sizes balanced (any remainder ends up
# in the later folds).
num_folds = 4
folds = []
remaining = train_set  # separate name so `train_set` keeps pointing at the full dataset
for i in range(num_folds):
    fold_length = len(remaining) // (num_folds - i)
    remaining, new_fold = torch.utils.data.random_split(
        remaining, [len(remaining) - fold_length, fold_length])
    folds.append(new_fold)
    print("Fold size: ", len(new_fold))

# Held-out test data, loaded with the same preprocessing as the training data.
test_set = datasets.ImageFolder("/content/gdrive/My Drive/TEST", transform=transformations)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=25, shuffle=True)

# DenseNet options, from MOST GENERALIZED to MOST ACCURATE: 121, 169, 201, 161
# https://pytorch.org/hub/pytorch_vision_densenet/
model = models.densenet161(pretrained=True)

# Freeze every pretrained parameter; the head attached below is created afterwards
# and therefore stays trainable.
for param in model.parameters():
    param.requires_grad_(False)

# Replacement head: backbone features -> 1024 -> 512 -> one log-probability per label.
classifier_input = model.classifier.in_features
num_labels = 4
classifier = nn.Sequential(
    nn.Linear(classifier_input, 1024),
    nn.ReLU(),
    nn.Linear(1024, 512),
    nn.ReLU(),
    nn.Linear(512, num_labels),
    nn.LogSoftmax(dim=1),
)
model.classifier = classifier

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# NLLLoss pairs with the LogSoftmax output of the head.
criterion = nn.NLLLoss()
# Only the new head's parameters are handed to the optimizer.
optimizer = optim.Adam(model.classifier.parameters())

epochs = 10
# NOTE(review): a single model/optimizer is shared by every fold and epoch, so once
# the first fold rotation has run, each validation fold has already been trained on.
# The validation figures below are therefore optimistic, not a true held-out
# cross-validation estimate.
for epoch in range(epochs):
    # Running sums of the per-fold metrics for this epoch
    ep_train_loss = 0
    ep_val_loss = 0
    ep_accuracy = 0

    for i in range(num_folds):
        train_loss = 0
        val_loss = 0
        correct = 0

        # Fold i is held out for validation; every other fold is used for training.
        train_set = torch.utils.data.ConcatDataset(
            [fold for j, fold in enumerate(folds) if j != i])
        val_set = folds[i]
        train_loader = torch.utils.data.DataLoader(train_set, batch_size=25, shuffle=True)
        val_loader = torch.utils.data.DataLoader(val_set, batch_size=25, shuffle=True)

        # Training the model
        model.train()
        for counter, (inputs, labels) in enumerate(train_loader, start=1):
            # Move to device
            inputs, labels = inputs.to(device), labels.to(device)
            # Clear gradients left over from the previous step
            optimizer.zero_grad()
            # Forward pass
            output = model(inputs)
            # Loss
            loss = criterion(output, labels)
            # Calculate gradients (backpropagation)
            loss.backward()
            # Adjust parameters based on gradients
            optimizer.step()
            # Weight the batch-mean loss by the batch size so that the final
            # division by the dataset size yields a true per-sample average
            train_loss += loss.item() * inputs.size(0)

            # Print the progress of our training
            print(counter, "/", len(train_loader))

        # Evaluating the model
        model.eval()
        # Tell torch not to calculate gradients
        with torch.no_grad():
            for counter, (inputs, labels) in enumerate(val_loader, start=1):
                # Move to device
                inputs, labels = inputs.to(device), labels.to(device)
                # Forward pass
                output = model(inputs)
                # Calculate loss and add it to the validation running loss
                valloss = criterion(output, labels)
                val_loss += valloss.item() * inputs.size(0)

                # The most likely class is the argmax of the log-probabilities
                # (exp is monotonic, so there is no need to undo the log first).
                predictions = output.argmax(dim=1)
                # Count correct predictions instead of averaging per-batch means,
                # so a smaller final batch is not over-weighted.
                correct += (predictions == labels).sum().item()

                # Print the progress of our evaluation
                print(counter, "/", len(val_loader))

        # Get the per-sample averages for the entire fold
        train_loss = train_loss / len(train_loader.dataset)
        val_loss = val_loss / len(val_loader.dataset)
        accuracy = correct / len(val_loader.dataset)
        # Add to epoch's running totals
        ep_train_loss += train_loss
        ep_val_loss += val_loss
        ep_accuracy += accuracy
        # Print out the information
        print('Fold Accuracy: ', accuracy, '\n')
        print('Epoch: {} \tFold: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(epoch, i, train_loss, val_loss))

    # Get the average over all folds for the entire epoch
    ep_train_loss = ep_train_loss / num_folds
    ep_val_loss = ep_val_loss / num_folds
    ep_accuracy = ep_accuracy / num_folds
    # Print out the information (epoch-level validation loss, not the last fold's)
    print('\n\t*** Epoch Accuracy: ', ep_accuracy)
    print('\t*** Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f} \n'.format(epoch, ep_train_loss, ep_val_loss))

# NOTE(review): this serializes the whole model object with torch.save; despite the
# ".py" extension the file is not Python source. The path is left unchanged in case
# other code loads from it -- confirm before renaming.
torch.save(model, "./blood_model.py")


# Leave the model in evaluation mode for the inference helpers below.
model.eval()
# Process our image
def process_image(image_path):
    """Load an image file and convert it into a normalized, batched float tensor.

    The steps mirror the `transformations` pipeline (scale to 0-1, then normalize
    per channel with `data_means` / `data_stds`); no resize or crop is applied.

    Args:
        image_path: Path (or file object) accepted by `PIL.Image.open`.

    Returns:
        A float32 `torch.Tensor` of shape (1, 3, height, width).
    """
    # Force three RGB channels so the per-channel normalization below is valid for
    # grayscale, palette and RGBA files too; the `with` block closes the file handle.
    with Image.open(image_path) as img:
        img = np.array(img.convert("RGB"), dtype=np.float64)

    # Make the color channel dimension first instead of last (HWC -> CHW)
    img = img.transpose((2, 0, 1))

    # Make all values between 0 and 1
    img = img / 255

    # Normalize based on the preset mean and standard deviation; the (3, 1, 1)
    # shape broadcasts each channel's statistic over its height x width plane.
    mean = np.array(data_means, dtype=np.float64).reshape(3, 1, 1)
    std = np.array(data_stds, dtype=np.float64).reshape(3, 1, 1)
    img = (img - mean) / std

    # Add a fourth dimension to the beginning to indicate batch size
    img = img[np.newaxis, :]

    # Turn into a float32 torch tensor
    return torch.from_numpy(img).float()

# Using our model to predict the label
def predict(image, model):
    """Return the top predicted class and its probability for a single image.

    Args:
        image: Batched input tensor holding exactly one image (the result is
            unpacked with `.item()`, which requires a single element).
        model: Module whose output is log-probabilities with classes on dim 1.

    Returns:
        Tuple `(probability, class_index)` as a Python float and int.
    """
    # Run the input on whatever device the model's weights live on; otherwise a
    # CPU tensor fed to a CUDA model raises a device-mismatch error.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        image = image.to(first_param.device)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        # Pass the image through our model
        output = model(image)

        # Reverse the log function in our output
        output = torch.exp(output)

        # Get the top predicted class, and the output percentage for
        # that class
        probs, classes = output.topk(1, dim=1)
    return probs.item(), classes.item()

# Show Image
def show_image(image):
    """Display the first image of a normalized, batched image tensor.

    Args:
        image: Tensor with batch first and channels second (the first batch
            element is shown), normalized with `data_means` / `data_stds`.
    """
    # Work on a CPU copy: Tensor.numpy() shares memory with the tensor, so
    # un-normalizing in place would silently change the caller's tensor.
    pixels = image.detach().cpu().numpy()[0].copy()

    # Un-normalize each channel with its own mean/std, the exact inverse of the
    # normalization applied in preprocessing.
    mean = np.array(data_means, dtype=pixels.dtype).reshape(-1, 1, 1)
    std = np.array(data_stds, dtype=pixels.dtype).reshape(-1, 1, 1)
    pixels = pixels * std + mean

    # imshow expects float RGB data in [0, 1]; clip away rounding overshoot.
    pixels = np.clip(pixels, 0.0, 1.0)

    # Print the image (channels moved last for matplotlib)
    plt.figure(figsize=(25, 4))
    plt.imshow(np.transpose(pixels, (1, 2, 0)))



Train set size:  9957
Fold size:  2489
Fold size:  2489
Fold size:  2489
Fold size:  2490
1 / 299
2 / 299
3 / 299
4 / 299
5 / 299
6 / 299
7 / 299
8 / 299
9 / 299
10 / 299
11 / 299
12 / 299
13 / 299
14 / 299
15 / 299
16 / 299
17 / 299
18 / 299
19 / 299
20 / 299
21 / 299
22 / 299
23 / 299
24 / 299
25 / 299
26 / 299
27 / 299
28 / 299
29 / 299
30 / 299
31 / 299
32 / 299
33 / 299
34 / 299
35 / 299
36 / 299
37 / 299
38 / 299
39 / 299
40 / 299
41 / 299
42 / 299
43 / 299
44 / 299
45 / 299
46 / 299
47 / 299
48 / 299
49 / 299
50 / 299
51 / 299
52 / 299
53 / 299
54 / 299
55 / 299
56 / 299
57 / 299
58 / 299
59 / 299
60 / 299
61 / 299
62 / 299
63 / 299
64 / 299
65 / 299
66 / 299
67 / 299
68 / 299
69 / 299
70 / 299
71 / 299
72 / 299
73 / 299
74 / 299
75 / 299
76 / 299
77 / 299
78 / 299
79 / 299
80 / 299
81 / 299
82 / 299
83 / 299
84 / 299
85 / 299
86 / 299
87 / 299
88 / 299
89 / 299
90 / 299
91 / 299
92 / 299
93 / 299
94 / 299
95 / 299
96 / 299
97 / 299
98 / 299
99 / 299
100 / 299
101 / 299
102 / 29