<a href="https://colab.research.google.com/github/peter-ohara/chicken_tinder/blob/master/chicken_tinder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!git clone git@github.com:peter-ohara/chicken_tinder.git

Cloning into 'chicken_tinder'...
Host key verification failed.
fatal: Could not read from remote repository.

Please make sure you have the correct access rights
and the repository exists.


In [1]:
# Imports here
import torch
from torch import nn
from torch import optim
from torchvision import datasets, transforms
import torchvision.models as models
import torch.nn.functional as F
import matplotlib.pyplot as plt
import torch
from PIL import Image
import numpy as np

import uuid

from cross_validation import img_train_test_split
from datasets import download_images_from_url_file, cleanup_images

ModuleNotFoundError: ignored

In [0]:

normal_urls_file = 'normal_urls.txt'
abnormal_urls_file = 'abnormal_urls.txt'

img_source_dir = f'raw_data_{uuid.uuid4().hex[:8]}'
normal_images_dir = f'{img_source_dir}/normal'
abnormal_images_dir = f'{img_source_dir}/abnormal'

test_size = 0.2

# Download normal images from google
download_images_from_url_file(normal_urls_file, normal_images_dir)

# Download abnormal images from google
download_images_from_url_file(abnormal_urls_file, abnormal_images_dir)


# Delete corrupt files
cleanup_images(normal_images_dir)
cleanup_images(abnormal_images_dir)

# # Split into training, validation and testing
img_train_test_split(img_source_dir, test_size, train_dir='train', test_dir='validation_and_test')
img_train_test_split('data/validation_and_test', 0.5, train_dir='validation', test_dir='test')

In [0]:
data_dir = "./drive/My Drive/Colab Notebooks/data"
train_dir = data_dir + "/train"
valid_dir = data_dir + "/validation"
test_dir = data_dir + "/train"

In [0]:
# Define transforms for the training, validation, and testing sets
train_transform = transforms.Compose([
    transforms.RandomRotation(45),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
   ])

valid_transform = transforms.Compose([
    transforms.Resize(255),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

test_transform = transforms.Compose([
    transforms.Resize(255),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# Load the datasets with ImageFolder
train_dataset = datasets.ImageFolder(train_dir, transform=train_transform)
valid_dataset = datasets.ImageFolder(valid_dir, transform=valid_transform)
test_dataset = datasets.ImageFolder(test_dir, transform=test_transform)

# Using the image datasets and the trainforms, define the dataloaders
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=32, shuffle=True)
testloader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=True)

In [0]:
cat_to_name = {"normal": "normal", "abnormal": "abnormal"}

In [0]:
# TODO: Build and train your network
def build_model():
    model = models.vgg16(pretrained=True)

    for param in model.parameters():
        param.requires_grad = False

    model.classifier = nn.Sequential(nn.Linear(25088, 512),
                                     nn.ReLU(),
                                     nn.Dropout(p=0.2),
                                     nn.Linear(512, 256),
                                     nn.ReLU(),
                                     nn.Dropout(p=0.2),
                                     nn.Linear(256, 2),
                                     nn.LogSoftmax(dim=1))
    
    return model

In [0]:
def validate(dataloader, model, criterion):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    
    # Disable dropouts before evaluation
    model.eval()

    running_loss = 0
    accuracy = 0
    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)
        
        # Disable gradient calculations when evaluating
        with torch.no_grad():
            
            # Calculate loss
            logps = model(images)
            running_loss += criterion(logps, labels)
            
            # Calculate accuracy
            ps = torch.exp(logps)
            top_ps, top_class = ps.topk(1, dim=1)
            matches = top_class == labels.view(*top_class.shape)
            accuracy += torch.mean(matches.type(torch.FloatTensor))

    # Enable dropouts after evaluation
    model.train()

    loss = running_loss/len(dataloader)
    accuracy = accuracy/len(dataloader)
    return loss, accuracy

In [0]:
model = build_model()
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.classifier.parameters(), lr=0.03)

In [0]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print_metrics_every = 1
steps = 0
running_loss = 0
epochs = 5
for epoch in range(epochs):
    model.to("cuda")

    for images, labels in trainloader:
        steps += 1
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()

        logps = model(images)
        loss = criterion(logps, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        if steps % print_metrics_every == 0:
            training_loss = running_loss/print_metrics_every
            validation_loss, validation_accuracy = validate(validloader, model, criterion)

            print(f"Epoch: {epoch+1}/{epochs}... Training loss: {training_loss:.3f}... Validation loss: {validation_loss:.3f}... Validation Accuracy: {validation_accuracy:.3f}...")

            running_loss = 0

    model.to("cpu")

    model.class_to_idx = train_dataset.class_to_idx
    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': training_loss,
        'class_to_idx': model.class_to_idx
    }
    torch.save(checkpoint, './drive/My Drive/Colab Notebooks/checkpoint.tar')

Epoch: 1/5... Training loss: 0.706... Validation loss: 1006.962... Validation Accuracy: 0.940...
Epoch: 1/5... Training loss: 693.962... Validation loss: 431.271... Validation Accuracy: 0.940...
Epoch: 1/5... Training loss: 293.635... Validation loss: 99.941... Validation Accuracy: 0.940...
Epoch: 1/5... Training loss: 0.000... Validation loss: 60.425... Validation Accuracy: 0.940...
Epoch: 1/5... Training loss: 0.000... Validation loss: 39.640... Validation Accuracy: 0.941...
Epoch: 2/5... Training loss: 73.052... Validation loss: 97.867... Validation Accuracy: 0.940...
Epoch: 2/5... Training loss: 0.000... Validation loss: 141.567... Validation Accuracy: 0.940...
Epoch: 2/5... Training loss: 67.248... Validation loss: 120.831... Validation Accuracy: 0.944...
Epoch: 2/5... Training loss: 43.974... Validation loss: 106.309... Validation Accuracy: 0.942...
Epoch: 2/5... Training loss: 0.000... Validation loss: 93.112... Validation Accuracy: 0.941...
Epoch: 3/5... Training loss: 35.899..

KeyboardInterrupt: ignored

In [0]:
# model.to("cpu")
# torch.save(checkpoint, './drive/My Drive/Colab Notebooks/checkpoint.tar')

# dataiter = iter(testloader)
# images, labels = dataiter.next()

# traced_script_module = torch.jit.trace(model, images)
# traced_script_module.save("model.pt")

!mv model.pt './drive/My Drive/Colab Notebooks'

In [0]:
# TODO: Do validation on the test set
test_loss, accuracy = validate(testloader, model, criterion)
print("Test loss: {:.3f}...".format(test_loss),
      "Test Accuracy: {:.3f}...".format(accuracy))

In [0]:
# TODO: Save the checkpoint
# model.class_to_idx = train_dataset.class_to_idx
# checkpoint = {
#     'epoch': epoch,
#     'model_state_dict': model.state_dict(),
#     'optimizer_state_dict': optimizer.state_dict(),
#     'loss': training_loss,
#     'class_to_idx': model.class_to_idx
# }
# torch.save(checkpoint, './drive/My Drive/Colab Notebooks/checkpoint.tar')

In [0]:
# TODO: Write a function that loads a checkpoint and rebuilds the model
# def load_checkpoint(filepath):
#     model = build_model()
#     optimizer = optim.Adam(model.classifier.parameters(), lr=0.03)

#     checkpoint = torch.load(filepath)
#     model.load_state_dict(checkpoint['model_state_dict'])
#     optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

#     epoch = checkpoint['epoch']
#     loss = checkpoint['loss']
    
#     model.class_to_idx = checkpoint['class_to_idx']
#     model.eval()

#     return model, optimizer, epoch, loss

In [0]:
# TODO: Process a PIL image for use in a PyTorch model
def process_image(image):
    ''' Scales, crops, and normalizes a PIL image for a PyTorch model,
        returns an Numpy array
    '''

    # Resize to 256x256    
    width, height = image.size
    aspect_ratio = width/height

    if height > width:
        new_width = 256
        new_height = int(256/aspect_ratio)
    else:
        new_width = int(256*aspect_ratio)
        new_height = 256
        
    display(image)
    print(image.size)
    image = image.resize((new_width, new_height))
    display(image)
    print(image.size)

    # Centre crop to 224x224
    width, height = image.size
    new_width, new_height = 224, 224
    
    left = width//2 - new_width//2
    upper = height//2 - new_height//2
    right = width//2 + new_width//2
    lower = height//2 + new_height//2
    
    image = image.crop((left, upper, right, lower))

    # Convert to float
    np_image = np.array(image)
    np_image = np_image / 255
    
    # Normalize image
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    np_image = (np_image - mean) / std
    np_image = np.transpose(np_image, axes=[2,0,1])    
    
    return torch.from_numpy(np_image)

In [0]:
def imshow(image, ax=None, title=None):
    """Imshow for Tensor."""
    if ax is None:
        fig, ax = plt.subplots()
    
    # PyTorch tensors assume the color channel is the first dimension
    # but matplotlib assumes is the third dimension
    image = image.numpy().transpose((1, 2, 0))
    
    # Undo preprocessing
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    image = std * image + mean
    
    # Image needs to be clipped between 0 and 1 or it looks like noise when displayed
    image = np.clip(image, 0, 1)
    
    ax.imshow(image)
    
    return ax

In [0]:
# TODO: Implement the code to predict the class from an image file
def predict(image_path, model, topk=2):
    ''' Predict the class (or classes) of an image using a trained deep learning model.
    '''
    image = Image.open(image_path)
    image = process_image(image).unsqueeze(dim=0)
    image = image.type(torch.FloatTensor)
    logps = model(image)
    ps = torch.exp(logps)
    top_ps, top_class = ps.topk(topk, dim=1)
    
    idx_to_class = {v: k for k, v in model.class_to_idx.items()}
    
    top_ps = [top_p.item() for top_p in top_ps.squeeze()]
    top_class = [idx_to_class[top_class.item()] for top_class in top_class.squeeze()]
    return top_ps, top_class
        
model, optimizer, epoch, training_loss = load_checkpoint('./drive/My Drive/Colab Notebooks/checkpoint.tar')

image_path  = './drive/My Drive/Colab Notebooks/chickens/normal/2.jpg'
probs, classes = predict(image_path, model)
print(probs)
print(classes)

In [0]:
# TODO: Display an image along with the top 5 classes
fig, (ax1, ax2) = plt.subplots(figsize=(6,9), nrows=2)

image = Image.open(image_path)
image = process_image(image)

ax1.set_title(cat_to_name[classes[0]])
imshow(image, ax1)

ax2.barh(np.arange(5), probs)
ax2.set_aspect(0.2)
ax2.set_yticks(np.arange(5))

labels = [cat_to_name[klass] for klass in classes]
ax2.set_yticklabels(labels);
ax2.set_xlim(0, 1.1)

plt.tight_layout()