In [1]:
import os
import numpy as np
import cv2

# PyTorch
import torch
from torch import cuda
import torch.nn as nn

from torchvision import models

In [2]:
# Location of data (.pth file must be located here)
print(os.getcwd())

# Whether to train on a GPU
train_on_gpu = cuda.is_available()
print(f'Train on GPU: {train_on_gpu}')

# Number of gpus (0 when CUDA is not available)
gpu_count = cuda.device_count() if train_on_gpu else 0
if train_on_gpu:
    print(f'{gpu_count} GPU(s) detected.')

# Always defined, even on CPU-only machines: later cells read `multi_gpu`
# unconditionally and would otherwise fail with a NameError.
multi_gpu = gpu_count > 1

C:\Users\roume\Documents\GitHub\American-Sign-Language_MNIST
Train on GPU: True
1 GPU(s) detected.


In [3]:
def load_checkpoint(path):
    """Load a PyTorch model checkpoint

    Params
    --------
        path (str): saved model checkpoint. The file name (leading
            directories are ignored) must start with `vgg16-` or `resnet50-`.

    Returns
    --------
        model (PyTorch model): the rebuilt network with the checkpoint's
            weights loaded and `class_to_idx`, `idx_to_class` and `epochs`
            attached as attributes. Moved to the GPU when `train_on_gpu` is set.
        optimizer: the optimizer object stored in the checkpoint, with
            `optimizer_state_dict` loaded into it.

    Raises
    --------
        AssertionError: if the file name does not start with a supported
            model name.

    """

    # Get the model name from the file name only, so that paths containing
    # directories (or dashes in directory names) still work.
    model_name = os.path.basename(path).split('-')[0]
    assert (model_name in ['vgg16', 'resnet50'
                          ]), "Path must have the correct model name"

    # Load in checkpoint. Everything is mapped to the CPU first so that a
    # checkpoint written on a GPU machine can also be opened on a CPU-only one;
    # the model is moved to the GPU further down when one is available.
    # NOTE: the checkpoint stores whole pickled objects (the classifier head and
    # the optimizer), so only load files from a trusted source. Recent PyTorch
    # releases default `torch.load` to `weights_only=True`, which rejects such
    # files; `weights_only=False` has to be passed explicitly there.
    checkpoint = torch.load(path, map_location='cpu')

    # The backbone is created without downloading the ImageNet weights: every
    # parameter and buffer is overwritten by `load_state_dict` below anyway.
    if model_name == 'vgg16':
        model = models.vgg16(pretrained=False)
        # Make sure to set parameters as not trainable
        for param in model.parameters():
            param.requires_grad = False
        # The stored head keeps its own (trainable) parameters
        model.classifier = checkpoint['classifier']

    elif model_name == 'resnet50':
        model = models.resnet50(pretrained=False)
        # Make sure to set parameters as not trainable
        for param in model.parameters():
            param.requires_grad = False
        # The stored head keeps its own (trainable) parameters
        model.fc = checkpoint['fc']

    # Load in the state dict
    model.load_state_dict(checkpoint['state_dict'])

    total_params = sum(p.numel() for p in model.parameters())
    print(f'{total_params:,} total parameters.')
    total_trainable_params = sum(
        p.numel() for p in model.parameters() if p.requires_grad)
    print(f'{total_trainable_params:,} total gradient parameters.')

    # Move to GPU. `multi_gpu` is looked up defensively because the setup cell
    # may not have defined it when no GPU is present.
    if globals().get('multi_gpu', False):
        model = nn.DataParallel(model)

    if train_on_gpu:
        model = model.to('cuda')

    # Model basics
    model.class_to_idx = checkpoint['class_to_idx']
    model.idx_to_class = checkpoint['idx_to_class']
    model.epochs = checkpoint['epochs']

    # Optimizer
    optimizer = checkpoint['optimizer']
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    return model, optimizer

In [4]:
def predict(image, model, topk=5):
    """Make a prediction for an image using a trained model

    Params
    --------
        image (torch.Tensor): preprocessed image holding 3 * 224 * 224 values
        model (PyTorch model): trained model for inference. Must expose an
            `idx_to_class` mapping from output index to class name.
        topk (int): number of top predictions to return. Capped at the number
            of classes the model outputs.

    Returns
    --------
        img (torch.Tensor): the input image on the CPU, after reshaping to
            (1, 3, 224, 224) and squeezing
        top_p (numpy array): probabilities of the top predictions, highest first
        top_classes (list): class names matching `top_p`

    """

    # Run on whatever device the model lives on (CPU if it has no parameters)
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    # Add the batch dimension; `reshape` also accepts non-contiguous tensors
    img_tensor = image.reshape(1, 3, 224, 224).to(device)

    # Set to evaluation
    model.eval()
    with torch.no_grad():
        # The model is expected to output log probabilities
        out = model(img_tensor)
        ps = torch.exp(out)

        # Find the topk predictions, never asking for more than there are classes
        k = min(topk, ps.shape[1])
        top_probs, top_indices = ps.topk(k, dim=1)

    # Extract the actual classes and probabilities
    top_classes = [
        model.idx_to_class[class_] for class_ in top_indices.cpu().numpy()[0]
    ]
    top_p = top_probs.cpu().numpy()[0]

    return img_tensor.cpu().squeeze(), top_p, top_classes

In [5]:
def process_image(img):
    """Standardize a channel-first image and return it as a PyTorch tensor

    Params
    --------
        img (array): image values laid out so that they broadcast against a
            (3, 1, 1) array, i.e. with the 3 color channels first

    Returns
    --------
        torch.Tensor: `(img - mean) / std`, computed per channel

    """
    per_channel = (3, 1, 1)

    # ImageNet Standardization constants, one value per channel
    imagenet_mean = np.array([0.485, 0.456, 0.406]).reshape(per_channel)
    imagenet_std = np.array([0.229, 0.224, 0.225]).reshape(per_channel)

    standardized = (img - imagenet_mean) / imagenet_std

    return torch.Tensor(standardized)

In [8]:
# Restore the model for inference; the optimizer returned alongside it is not used.
# The file name has to start with `resnet50-` for `load_checkpoint` to accept it.
checkpoint_file = 'resnet50-transfer-16-epochs-97%-training-97%-validation.pth'
model, _ = load_checkpoint(path=checkpoint_file)

resnet50
24,166,725 total parameters.
658,693 total gradient parameters.


In [None]:
cap = cv2.VideoCapture(0)
# NOTE(review): this box lies outside the 224x224 image it is drawn on below,
# so it is never visible. It looks like a region of interest on the full frame
# was intended -- confirm before changing what is fed to the model.
x1, y1, x2, y2 = 300, 300, 500, 500
try:
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret or frame is None:
            # Camera missing, busy or disconnected: stop instead of crashing
            # on `None / 256.0`.
            print('Could not read a frame from the camera; stopping.')
            break

        # Scale pixel values to [0, 1) and shrink to the network's input size
        frame = frame / 256.0
        img_cropped = cv2.resize(frame, (224, 224), interpolation=cv2.INTER_AREA)

        # NOTE(review): OpenCV delivers frames in BGR channel order while the
        # normalization constants in `process_image` are listed in RGB order --
        # verify against how the training images were loaded.
        # Height x width x channel -> channel x height x width
        x = np.transpose(img_cropped, (2, 0, 1))
        x = process_image(x)

        img, top_p, top_classes = predict(x, model)

        # Overlay the best class and its score, then show the frame
        cv2.putText(img_cropped, '%s' % (top_classes[0]), (100, 200), cv2.FONT_HERSHEY_SIMPLEX, 4, (255, 255, 255), 4)
        cv2.putText(img_cropped, '(score = %.5f)' % (float(top_p[0])), (100, 250), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255))
        cv2.rectangle(img_cropped, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv2.imshow("img", img_cropped)

        # Press 'q' to quit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture and close the window even if an error interrupted
    # the loop; otherwise the camera stays locked until the kernel restarts.
    cap.release()
    cv2.destroyAllWindows()