# DUI DNN
Deep Urban Interaction - Deep Neural Network  
Interaction Classification with OpenCV, OpenPose, and PyTorch  
Ryan Yan Zhang <ryanz@mit.edu>  
City Science, MIT Media Lab  

In [1]:
# General Imports
from pprint import pprint
import random
import datetime
import time

from IPython.core.debugger import set_trace

import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline

## Image from video with OpenCV

## Pose json from image with OpenPose

## People Bounding box from OpenPose

## Interaction Classification with PyTorch DNN

In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

import torchvision
from torchvision.datasets.folder import ImageFolder, default_loader
from torchvision import models

# Pick the compute device once: first CUDA GPU when one is visible, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f'device: {device}')

device: cuda:0


In [4]:
# Training helpers
def get_trainable(model_params):
    """Lazily select the parameters that are still marked for gradient updates.

    Args:
        model_params: iterable of objects exposing a ``requires_grad``
            attribute (e.g. what ``model.parameters()`` yields).

    Returns:
        A lazy iterator over the items whose ``requires_grad`` is truthy,
        in their original order.
    """
    return filter(lambda param: param.requires_grad, model_params)


def get_frozen(model_params):
    """Lazily select the parameters that are excluded from gradient updates.

    Args:
        model_params: iterable of objects exposing a ``requires_grad``
            attribute (e.g. what ``model.parameters()`` yields).

    Returns:
        A lazy iterator over the items whose ``requires_grad`` is falsy,
        in their original order.
    """
    return filter(lambda param: not param.requires_grad, model_params)


def all_trainable(model_params):
    """Report whether every parameter is marked for gradient updates.

    Args:
        model_params: iterable of objects exposing a ``requires_grad`` attribute.

    Returns:
        ``True`` when no item has a falsy ``requires_grad`` (including the
        empty case), ``False`` as soon as one such item is found.
    """
    for param in model_params:
        if not param.requires_grad:
            return False
    return True


def all_frozen(model_params):
    """Report whether every parameter is excluded from gradient updates.

    Args:
        model_params: iterable of objects exposing a ``requires_grad`` attribute.

    Returns:
        ``True`` when no item has a truthy ``requires_grad`` (including the
        empty case), ``False`` otherwise.
    """
    # "all are not trainable" is the same statement as "none is trainable".
    return not any(param.requires_grad for param in model_params)


def freeze_all(model_params):
    """Turn off gradient tracking on every given parameter, in place.

    Args:
        model_params: iterable of objects with a writable ``requires_grad``
            attribute (e.g. what ``model.parameters()`` yields).

    Returns:
        None. The items themselves are modified.
    """
    for tensor in model_params:
        tensor.requires_grad = False

In [10]:
# Data augmentation transforms

from torchvision import transforms

IMG_SIZE = 224  # side length of the square input fed to the network
# Per-channel normalization constants (presumably the ImageNet statistics that
# torchvision's pretrained models expect -- confirm against the model docs).
_mean = [0.485, 0.456, 0.406]
_std = [0.229, 0.224, 0.225]


# Training pipeline: force every image to IMG_SIZE x IMG_SIZE (some images are
# pretty small), then apply random flip / colour jitter before tensor
# conversion and normalization.
# Disabled alternative: resize to 256 and take a RandomCrop(IMG_SIZE).
train_trans = transforms.Compose(
    [
        transforms.Resize(size=(IMG_SIZE, IMG_SIZE)),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=.3, contrast=.3, saturation=.3),
        transforms.ToTensor(),
        transforms.Normalize(mean=_mean, std=_std),
    ]
)

# Evaluation pipeline: same resize and normalization, no random augmentation.
# Disabled alternative: resize to 256 and take a CenterCrop(IMG_SIZE).
val_trans = transforms.Compose(
    [
        transforms.Resize(size=(IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(mean=_mean, std=_std),
    ]
)

In [11]:
# Data set
# Training images go through the augmenting pipeline (random flip / colour
# jitter). Validation images must use the deterministic pipeline instead, so
# that validation loss/accuracy are reproducible and comparable across epochs.
train_ds = ImageFolder("../data/raw/DUI/train", transform=train_trans, loader=default_loader)
val_ds = ImageFolder("../data/raw/DUI/valid", transform=val_trans, loader=default_loader)
#print(f'len(train_ds): {len(train_ds)}, len(val_ds): {len(val_ds)}')

# Samples per batch. Previously tried: 2, 32, 256, 512;
# 220 for resnet152 on a Dell Precision 5520 laptop, 400 for resnet18.
BATCH_SIZE = 128

# Number of target classes (used to size the classifier head in the
# transfer-learning option).
n_classes = 2

# DataLoader
# Only the training loader shuffles; validation keeps the dataset order.
train_dl = DataLoader(
    train_ds,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
)
val_dl = DataLoader(
    val_ds,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)

In [14]:
# Model
# ResNet-18 initialised with torchvision's pretrained weights.
model = models.resnet18(pretrained=True)
# Deeper alternatives:
#model = models.resnet50(pretrained=True)
#model = models.resnet101(pretrained=True)
#model = models.resnet152(pretrained=True)

# Transfer learning or whole model training
# Opt.1 Transfer learning (disabled): freeze the backbone and train only a new
# classifier head sized for n_classes.
#
# # Freeze all parameters
# for param in model.parameters():
#     param.requires_grad = False
# #print(f'all_frozen(model.parameters()): {all_frozen(model.parameters())}')
#
# # in_features depends on the model: 512 for resnet18,
# # 2048 for resnet50 & resnet101 & resnet152
# model.fc = nn.Linear(512, n_classes)
#
# model = model.to(device)

# Opt.2 Whole model training: every parameter stays trainable.
# NOTE(review): this option keeps the stock classifier head instead of one
# sized for n_classes. It is left untouched here because the weight-loading
# cell rebuilds a stock resnet18 and would reject a differently shaped head.
for param in model.parameters():
    param.requires_grad = True

# The training loop moves every batch to `device`; the model has to live on the
# same device or the forward pass raises a device-mismatch error on CUDA.
# Doing it here also ensures the optimizer is built from the moved parameters.
model = model.to(device)

In [36]:
# Loss function: cross-entropy over the raw model outputs and integer labels.
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over whichever parameters currently have requires_grad set.
# Disabled alternatives: optimise only model.fc.parameters(); pass momentum=0.9
# (an option for a momentum-based optimizer, not for Adam).
optimizer = torch.optim.Adam(params=get_trainable(model.parameters()), lr=1e-3)

In [37]:
# Train loop
# The whole loop is disabled by the `if False:` guard; flip it to True to
# (re)train. Each epoch runs one pass over train_dl with weight updates, then
# one pass over val_dl without gradients, and prints loss/accuracy and timing.
if False:
    N_EPOCHS = 10  # number of full passes over the training set (tried: 1, 2, 10)
    for epoch in range(N_EPOCHS):

        # start epoch: record wall-clock start for the timing report below
        start_time = time.time()
        start_datetime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print(f"Epoch {epoch+1}/{N_EPOCHS}")
        print(f'  Start Time: {start_datetime}')

        # Train
        model.train()  # IMPORTANT: put the model in training mode

        # running_loss accumulates the summed per-sample loss, correct the
        # number of correctly classified samples
        running_loss, correct = 0.0, 0
        for X, y in train_dl:
            X, y = X.to(device), y.to(device)

            # clear gradients left over from the previous step before backprop
            optimizer.zero_grad()
            # with torch.set_grad_enabled(True):
            y_ = model(X)
            loss = criterion(y_, y)

            loss.backward()
            optimizer.step()

            # Statistics
            print(f"    batch loss: {loss.item():0.3f}")
            # predicted class = index of the largest output along dim 1
            _, y_label_ = torch.max(y_, 1)
            correct += (y_label_ == y).sum().item()
            # the criterion returns the batch mean, so scale by the batch size
            # to make the final division by the dataset length a per-sample mean
            running_loss += loss.item() * X.shape[0]

        print(f"  Train Loss: {running_loss / len(train_dl.dataset)}")
        print(f"  Train Acc:  {correct / len(train_dl.dataset)}")


        # Eval
        model.eval()  # IMPORTANT: put the model in evaluation mode

        # reset the accumulators for the validation pass
        running_loss, correct = 0.0, 0
        with torch.no_grad():  # IMPORTANT: no gradient bookkeeping during validation
            for X, y in val_dl:
                X, y = X.to(device), y.to(device)

                y_ = model(X)

                _, y_label_ = torch.max(y_, 1)
                correct += (y_label_ == y).sum().item()

                loss = criterion(y_, y)
                running_loss += loss.item() * X.shape[0]

        print(f"  Valid Loss: {running_loss / len(val_dl.dataset)}")
        print(f"  Valid Acc:  {correct / len(val_dl.dataset)}")

        # end epoch: report finish time and elapsed wall-clock duration
        end_time = time.time()
        end_datetime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        time_elapsed = end_time - start_time
        datetime_elapsed = str(datetime.timedelta(seconds = time_elapsed))
        print(f'  End Time: {end_datetime}')
        print(f'  Time Elapsed: {datetime_elapsed}')
        print()

In [38]:
# Predict with Trained Model


# Location of the serialized state_dict for the whole-model-trained ResNet-18.
model_weights_path = '../data/saved_model_weights/resnet18_whole'

# Toggles for this cell (set instead of editing the conditions below).
SAVE_WEIGHTS = False  # write the current model's weights to model_weights_path
LOAD_WEIGHTS = True   # rebuild the network and restore weights from model_weights_path

# save the trained model weights
if SAVE_WEIGHTS:
    torch.save(model.state_dict(), model_weights_path)

# load the trained model weights
if LOAD_WEIGHTS:
    # Build the bare architecture only: load_state_dict() below overwrites
    # every weight, so downloading the pretrained ImageNet weights first would
    # be wasted work (and would need network access).
    model = models.resnet18(pretrained=False)  # resnet50, 101, 152
    # map_location remaps the stored tensors onto the current device, so
    # weights saved on a GPU can still be restored on a CPU-only machine.
    model.load_state_dict(torch.load(model_weights_path, map_location=device))
    model = model.to(device)


# test data set: held-out images, preprocessed with the non-augmenting pipeline
test_ds = ImageFolder(
    root="../data/raw/DUI/test",
    transform=val_trans,
    loader=default_loader,
)
#print(f'len(test_ds) = {len(test_ds)}. ')

# One image per batch, in dataset order, so each prediction maps to one sample.
test_dl = DataLoader(test_ds, batch_size=1, shuffle=False, num_workers=4)

#print(f'test_ds[99]: \n{test_ds[99]}')
#print(f'test_ds[99][1]: \n{test_ds[99][1]}')


# predict: classify every test image and compare with its ground-truth label
model.eval()  # IMPORTANT: evaluation mode
with torch.no_grad():  # IMPORTANT: gradients are not needed for prediction
    for X, y in test_dl:
        X = X.to(device)
        y = y.to(device)
        #print(f'y: \t\t\t{y}')

        y_ = model(X)
        # predicted class = index of the largest output along dim 1
        (_, y_label_) = torch.max(y_, dim=1)
        #print(f'y_label_: \t\t{y_label_}')

        # the loader yields single-sample batches, so the comparison yields a
        # one-element tensor that can be used directly as a condition
        if y_label_ == y:
            is_correct = 'correct'
        else:
            is_correct = 'wrong'
        #print(f'is_correct: \t{is_correct}\n')