In [2]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import time
import os
import copy
from PIL import Image
from tqdm import tqdm
from scipy import stats
import pickle

In [3]:
device = torch.device('cuda:0')

In [4]:
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

In [5]:
def make_dataset(video_path, annotation_path, dataset, sample_duration):
    """
    Args:
        video_path (string): Directory containing videos.
        annotation_path (string): Name of annotation file.
        dataset (string): "val" or "test"
        sample_duration (int): Number of frames per segment.
    
    Need to return:
        data (list): List of {video_path, image_list, class_index} dicts of (string, list, int).
        class_names (dict): Dict with items (class_name, class_index).
    """
    with open(annotation_path, 'rb') as f:
        annotations = pickle.load(f)
    if dataset == 'val':
        class_indices = {
            'long_val': 0,
            'medium_val': 1,
            'close_up_val': 2
        }
    elif dataset == 'test':
        class_indices = {
            'long_test': 0,
            'medium_test': 1,
            'close_up_test': 2
        }
    
    def generate_image_list(segment_start, segment_end, sample_duration):
        # regularly sample images from the segment
        sample_duration = int(sample_duration)
        increment = int((segment_end - segment_start + 1) / sample_duration)
        if increment == 0:
            # use all images, duplicate images at the end
            return ([
                '{}.jpg'.format(i)
                for i in range(segment_start, segment_end + 1)
            ] + [ '{}.jpg'.format(segment_end) for i in range(sample_duration - (segment_end - segment_start + 1)) ])[:sample_duration]
        else:
            # iterate through with increment
            return [
                '{}.jpg'.format(i)
                for i in range(segment_start, segment_end + 1, increment)
            ][:sample_duration]
    data = [
        {
            'video_path': os.path.join(video_path, str(segment[0])),
            'image_list': generate_image_list(segment[1], segment[2], sample_duration),
            'class_index': class_indices[class_name]
        }
        for class_name in class_indices
        for segment, _ in annotations[class_name]
    ]

    class_names = ['long', 'medium', 'close_up']
    
    return data, class_names

In [6]:
class ImageDataset(Dataset):
    def __init__(self,
                 video_path,
                 annotation_path,
                 dataset,
                 sample_duration,
                 transform=None, 
                 stride = 1, 
                 max_size=None):
        self.data, self.class_names = make_dataset(
            video_path, annotation_path, dataset, sample_duration)
        
        paths = [
            os.path.join(seg['video_path'], img_path)
            for seg in self.data
            for img_path in seg['image_list']
        ]
        labels = [
            seg['class_index']
            for seg in self.data
            for img_path in seg['image_list']
        ]
        
        self.paths = paths[::stride]
        if max_size is not None:
            self.paths = self.paths[:max_size]
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        img_name = self.paths[idx]
        img = Image.open(img_name).convert('RGB')
        
        img_tensor = self.transform(img)

        return img_tensor, self.labels[idx]

In [7]:
root_path = '/lfs/1/danfu/rekall_experiments/shot_scale_experiments/shot_scale'
video_path = os.path.join(root_path, 'images')
annotation_path = os.path.join(
    root_path, 'shot_scale_labels_and_rekall_accuracy_val_test.pkl')
image_datasets = {
    'train': ImageDataset(
        video_path, annotation_path, 'val', 16,
        transform = data_transforms['train'],
        stride=1, max_size=None
    ),
    'val': ImageDataset(
        video_path, annotation_path, 'val', 16,
        transform = data_transforms['val'],
        stride=1, max_size=None
    ),
    'test': ImageDataset(
        video_path, annotation_path, 'test', 16,
        transform = data_transforms['val'],
        stride=1, max_size=None
    )
}

In [8]:
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x],
                                              batch_size=4,
                                              shuffle='train' in x,
                                              num_workers=4,
                                              pin_memory=True)
                                              for x in image_datasets}

In [9]:
dataset_sizes = {
    x: len(image_datasets[x])
    for x in image_datasets
}

In [10]:
dataset_sizes

{'train': 7168, 'val': 7168, 'test': 7200}

In [11]:
def safe_divide(a, b):
    return a / b if b > 0 else 0

In [12]:
def train_model(model, criterion, optimizer, scheduler, train_dl, val_dl, test_dl=None,
                num_epochs=25, return_best=False, verbose=True, log_file=None):
    print(train_dl, val_dl, test_dl)
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    best_epoch = 0
    best_test_acc = 0.0
    
    phases = ['train', 'val', 'test'] if test_dl is not None else ['train', 'val']

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in phases:
            if phase == 'train':
                scheduler.step()
                model.train()  # Set model to training mode
                dl = dataloaders[train_dl]
                dataset_size = dataset_sizes[train_dl]
                
            elif phase == 'val':
                model.eval()   # Set model to evaluate mode
                dl = dataloaders[val_dl]
                dataset_size = dataset_sizes[val_dl]
            else:
                model.eval()
                dl = dataloaders[test_dl]
                dataset_size = dataset_sizes[test_dl]

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dl:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            
            epoch_loss = running_loss / dataset_size
            epoch_acc = running_corrects.double() / dataset_size

            if verbose:
                print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                    phase, epoch_loss, epoch_acc))
            if log_file is not None:
                log_file.write('Phase: {0}\t'
                               'Epoch: [{1}/{2}]\t'
                               'Loss: {loss_c:.4f}\t'
                               'Acc: {acc:.4f}\n'.format(
                                   phase, epoch + 1, num_epochs, loss_c=epoch_loss,
                                   acc=epoch_acc
                               ))
                log_file.flush()

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_epoch = epoch
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'test' and best_epoch == epoch:
                best_test_acc = epoch_acc

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    
    if return_best:
        print('Best epoch: {}'.format(best_epoch))
        print('Best val Acc: {:4f}'.format(best_acc))
        print('Test Acc: {:4f}'.format(best_test_acc))

        # load best model weights
        model.load_state_dict(best_model_wts)
    return model

In [None]:
%%time
path = 'models/transfer_learning'
for seed in range(5):
    torch.manual_seed(seed)
    model_ts = models.resnet50(pretrained=True)
    num_ftrs = model_ts.fc.in_features
    model_ts.fc = nn.Linear(num_ftrs, 3)

    model_ts = model_ts.to(device)
    
    criterion = nn.CrossEntropyLoss()
    
    # Observe that all parameters are being optimized
    optimizer_ts = optim.SGD(model_ts.parameters(), lr=0.001, momentum=0.9)

    # Decay LR by a factor of 0.1 every 7 epochs
    exp_lr_scheduler_ts = lr_scheduler.StepLR(optimizer_ts, step_size=7, gamma=0.1)
    
    if not os.path.exists(path):
        os.makedirs(path)
    with open(os.path.join(path, 'seed_{}.log'.format(seed)), 'w') as log_file:
        model_ts = train_model(model_ts, criterion, optimizer_ts, exp_lr_scheduler_ts,
                               'train', 'val', test_dl='test', num_epochs=25, verbose=True,
                               log_file=log_file, return_best=False)
        torch.save(model_ts.state_dict(), os.path.join(path, 'seed_{}.pth'.format(seed)))

train val test
Epoch 0/24
----------
train Loss: 0.8331 Acc: 0.6553
val Loss: 0.2663 Acc: 0.8968
test Loss: 0.6938 Acc: 0.7260

Epoch 1/24
----------
val Loss: 0.7797 Acc: 0.7552
test Loss: 1.9956 Acc: 0.4986

Epoch 2/24
----------
train Loss: 0.4852 Acc: 0.8096
val Loss: 0.5220 Acc: 0.8323
test Loss: 1.6526 Acc: 0.5094

Epoch 3/24
----------
train Loss: 0.4125 Acc: 0.8351
val Loss: 0.1764 Acc: 0.9357
test Loss: 1.3971 Acc: 0.6242

Epoch 4/24
----------
train Loss: 0.3964 Acc: 0.8428
val Loss: 0.2416 Acc: 0.9141
test Loss: 1.2268 Acc: 0.6326

Epoch 5/24
----------
train Loss: 0.3431 Acc: 0.8634
val Loss: 0.2513 Acc: 0.9074
test Loss: 1.4070 Acc: 0.6308

Epoch 6/24
----------
train Loss: 0.2575 Acc: 0.9016
val Loss: 0.2993 Acc: 0.9092
test Loss: 1.3705 Acc: 0.6086

Epoch 7/24
----------
train Loss: 0.2210 Acc: 0.9166
val Loss: 0.0550 Acc: 0.9820
test Loss: 1.1844 Acc: 0.6811

Epoch 8/24
----------
train Loss: 0.2101 Acc: 0.9220
val Loss: 0.0427 Acc: 0.9877
test Loss: 1.0306 Acc: 0.6975


train Loss: 0.1401 Acc: 0.9520
val Loss: 0.0230 Acc: 0.9929
test Loss: 1.2726 Acc: 0.6797

Epoch 23/24
----------
train Loss: 0.1365 Acc: 0.9513
val Loss: 0.0215 Acc: 0.9932
test Loss: 1.3512 Acc: 0.6776

Epoch 24/24
----------
train Loss: 0.1412 Acc: 0.9491
val Loss: 0.0243 Acc: 0.9920
test Loss: 1.4223 Acc: 0.6717

Training complete in 96m 28s
train val test
Epoch 0/24
----------
train Loss: 0.8165 Acc: 0.6662
val Loss: 0.2432 Acc: 0.9131
test Loss: 1.1056 Acc: 0.6690

Epoch 1/24
----------
train Loss: 0.5781 Acc: 0.7679
val Loss: 0.1656 Acc: 0.9411
test Loss: 0.7968 Acc: 0.7122

Epoch 2/24
----------
train Loss: 0.4578 Acc: 0.8179
val Loss: 0.1505 Acc: 0.9429
test Loss: 0.9478 Acc: 0.7137

Epoch 3/24
----------
train Loss: 0.3986 Acc: 0.8401
val Loss: 0.1180 Acc: 0.9618
test Loss: 1.0153 Acc: 0.6779

Epoch 4/24
----------
train Loss: 0.3497 Acc: 0.8616
val Loss: 0.1396 Acc: 0.9455
test Loss: 1.0796 Acc: 0.6874

Epoch 5/24
----------
train Loss: 0.3243 Acc: 0.8717
val Loss: 0.0828 Ac

# Evaluate

In [49]:
def pos_negs(predictions, gt):
    correct = np.where(np.array(predictions) == np.array(gt), 1, 0)
    incorrect = np.where(np.array(predictions) == np.array(gt), 0, 1)
    
    tp = np.where(correct * np.where(predictions == np.array(1), 1, 0), 1, 0)
    tn = np.where(correct * np.where(predictions == np.array(0), 1, 0), 1, 0)
    fp = np.where(incorrect * np.where(predictions == np.array(1), 1, 0), 1, 0)
    fn = np.where(incorrect * np.where(predictions == np.array(0), 1, 0), 1, 0)
    
    return tp, tn, fp, fn

def acc_prf1(predictions, gt):
    tp, tn, fp, fn = pos_negs(predictions, gt)
    
    acc = (np.sum(tp) + np.sum(tn)) / (np.sum(tp) + np.sum(tn) + np.sum(fp) + np.sum(fn))
    precision = np.sum(tp) / (np.sum(tp) + np.sum(fp))
    recall = np.sum(tp) / (np.sum(tp) + np.sum(fn))
    f1 = 2 * precision * recall / (precision + recall)
    
    return acc, precision, recall, f1, np.sum(tp), np.sum(tn), np.sum(fp), np.sum(fn)

def smooth_predictions(preds, window_radius = 3):
    result = []
    for i in range(len(preds)):
        start = max(0, i - window_radius)
        end = min(len(preds), i + window_radius)
        window = preds[start:end]
        result += [max(window, key=window.count)]
    
    return result

In [1]:
path = 'models/transfer_learning_tutorial'

log_file_img = open(os.path.join(
    path, 'test_results_image_classifier.log'), 'w')
log_file_smoothed = open(os.path.join(
    path, 'test_results_smoothed.log'), 'w')

for seed in range(5):
    print(seed)
    model_ts = models.resnet50(pretrained=True)
    num_ftrs = model_ts.fc.in_features
    model_ts.fc = nn.Linear(num_ftrs, 3)
    model_ts.load_state_dict(torch.load(
        os.path.join(path, 'seed_{}.pth'.format(seed))
    ))
    model_ts.to(device)
    criterion = nn.BCEWithLogitsLoss()
    
    model = model_ts.eval()   # Set model to evaluate mode
    dl = dataloaders['test']
    dataset_size = dataset_sizes['test']
    
    running_corrects = 0

    predictions = []
    gt_labels = []

    # Iterate over data.
    for inputs, labels in dl:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward
        # track history if only in train
        with torch.set_grad_enabled(False):
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

        predictions += preds.cpu().numpy().tolist()
        gt_labels += labels.cpu().numpy().tolist()

        # statistics
        running_corrects += torch.sum(preds == labels.data)

    epoch_acc = running_corrects.double() / dataset_size
    
    # Compute per-class P/R/F1
    def compute_prf1(preds, gts, class_name):
        tp = len([1 for pred, gt in zip(preds, gts) if gt == class_name and pred == gt])
        fp = len([1 for pred, gt in zip(preds, gts) if pred == class_name and pred != gt])
        fn = len([1 for pred, gt in zip(preds, gts) if gt == class_name and pred != gt])

        pre = tp / (tp + fp)
        rec = tp / (tp + fn)
        f1 = 2 * pre * rec  / (pre + rec)

        return pre, rec, f1, tp, fp, fn
    
    def smooth_predictions(preds, window_radius = 3, segment_length = 16):
        result = []
        for start_idx in range(0, len(preds), segment_length):
            for i in range(segment_length):
                start = max(0, i - window_radius) + start_idx
                end = min(len(preds), i + window_radius) + start_idx
                window = preds[start:end]
                result += [max(window, key=window.count)]

        return result
    
    results = []
    for class_name in [0, 1, 2]:
        results.append((class_name, compute_prf1(predictions, gt_labels, class_name)))
    
    avg_pre = np.mean([res[1][0] for res in results])
    avg_rec = np.mean([res[1][1] for res in results])
    avg_f1 = np.mean([res[1][2] for res in results])
    
    log_file_img.write('Seed: {0}\t'
                   'Pre: {pre:.4f}\t'
                   'Rec: {rec:.4f}\t'
                   'F1: {f1:.4f}\n'.format(
                       seed,
                       pre=avg_pre, rec=avg_rec,
                       f1=avg_f1
                   ))
    for class_name in [0, 1, 2]:
        log_file_img.write('{}\n'.format(
            (class_name, compute_prf1(predictions, gt_labels, class_name)))
        )
    log_file_img.flush()
    
    smoothed_preds = smooth_predictions(predictions)
    results = []
    for class_name in [0, 1, 2]:
        results.append((class_name, compute_prf1(predictions, gt_labels, class_name)))
    
    avg_pre = np.mean([res[1][0] for res in results])
    avg_rec = np.mean([res[1][1] for res in results])
    avg_f1 = np.mean([res[1][2] for res in results])
    
    log_file_smoothed.write('Seed: {0}\t'
                   'Pre: {pre:.4f}\t'
                   'Rec: {rec:.4f}\t'
                   'F1: {f1:.4f}\n'.format(
                       seed,
                       pre=avg_pre, rec=avg_rec,
                       f1=avg_f1
                   ))
    for class_name in [0, 1, 2]:
        log_file_smoothed.write('{}\n'.format(
            (class_name, compute_prf1(smoothed_preds, gt_labels, class_name)))
        )
    log_file_smoothed.flush()

SyntaxError: invalid syntax (<ipython-input-1-78635045183e>, line 93)

# Old

In [51]:
running_corrects = 0

predictions = []
gt_labels = []

# Iterate over data.
for inputs, labels in dl:
    inputs = inputs.to(device)
    labels = labels.to(device)
    
    # zero the parameter gradients
    optimizer.zero_grad()

    # forward
    # track history if only in train
    with torch.set_grad_enabled(False):
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        
    predictions += preds.cpu().numpy().tolist()
    gt_labels += labels.cpu().numpy().tolist()

    # statistics
    running_corrects += torch.sum(preds == labels.data)
    
epoch_acc = running_corrects.double() / dataset_size

# epoch_acc = running_corrects.double() / dataset_size
# epoch_pre = safe_divide(true_positives, (true_positives + false_positives))
# epoch_recall = safe_divide(true_positives, (true_positives + false_negatives))
# epoch_f1 = safe_divide(2 * epoch_pre * epoch_recall, (epoch_pre + epoch_recall))

# print('Acc: {:.4f} Pre: {:.4f} Rec: {:.4f} F1: {:.4f}'.format(
#     epoch_acc, epoch_pre, epoch_recall, epoch_f1))
# print('TP: {} TN: {} FP: {} FN: {}'.format(
#     true_positives.data, true_negatives.data, false_positives.data, false_negatives.data))

# predictions = [p[0] for p in predictions]

# smoothed_preds = smooth_predictions(predictions, 3)

# print("Smoothed stats:")
# print(acc_prf1(smoothed_preds, gt_labels))

In [55]:
# Compute per-class P/R/F1
def compute_prf1(preds, gts, class_name):
    tp = len([1 for pred, gt in zip(preds, gts) if gt == class_name and pred == gt])
    fp = len([1 for pred, gt in zip(preds, gts) if pred == class_name and pred != gt])
    fn = len([1 for pred, gt in zip(preds, gts) if gt == class_name and pred != gt])
    
    pre = tp / (tp + fp)
    rec = tp / (tp + fn)
    f1 = 2 * pre * rec  / (pre + rec)
    
    return pre, rec, f1, tp, fp, fn

In [61]:
for class_name in [0, 1, 2]:
    print(class_name, compute_prf1(predictions, gt_labels, class_name))

0 (0.7263820853743876, 0.4413265306122449, 0.5490610949484264, 1038, 391, 1314)
1 (0.5333728863838624, 0.7644557823129252, 0.6283417787873492, 1798, 1573, 554)
2 (0.8220833333333334, 0.7904647435897436, 0.8059640522875817, 1973, 427, 523)


In [62]:
# Smooth predictions and compute again
def smooth_predictions(preds, window_radius = 3, segment_length = 16):
    result = []
    for start_idx in range(0, len(preds), segment_length):
        for i in range(segment_length):
            start = max(0, i - window_radius) + start_idx
            end = min(len(preds), i + window_radius) + start_idx
            window = preds[start:end]
            result += [max(window, key=window.count)]
    
    return result

In [63]:
smoothed_preds = smooth_predictions(predictions)

In [64]:
for class_name in [0, 1, 2]:
    print(class_name, compute_prf1(smoothed_preds, gt_labels, class_name))

0 (0.7317763623496107, 0.43962585034013607, 0.549269588313413, 1034, 379, 1318)
1 (0.5334507042253521, 0.7729591836734694, 0.63125, 1818, 1590, 534)
2 (0.8377469525010509, 0.7984775641025641, 0.8176410256410258, 1993, 386, 503)
