In [1]:
# Mount Google Drive into the Colab filesystem so files under /content/drive are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Report the torch / torchvision builds and the CUDA / cuDNN versions of this runtime.
import torch,torchvision
print(f"torch version：{torch.__version__}")
print(f"torchvision version: {torchvision.__version__}")
print(f"CUDA Version: {torch.version.cuda}")
print(f"cuDNN version is: {torch.backends.cudnn.version()}")

torch version：1.11.0+cu113
torchvision version: 0.12.0+cu113
CUDA Version: 11.3
cuDNN version is: 8200


In [3]:
from PIL import Image
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np

import torch
import torchvision.transforms as T

In [4]:
!pip uninstall torchvision

Found existing installation: torchvision 0.12.0+cu113
Uninstalling torchvision-0.12.0+cu113:
  Would remove:
    /usr/local/lib/python3.7/dist-packages/torchvision-0.12.0+cu113.dist-info/*
    /usr/local/lib/python3.7/dist-packages/torchvision.libs/libcudart.053364c0.so.11.0
    /usr/local/lib/python3.7/dist-packages/torchvision.libs/libjpeg.ceea7512.so.62
    /usr/local/lib/python3.7/dist-packages/torchvision.libs/libnvjpeg.90286a3c.so.11
    /usr/local/lib/python3.7/dist-packages/torchvision.libs/libpng16.7f72a3c5.so.16
    /usr/local/lib/python3.7/dist-packages/torchvision.libs/libz.1328edc3.so.1
    /usr/local/lib/python3.7/dist-packages/torchvision/*
Proceed (y/n)? y
  Successfully uninstalled torchvision-0.12.0+cu113


In [5]:
!pip install pytorchvideo av
!pip install av
!pip install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu
import pytorchvideo
import av
print(av.__version__)

Collecting pytorchvideo
  Downloading pytorchvideo-0.1.5.tar.gz (132 kB)
[K     |████████████████████████████████| 132 kB 5.1 MB/s 
[?25hCollecting av
  Downloading av-9.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (28.2 MB)
[K     |████████████████████████████████| 28.2 MB 3.0 MB/s 
[?25hCollecting fvcore
  Downloading fvcore-0.1.5.post20220506.tar.gz (50 kB)
[K     |████████████████████████████████| 50 kB 3.4 MB/s 
[?25hCollecting parameterized
  Downloading parameterized-0.8.1-py2.py3-none-any.whl (26 kB)
Collecting iopath
  Downloading iopath-0.1.9-py3-none-any.whl (27 kB)
Collecting yacs>=0.1.6
  Downloading yacs-0.1.8-py3-none-any.whl (14 kB)
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 34.7 MB/s 
Collecting portalocker
  Downloading portalocker-2.4.0-py2.py3-none-any.whl (16 kB)
Building wheels for c

9.2.0


In [6]:
import torch

In [7]:
# Project folder, relative to the root of the mounted Google Drive.
FOLDERNAME = 'CS_231n_Project/ConvLSTM'
import sys
# Make modules stored in the project folder importable.
sys.path.append('/content/drive/My Drive/{}'.format(FOLDERNAME))
# Switch the working directory to the project folder.
%cd /content/drive/My\ Drive/$FOLDERNAME

/content/drive/.shortcut-targets-by-id/1reDWkXcIczcWArSj0UXBBRo24d0byImu/CS_231n_Project/ConvLSTM


In [8]:
from torchvision.datasets import UCF101

# Dataset configuration: where the UCF101 sample lives and how clips are cut and batched.
ucf_data_dir = "../UCF101_sample/UCF-101"          # passed to UCF101 as the data root
ucf_label_dir = "../UCF101_sample/TrainTestList"   # passed to UCF101 as the annotation path
frames_per_clip = 5        # number of frames in every clip
step_between_clips = 5     # number of frames between consecutive clips
batch_size = 64            # clips per DataLoader batch

In [9]:
import numpy as np
import torch
import torch.nn as nn
from torch.optim import Adam
from Seq2Seq import Seq2Seq
from torch.utils.data import DataLoader

import io
import imageio
from ipywidgets import widgets, HBox

# Select the compute device: CUDA when torch reports it as available, the CPU otherwise.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [10]:
def _time_last_to_channel_first(clip):
    """Reorder the axes of ``clip`` from (T, H, W, C) to (C, T, H, W)."""
    return clip.permute(3, 0, 1, 2)


def _resize_to_64x64(clip):
    """Resize the two trailing (spatial) dimensions of ``clip`` to 64 x 64."""
    return nn.functional.interpolate(clip, (64, 64))


# Per-clip transform applied by the dataset.
# TODO: this should be done by a video-level transform when PyTorch provides
# transforms.ToTensor() for video.
# Scaling to [0, 1] floats is deliberately not done here:
#     T.Lambda(lambda x: x / 255.),
tfs = T.Compose([
    # (T, H, W, C) -> (C, T, H, W) for easier convolutions (to match the ConvLSTM code);
    # might need C in index 1
    T.Lambda(_time_last_to_channel_first),
    # rescale to the most common size
    T.Lambda(_resize_to_64x64),
])

In [11]:
# def custom_collate(batch):
#     filtered_batch = []
#     for video, _, label in batch:
#         filtered_batch.append((video, label))
#     return torch.utils.data.dataloader.default_collate(filtered_batch)

In [12]:
def custom_collate(batch):
    """Collate dataset samples into an (input frames, target frame) pair.

    Each sample in ``batch`` must be a 3-tuple; only its first element (the
    video clip) is used. The clips are stacked along a new leading batch axis,
    divided by 255.0 and moved to the notebook-level ``device``.

    Returns:
        A tuple ``(inputs, target)`` where ``inputs`` holds indices 0..3 of
        axis 2 of the stacked batch and ``target`` holds index 4 of that axis
        (first 4 frames are input, 5th frame is target).
    """
    clips = [video for video, _, _ in batch]

    # Stack, rescale pixel values by 1/255 and send to the selected device.
    stacked = (torch.stack(clips) / 255.0).to(device)

    inputs = stacked[:, :, 0:4, :, :]
    target = stacked[:, :, 4, :, :]
    return inputs, target


In [13]:
# create train loader (allowing batches and other extras)

# Build the UCF101 train and test splits. Both use the same clip settings and
# transform and differ only in the `train` flag.
_clip_kwargs = dict(
    frames_per_clip=frames_per_clip,
    step_between_clips=step_between_clips,
    transform=tfs,
)

train_dataset = UCF101(ucf_data_dir, ucf_label_dir, train=True, **_clip_kwargs)
#train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
#                                           collate_fn=custom_collate)

test_dataset = UCF101(ucf_data_dir, ucf_label_dir, train=False, **_clip_kwargs)
#test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True,
#                                          collate_fn=custom_collate)



  0%|          | 0/32 [00:00<?, ?it/s]

ImportError: ignored

In [None]:
# Display the dataset's repr as a quick sanity check that it was constructed.
train_dataset

In [None]:
# get subset of data
#train_dataset = torch.utils.data.Subset(train_dataset, range(0, len(train_dataset), 8))

# Batch both splits with custom_collate, which yields (input frames, target frame).
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                                         collate_fn=custom_collate)
# The validation loop and the test cells below read `test_loader`, so it has to
# be created here. Settings mirror the train loader.
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True,
                                        collate_fn=custom_collate)
print(train_loader)
#print(test_loader)

In [None]:
# Preview: show up to four clips of one training batch as animated GIFs.

# Get a batch (the target half of the pair is not needed here)
input, _ = next(iter(train_loader))

# Reverse process before displaying: custom_collate divided by 255.0, so multiply
# back; .cpu() is needed because custom_collate moved the batch to `device`.
input = input.cpu().numpy() * 255.0     

for video in input[:4]:          # Loop over videos
    # Move axis 0 to the end so the leading axis is what imageio iterates over
    # (presumably (C, T, H, W) -> (T, H, W, C), per the transform comments; confirm).
    video = video.transpose(1, 2, 3, 0)
    with io.BytesIO() as gif:
        # Encode in memory as uint8 frames at 5 fps and show the bytes in an image widget.
        imageio.mimsave(gif,video.astype(np.uint8),"GIF",fps=5)
        display(HBox([widgets.Image(value=gif.getvalue())]))

In [None]:
# Build the Seq2Seq model for 64 x 64 frames and move it to the selected device.
# NOTE(review): an earlier comment here described the frames as grayscale / single
# channel, but num_channels=3 is passed; presumably the clips are RGB. Confirm.
model = Seq2Seq(num_channels=3, num_kernels=64, 
kernel_size=(3, 3), padding=(1, 1), activation="relu", 
frame_size=(64, 64), num_layers=3).to(device)

# Adam over all model parameters with a learning rate of 1e-6.
optim = Adam(model.parameters(), lr=1e-6)

# Binary cross entropy, summed over all elements. The targets are pixel values
# divided by 255.0 in custom_collate, so they lie in [0, 1] rather than being
# strictly 0 or 1.
# NOTE(review): BCELoss also needs the model output in [0, 1]; this presumably
# relies on the final activation inside Seq2Seq. Confirm.
criterion = nn.BCELoss(reduction='sum')

In [None]:
def run_epochs(num_epochs):
    """Train and validate the notebook-level ``model`` for ``num_epochs`` epochs.

    Uses the notebook-level ``model``, ``optim``, ``criterion``,
    ``train_loader`` and ``test_loader``. Each epoch makes one optimisation
    pass over ``train_loader`` and one gradient-free pass over ``test_loader``,
    then prints both losses divided by the number of samples in the
    corresponding dataset.

    Args:
        num_epochs: Number of epochs to run.

    Returns:
        The ``(input, target, output)`` tensors of the last batch processed,
        or ``None`` if no batch was processed.
    """
    last_batch = None

    for epoch in range(1, num_epochs + 1):

        # ---- training pass ----
        train_loss = 0
        model.train()
        for input, target in train_loader:
            output = model(input)
            loss = criterion(output.flatten(), target.flatten())
            loss.backward()
            optim.step()
            # Clear gradients right after the step so the next batch starts clean.
            optim.zero_grad()
            train_loss += loss.item()
            last_batch = (input, target, output)
        train_loss /= len(train_loader.dataset)

        # ---- validation pass (no gradients) ----
        val_loss = 0
        model.eval()
        with torch.no_grad():
            for input, target in test_loader:
                output = model(input)
                loss = criterion(output.flatten(), target.flatten())
                val_loss += loss.item()
                last_batch = (input, target, output)
        val_loss /= len(test_loader.dataset)

        print("Epoch:{} Training Loss:{:.2f} Validation Loss:{:.2f}\n".format(
            epoch, train_loss, val_loss))

    return last_batch


# Same schedule as before: a 3-epoch run followed by a 20-epoch run on the same
# model and optimiser. The epoch count is now set between the runs instead of
# being reassigned inside the first epoch loop.
num_epochs = 3
last_batch = run_epochs(num_epochs)

num_epochs = 20
last_batch = run_epochs(num_epochs) or last_batch

# Later cells read the most recent batch through these notebook-level names.
if last_batch is not None:
    input, target, output = last_batch

In [None]:
def _as_uint8_frames(tensor, axes):
    """Convert a tensor scaled to [0, 1] into a uint8 NumPy array with reordered axes.

    The tensor is detached and copied to the CPU first. custom_collate moves
    batches to `device`, and a CUDA tensor cannot be converted to NumPy
    directly. Values are scaled by 255, rounded and clipped to [0, 255] before
    the cast, so imageio receives ready-made uint8 frames.
    """
    frames = tensor.detach().cpu().numpy() * 255.0
    return np.clip(np.rint(frames), 0, 255).astype(np.uint8).transpose(axes)


# Write input video as gif (first sample of the batch, axis 0 moved to the end)
with io.BytesIO() as gif:
    new_input = _as_uint8_frames(input[0, :, :, :, :], (1, 2, 3, 0))
    print(input.shape)
    imageio.mimsave(gif, new_input, "GIF", fps = 5)    
    input_gif = gif.getvalue()

# Write target video as gif (a single frame; unsqueeze adds the leading frame axis)
with io.BytesIO() as gif:
    new_target = _as_uint8_frames(target[0, :, :, :].unsqueeze(0), (0, 2, 3, 1))
    print(target.shape)
    imageio.mimsave(gif, new_target, "GIF", fps = 5)    
    target_gif = gif.getvalue()

# Write output video as gif (a single predicted frame)
with io.BytesIO() as gif:
    new_output = _as_uint8_frames(output[0, :, :, :].unsqueeze(0), (0, 2, 3, 1))
    imageio.mimsave(gif, new_output, "GIF", fps = 5)    
    output_gif = gif.getvalue()

# Show input, target and prediction side by side.
display(HBox([widgets.Image(value=input_gif), widgets.Image(value=target_gif), 
                  widgets.Image(value=output_gif)]))


In [None]:
def collate_test(batch):
    """Collate ``batch`` into a scaled tensor plus a NumPy target slice.

    Returns:
        A tuple ``(scaled, target)``. ``scaled`` is ``batch`` converted to a
        tensor, divided by 255.0 and moved to the notebook-level ``device``.
        ``target`` is the NumPy array of ``batch`` with index 0 of axis 1
        dropped (``[:, 1:]``), left unscaled.

    NOTE(review): an earlier comment called the target the "last 1 frames",
    while the slice keeps everything from index 1 onward. Confirm the intent.
    """
    # Target slice, taken from the raw (unscaled) data.
    target = np.array(batch)[:, 1:]

    # Tensor conversion, 1/255 rescale and device transfer; no channel axis is added.
    scaled = (torch.tensor(batch) / 255.0).to(device)
    return scaled, target

# Test Data Loader
# test_loader = DataLoader(test_data,shuffle=True, 
#                          batch_size=3, collate_fn=collate_test)
# NOTE(review): the loader above is commented out, so `test_loader` below must
# come from another cell and collate_test is not used by this code. Verify which
# collate function produced `batch` and `target`.

# Get a batch
batch, target = next(iter(test_loader))

# Initialize output sequence: zero-filled uint8 array with the same shape as the target
output = np.zeros(target.shape, dtype=np.uint8)

# Loop over timesteps
# NOTE(review): this iterates over axis 1 of `target` and feeds the model a
# window of up to 10 positions along axis 2 of `batch`, while frames_per_clip
# is 5. Presumably leftover from a different data layout; confirm that the
# axes and window size match the current clips.
for timestep in range(target.shape[1]):
  input = batch[:,:,timestep:timestep+10]   
  # Threshold the prediction at 0.5 and store it as 0 / 255 at this timestep.
  output[:,timestep]=(model(input).squeeze(1).cpu()>0.5)*255.0

In [None]:
def _gif_bytes(frames):
    """Encode ``frames`` as an in-memory GIF at 5 fps and return the raw bytes."""
    with io.BytesIO() as buffer:
        imageio.mimsave(buffer, frames, "GIF", fps = 5)    
        return buffer.getvalue()


# Show target and prediction side by side, one row per sample.
for tgt, out in zip(target, output):
    # Move axis 0 of each sample to the end before encoding.
    tgt = tgt.transpose(1, 2, 3, 0)
    out = out.transpose(1, 2, 3, 0)

    target_gif = _gif_bytes(tgt)    # target video as gif
    output_gif = _gif_bytes(out)    # output video as gif

    display(HBox([widgets.Image(value=target_gif), 
                  widgets.Image(value=output_gif)]))