In [2]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

import numpy as np
import cv2

### Device configuration

In [3]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

### Hyperparameters

In [5]:
# Input geometry -- presumably `sequence_length` time steps of `input_size`
# features per sample (TODO confirm against the data loader).
input_size = 28
sequence_length = 28

# Recurrent network dimensions passed to the LSTM models.
num_layers = 2
hidden_size = 128
num_classes = 10

# Training-loop settings (the name `num_epoches` is kept as spelled, since
# other cells may reference it).
learning_rate = 0.01
batch_size = 100
num_epoches = 2

In [2]:
# cnn model: torchvision's Inception v3 constructed with pretrained=True, so
# the weight file is fetched on first use (see the download log in this
# cell's output).
# NOTE(review): cnn_model is not referenced by any other cell visible here.
cnn_model = torchvision.models.inception_v3(pretrained=True)

Downloading: "https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth" to /Users/dushuchen/.torch/models/inception_v3_google-1a9a5a14.pth
100.0%


In [None]:
# lstm models
class EncoderRNN(nn.Module):
    """LSTM encoder that turns an input sequence into ReLU-activated hidden states.

    Args:
        input_size: Number of features per time step of the input.
        hidden_size: Number of features in the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        num_classes: Unused. It is optional so that the three-argument
            construction in ``AutoEncoderRNN`` and a four-argument call
            both work.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes=None):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.relu = nn.ReLU()

        # Xavier-initialise the first layer's weight matrices in place. The
        # keyword is `gain` (the previous `gian` raised TypeError) and
        # `xavier_uniform_` replaces the deprecated `xavier_uniform`.
        # Only layer 0 is touched; deeper layers keep nn.LSTM's default init.
        nn.init.xavier_uniform_(self.lstm.weight_ih_l0, gain=np.sqrt(2))
        nn.init.xavier_uniform_(self.lstm.weight_hh_l0, gain=np.sqrt(2))

    def forward(self, x):
        """Encode a batch-first sequence.

        Args:
            x: Tensor of shape (batch, seq_length, input_size).

        Returns:
            Tensor of shape (batch, seq_length, hidden_size): the top-layer
            hidden state at every time step, passed through ReLU. The full
            sequence is returned (rather than only the last step) because
            ``DecoderRNN`` runs a batch-first LSTM over it.
        """
        # Zero initial hidden/cell states, created on the input's own device
        # and dtype so the module does not depend on a global `device`.
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size,
                         device=x.device, dtype=x.dtype)
        c0 = torch.zeros_like(h0)

        # out: (batch, seq_length, hidden_size)
        out, _ = self.lstm(x, (h0, c0))

        # No linear head is defined on this module (the former `self.fc`
        # call raised AttributeError), so the activation is applied directly.
        return self.relu(out)
    
class DecoderRNN(nn.Module):
    """LSTM decoder that maps a hidden-state sequence to sigmoid outputs.

    Args:
        hidden_size: Number of features per time step of the decoder input.
        output_size: Number of features in the LSTM hidden state, and
            therefore per time step of the output.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, hidden_size, output_size, num_layers):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(hidden_size, output_size, num_layers, batch_first=True)
        self.sigmoid = nn.Sigmoid()

        # Xavier-initialise the first layer's weight matrices in place. The
        # keyword is `gain` (the previous `gian` raised TypeError) and
        # `xavier_uniform_` replaces the deprecated `xavier_uniform`.
        # Only layer 0 is touched; deeper layers keep nn.LSTM's default init.
        nn.init.xavier_uniform_(self.lstm.weight_ih_l0, gain=np.sqrt(2))
        nn.init.xavier_uniform_(self.lstm.weight_hh_l0, gain=np.sqrt(2))

    def forward(self, x):
        """Decode a batch-first sequence.

        Args:
            x: Tensor of shape (batch, seq_length, hidden_size).

        Returns:
            Tensor of shape (batch, seq_length, output_size) with the
            sigmoid applied element-wise.
        """
        # Zero initial hidden/cell states sized by output_size (the LSTM's
        # hidden width here), created on the input's own device and dtype so
        # the module does not depend on a global `device`.
        h0 = torch.zeros(self.num_layers, x.size(0), self.output_size,
                         device=x.device, dtype=x.dtype)
        c0 = torch.zeros_like(h0)

        # out: (batch, seq_length, output_size)
        out, _ = self.lstm(x, (h0, c0))

        return self.sigmoid(out)

class AutoEncoderRNN(nn.Module):
    """Sequence autoencoder that chains ``EncoderRNN`` into ``DecoderRNN``.

    Args:
        input_size: Passed to the encoder as its input size and to the
            decoder as its output size.
        hidden_size: Passed to the encoder as its hidden size and to the
            decoder as its input (first) argument.
        num_layers: Layer count handed to both sub-modules.

    Note:
        ``EncoderRNN`` is constructed here with exactly three positional
        arguments, so its constructor must accept that arity.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super(AutoEncoderRNN, self).__init__()
        # The encoder is registered first, then the decoder.
        self.encoder = EncoderRNN(input_size, hidden_size, num_layers)
        self.decoder = DecoderRNN(hidden_size, input_size, num_layers)

    def forward(self, x):
        """Run ``x`` through the encoder and feed the result to the decoder."""
        return self.decoder(self.encoder(x))

In [None]:
# Play 'vtest.avi' frame by frame until the stream ends or 'q' is pressed.
cap = cv2.VideoCapture('vtest.avi')

try:
    while cap.isOpened():
        ret, frame = cap.read()
        # read() signals failure (e.g. end of the file) through `ret`; stop
        # here instead of handing an invalid frame to cvtColor, which raises.
        if not ret:
            break

        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # NOTE(review): imshow renders arrays as BGR, so displaying `rgb`
        # swaps red and blue on screen -- confirm whether that is intended.
        cv2.imshow('frame', rgb)

        # Quit early when the user presses 'q'.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture handle and close the window, even when a
    # frame-processing call above raises.
    cap.release()
    cv2.destroyAllWindows()