# Real-time video streaming



Reference:
* [MSG-Net-PyTorch-Style-Transfer](https://github.com/zhanghang1989/PyTorch-Multi-Style-Transfer)
* [colab_webcam.ipynb](https://colab.research.google.com/drive/1QnC7lV7oVFk5OZCm75fqbLAfD9qBy9bw?usp=sharing&authuser=2#scrollTo=1nkSnkbkk4cC)
* [yolov4_webcam.ipynb](https://colab.research.google.com/drive/1xdjyBiY75MAVRSjgmiqI7pbRLn58VrbE?usp=sharing&authuser=2#scrollTo=RPDr23YFW_7c)

## Preparation




### Install Yolact

In [None]:
import os, json, sys, tqdm, argparse
from google.colab import drive
# Mount Google Drive at /content/gdrive; the next cell changes into the
# project directory located under this mount point.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
%cd /content/gdrive/My\ Drive/DL_project

/content/gdrive/My Drive/DL_project


### Import Package

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
from PIL import Image
from torch.optim import Adam
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
import torch.onnx

import time
import re

### Install dependencies for webcam on colab

In [None]:
from IPython.display import display, Javascript, Image
from google.colab.output import eval_js
from base64 import b64decode, b64encode
import cv2
import PIL
import io
import html

### Define functions for webcam streaming


#### Convert JavaScript object to OpenCV image

In [None]:
def js_to_image(js_reply):
  """
  Decode a base64 image string coming from the browser into an OpenCV image.

  Params:
          js_reply: string of the form '<header>,<base64 payload>' (the part
                    after the first comma is base64-decoded)
  Returns:
          img: image decoded by cv2.imdecode in 3-channel colour mode
               (OpenCV BGR channel order)
  """
  # Everything before the first comma is the data-URL header; drop it.
  encoded_payload = js_reply.split(',')[1]
  raw_bytes = np.frombuffer(b64decode(encoded_payload), dtype=np.uint8)
  # cv2.IMREAD_COLOR is the named constant for flag value 1.
  return cv2.imdecode(raw_bytes, cv2.IMREAD_COLOR)

#### Convert OpenCV image into byte string for overlay

In [None]:
def bbox_to_bytes(bbox_array):
  """
  Encode an RGBA pixel array as a PNG data URL for use as an <img> source.

  Params:
          bbox_array: Numpy array (pixels) containing the RGBA image to
                      overlay on the video stream.
  Returns:
        bytes: 'data:image/png;base64,...' string holding the PNG-encoded image
  """
  overlay = PIL.Image.fromarray(bbox_array, 'RGBA')

  # Serialise to PNG in memory; no temporary file is needed.
  png_buffer = io.BytesIO()
  overlay.save(png_buffer, format='png')

  # b64encode returns bytes; decode to text so it can be embedded in the URL.
  encoded = b64encode(png_buffer.getvalue()).decode('utf-8')
  bbox_bytes = 'data:image/png;base64,{}'.format(encoded)

  return bbox_bytes

#### Using webcam as input in JavaScript

In [None]:
def video_stream():
  """
  Display the JavaScript that drives the webcam preview in the cell output.

  The script defines a global ``stream_frame(label, imgData)`` function
  (invoked from Python through ``eval_js``) together with its helpers:

    * ``createDom``  - on first use, builds the container div, status label,
      <video> element, overlay <img> and stop instruction, requests the
      camera via ``getUserMedia`` and starts the animation-frame loop.
    * ``onAnimationFrame`` - when a capture is pending, draws the video onto
      a 640x480 canvas and resolves the pending promise with a JPEG data URL
      (quality 0.8), or with an empty string if shutdown was requested.
    * ``removeDom`` - stops the video track and removes the created elements.
    * ``stream_frame`` - returns '' after tearing down if shutdown was
      requested; otherwise updates the label/overlay, waits for the next
      captured frame and returns an object with the timing fields
      'create', 'show', 'capture' and the captured image under 'img'.

  Clicking the video, the overlay image or the instruction text sets the
  shutdown flag.

  Returns:
        None
  """
  js = Javascript('''
    var video;
    var div = null;
    var stream;
    var captureCanvas;
    var imgElement;
    var labelElement;
    
    var pendingResolve = null;
    var shutdown = false;
    
    function removeDom() {
       stream.getVideoTracks()[0].stop();
       video.remove();
       div.remove();
       video = null;
       div = null;
       stream = null;
       imgElement = null;
       captureCanvas = null;
       labelElement = null;
    }
    
    function onAnimationFrame() {
      if (!shutdown) {
        window.requestAnimationFrame(onAnimationFrame);
      }
      if (pendingResolve) {
        var result = "";
        if (!shutdown) {
          captureCanvas.getContext('2d').drawImage(video, 0, 0, 640, 480);
          result = captureCanvas.toDataURL('image/jpeg', 0.8)
        }
        var lp = pendingResolve;
        pendingResolve = null;
        lp(result);
      }
    }
    
    async function createDom() {
      if (div !== null) {
        return stream;
      }

      div = document.createElement('div');
      div.style.border = '2px solid black';
      div.style.padding = '3px';
      div.style.width = '100%';
      div.style.maxWidth = '600px';
      document.body.appendChild(div);
      
      const modelOut = document.createElement('div');
      modelOut.innerHTML = "<span>Status:</span>";
      labelElement = document.createElement('span');
      labelElement.innerText = 'No data';
      labelElement.style.fontWeight = 'bold';
      modelOut.appendChild(labelElement);
      div.appendChild(modelOut);
           
      video = document.createElement('video');
      video.style.display = 'block';
      video.width = div.clientWidth - 6;
      video.setAttribute('playsinline', '');
      video.onclick = () => { shutdown = true; };
      stream = await navigator.mediaDevices.getUserMedia(
          {video: { facingMode: "environment"}});
      div.appendChild(video);

      imgElement = document.createElement('img');
      imgElement.style.position = 'absolute';
      imgElement.style.zIndex = 1;
      imgElement.onclick = () => { shutdown = true; };
      div.appendChild(imgElement);
      
      const instruction = document.createElement('div');
      instruction.innerHTML = 
          '<span style="color: red; font-weight: bold;">' +
          'When finished, click here or on the video to stop this demo</span>';
      div.appendChild(instruction);
      instruction.onclick = () => { shutdown = true; };
      
      video.srcObject = stream;
      await video.play();

      captureCanvas = document.createElement('canvas');
      captureCanvas.width = 640; //video.videoWidth;
      captureCanvas.height = 480; //video.videoHeight;
      window.requestAnimationFrame(onAnimationFrame);
      
      return stream;
    }
    async function stream_frame(label, imgData) {
      if (shutdown) {
        removeDom();
        shutdown = false;
        return '';
      }

      var preCreate = Date.now();
      stream = await createDom();
      
      var preShow = Date.now();
      if (label != "") {
        labelElement.innerHTML = label;
      }
            
      if (imgData != "") {
        var videoRect = video.getClientRects()[0];
        imgElement.style.top = videoRect.top + "px";
        imgElement.style.left = videoRect.left + "px";
        imgElement.style.width = videoRect.width + "px";
        imgElement.style.height = videoRect.height + "px";
        imgElement.src = imgData;
      }
      
      var preCapture = Date.now();
      var result = await new Promise(function(resolve, reject) {
        pendingResolve = resolve;
      });
      shutdown = false;
      
      return {'create': preShow - preCreate, 
              'show': preCapture - preShow, 
              'capture': Date.now() - preCapture,
              'img': result};
    }
    ''')

  # Emit the script into the cell output so the functions above are defined
  # in the page before stream_frame is called.
  display(js)
  
def video_frame(label, bbox):
  """
  Call the JavaScript ``stream_frame`` function and return its reply.

  Params:
          label: text/HTML shown in the status label (empty string keeps the
                 current label)
          bbox: data-URL string of the overlay image (empty string keeps the
                current overlay)
  Returns:
          data: whatever ``stream_frame`` returns via eval_js -- an empty
                string once the stream was shut down, otherwise an object
                with 'create', 'show', 'capture' and 'img' entries
  """
  # Local import keeps this cell runnable on its own.
  import json

  # json.dumps produces correctly quoted/escaped JavaScript string literals,
  # so quotes, backslashes or newlines in the arguments cannot break (or
  # inject code into) the evaluated expression.
  call = 'stream_frame({}, {})'.format(json.dumps(label), json.dumps(bbox))
  data = eval_js(call)
  return data



## Basic Setting

In [None]:
# List the current working directory to confirm the project files are present.
os.listdir()

['model',
 'data',
 'yolact',
 'fast_neural_style',
 'saved_models',
 'join_model_2.ipynb',
 'webcam.ipynb']

In [None]:
# Placeholder for the current webcam frame; args.content_frame is replaced
# with each captured frame inside the streaming loop.
frame = ''

# Run settings, exposed below as attributes of ``args``:
#   cuda          - selects "cuda" vs "cpu" in frame_pre
#   content_frame - the frame to pre-process
#   content_scale - not read by the code visible in this notebook
#   loaded_model  - path of the TorchScript model file to load
parser = dict(
    cuda=True,
    content_frame=frame,
    content_scale=None,
    loaded_model="model/yolact_style_udnie.pth",
)
args = argparse.Namespace(**parser)

## Load Model

In [None]:
# Load the TorchScript model directly onto the device that frame_pre sends
# the input frames to (chosen by args.cuda). Without map_location the model
# is restored to whatever device it was saved from, which can differ from the
# input tensor's device and fail at inference time.
loaded_model = torch.jit.load(
    args.loaded_model,
    map_location=torch.device("cuda" if args.cuda else "cpu"),
)
# Switch to inference mode; as the last expression of the cell this also
# displays the module structure.
loaded_model.eval()

RecursiveScriptModule(
  original_name=PartialTransformer
  (sty_model): RecursiveScriptModule(
    original_name=TransformerNet
    (conv1): RecursiveScriptModule(
      original_name=ConvLayer
      (reflection_pad): RecursiveScriptModule(original_name=ReflectionPad2d)
      (conv2d): RecursiveScriptModule(original_name=Conv2d)
    )
    (in1): RecursiveScriptModule(original_name=InstanceNorm2d)
    (conv2): RecursiveScriptModule(
      original_name=ConvLayer
      (reflection_pad): RecursiveScriptModule(original_name=ReflectionPad2d)
      (conv2d): RecursiveScriptModule(original_name=Conv2d)
    )
    (in2): RecursiveScriptModule(original_name=InstanceNorm2d)
    (conv3): RecursiveScriptModule(
      original_name=ConvLayer
      (reflection_pad): RecursiveScriptModule(original_name=ReflectionPad2d)
      (conv2d): RecursiveScriptModule(original_name=Conv2d)
    )
    (in3): RecursiveScriptModule(original_name=InstanceNorm2d)
    (res1): RecursiveScriptModule(
      original_name=

## Load frame from webcam

In [None]:
def frame_pre(args):
    """Turn ``args.content_frame`` into a batched model input tensor.

    The frame is cast to uint8 and wrapped as a PIL image in 'RGB' mode (so
    its channels are interpreted in RGB order), resized to 480x640
    (height x width), converted to a tensor and multiplied by 255, given a
    leading batch dimension and moved to the selected device.

    Params:
        args: namespace providing ``cuda`` (truthy selects "cuda", otherwise
              "cpu") and ``content_frame`` (numpy image array).
    Returns:
        Tensor with a leading batch dimension of 1, on the selected device.
    """
    target_device = torch.device("cuda" if args.cuda else "cpu")

    pil_frame = PIL.Image.fromarray(args.content_frame.astype('uint8'), 'RGB')

    preprocess = transforms.Compose([
        transforms.Resize((480, 640)),
        transforms.ToTensor(),
        # Multiply the ToTensor output by 255 before it reaches the model.
        transforms.Lambda(lambda tensor: tensor.mul(255)),
    ])

    batch = preprocess(pil_frame).unsqueeze(0)
    return batch.to(target_device)

## Streaming video from webcam

In [None]:
# Inject the JavaScript webcam helpers into this cell's output.
video_stream()

label_html = 'Capturing...'

# Overlay sent with every request: empty at first, afterwards the styled
# version of the previously captured frame.
bbox = ''
count = 0

# RGBA canvas reused for every frame; the alpha channel stays fully opaque
# and only the colour channels are rewritten per iteration.
img_rgba = np.zeros([480, 640, 4], dtype=np.uint8)
img_rgba[:, :, 3] = 255

while True:
    js_reply = video_frame(label_html, bbox)
    # An empty reply means the page tore the stream down. An empty 'img'
    # means the stop click arrived while a capture was pending; decoding it
    # would raise IndexError, so treat it as a stop request as well.
    if not js_reply or not js_reply["img"]:
        break

    # convert JS response to OpenCV Image (BGR channel order)
    frame = js_to_image(js_reply["img"])
    # frame_pre interprets the array as RGB and the result is displayed as
    # RGBA, so reorder the channels to avoid swapping red and blue.
    args.content_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    content_img = frame_pre(args)

    # Inference only: skip autograd bookkeeping (also guarantees the output
    # can be converted to numpy).
    with torch.no_grad():
        result = loaded_model(content_img)
    # Clip before the uint8 cast so out-of-range values saturate instead of
    # wrapping around.
    res_numpy = np.clip(result.cpu().numpy(), 0, 255).astype(np.uint8)

    # NOTE(review): assumes the model output broadcasts to (480, 640, 3),
    # i.e. height x width x channels -- confirm against the model definition.
    img_rgba[:, :, :3] = res_numpy

    bbox_bytes = bbox_to_bytes(img_rgba)
    bbox = bbox_bytes

<IPython.core.display.Javascript object>