# Script for Generating Training Data from Frames using PIPs
https://github.com/aharley/pips

## Initial set up
Connecting to Google Drive (MyDrive), installing requirements, importing modules, and downloading the model weights.

In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# Later cells read the PIPs checkout and the zipped frames from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# installing requirements
!pip install imageio==2.19.3
!pip install opencv-python==4.6.0.66
!pip install einops==0.4.1
!pip install scikit-learn==1.1.1
!pip install matplotlib==3.5.1
!pip install protobuf==3.20.0
!pip install fire==0.4.0

In [4]:
import sys
sys.path.append('/content/drive/My Drive/drive_folder/pips')
import time
import numpy as np
import io
import os
from PIL import Image
import cv2
import saverloader
from nets.pips import Pips
import utils.improc
import random
import glob
from utils.basic import print_, print_stats
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
import gc
import imageio.v2 as imageio
from csv import writer

# save numpy array as npz file
from numpy import asarray
from numpy import savez_compressed

In [None]:
# Download the PIPs authors' reference model by running the helper script
# shipped in the PIPs checkout stored on Drive.
# NOTE(review): create_model('reference_model') below uses a relative path, so the
# script presumably places the weights under the current working directory — confirm.
!bash /content/drive/MyDrive/drive_folder/pips/get_reference_model.sh

## Loading data to generate training data from

In [None]:
DATA_DIR = "frames_small" # specify the name of the data folder located in drive
!unzip -d "$DATA_DIR"/ /content/drive/MyDrive/"$DATA_DIR".zip # unziping frames

## Generating training data

### PIPs implementation

In [38]:
def create_model(model_weights):
  """Build a PIPs model on the GPU, load its weights and switch it to eval mode.

  Args:
    model_weights: checkpoint location handed unchanged to ``saverloader.load``.

  Returns:
    The ``Pips`` instance (constructed with ``stride=4``) in evaluation mode.
  """
  model = Pips(stride=4).cuda()
  # the value returned by saverloader.load is not needed here
  saverloader.load(model_weights, model)
  model.eval()
  return model


def run_model(model, rgbs, N, split):
    """Track a uniform grid of points through a clip and return their final positions.

    The frames are resized to 360x640, a grid of roughly ``N`` points (the
    nearest square number) is laid out with an 8 pixel margin, and the model is
    run on the grid, optionally in chunks to limit GPU memory use.

    Args:
        model: PIPs model, called as ``model(xy, rgbs, iters=6)``; the first
            returned value is a list of trajectory estimates.
        rgbs: frame tensor of shape (B, S, C, H, W).
        N: requested number of points; ``round(sqrt(N))**2`` points are tracked.
        split: if True, the grid is processed in chunks of 1500 points.

    Returns:
        Tensor with the points' coordinates in the last frame, in the resized
        360x640 pixel space. The notebook's verification cell shows shape
        (1, 10000, 2) for B=1 and a 100x100 grid.
    """
    POINTS_PER_CHUNK = 1500  # points per forward pass when split is True

    rgbs = rgbs.cuda().float() # B, S, C, H, W

    B, S, C, H, W = rgbs.shape
    # resize every frame to a fixed resolution before tracking
    H_, W_ = 360, 640
    rgbs_ = F.interpolate(rgbs.reshape(B*S, C, H, W), (H_, W_), mode='bilinear')
    H, W = H_, W_
    rgbs = rgbs_.reshape(B, S, C, H, W)

    # pick N points to track; we'll use a uniform grid
    N_ = int(np.sqrt(N).round())
    grid_y, grid_x = utils.basic.meshgrid2d(B, N_, N_, stack=False, norm=False, device='cuda')
    # a 1x1 grid would otherwise divide 0 by 0 and yield NaN coordinates
    grid_span = float(max(N_ - 1, 1))
    grid_y = 8 + grid_y.reshape(B, -1)/grid_span * (H-16)
    grid_x = 8 + grid_x.reshape(B, -1)/grid_span * (W-16)
    xy = torch.stack([grid_x, grid_y], dim=-1) # B, N_*N_, 2

    print_stats('rgbs', rgbs)

    # optionally split the grid of points into chunks along dim=1
    if split:
        xy_chunks = torch.split(xy, POINTS_PER_CHUNK, dim=1)
    else:
        xy_chunks = (xy,)

    # for each chunk of points compute trajs, keeping only the final estimate
    final_preds = []
    for xy_chunk in xy_chunks:
        preds, _, _, _ = model(xy_chunk, rgbs, iters=6)
        # preds is a list of torch tensors; the last entry is the final estimate
        final_preds.append(preds[-1])

    # put trajs back together along the points dimension
    trajs_e = torch.cat(final_preds, 2)
    print_stats('trajs_e', trajs_e)

    # only the positions in the last frame are returned (visibility is not returned)
    return trajs_e[:,-1,:,:]

def generate_training_data(pips_model, video_name, n, split=True):
    """Generate and save one training sample per 8-frame window of a video.

    Frames are read from ``DATA_DIR/content/frames/<video_name>/frames/*.jpg``,
    every second frame is dropped, and the remaining frames are processed in
    consecutive, non-overlapping windows of 8. For each window the model tracks
    an ``n`` x ``n`` grid of points and ``save_data`` stores the first frame,
    the last frame and the tracked coordinates.

    Windows whose sample id is already in the global ``LOG_LIST`` are skipped,
    so an interrupted run can be resumed.

    Args:
        pips_model: model passed on to ``run_model``.
        video_name: name of the video's folder; its last 11 characters prefix
            each sample id (presumably the YouTube video id — confirm).
        n: grid side length; ``n**2`` points are tracked.
        split: forwarded to ``run_model`` (process the points in chunks).

    Returns:
        None. Results are written to disk by ``save_data``.
    """
    S = 8  # frames per window
    N = n**2 # number of points to track
    FRAME_STRIDE = 2  # keep every second frame

    # Get frames of a file; escape the directory so that glob metacharacters
    # in a video name ([, ], *, ?) are matched literally
    frames_dir = os.path.join(DATA_DIR, 'content', 'frames', video_name, 'frames')
    # NOTE(review): sorting is lexicographic — assumes zero-padded frame numbers
    filenames = sorted(glob.glob(os.path.join(glob.escape(frames_dir), '*.jpg')))

    # Remove every second frame from the list
    filenames = filenames[::FRAME_STRIDE]

    max_iters = len(filenames)//S # run each unique subsequence
    if max_iters == 0:
        print("no complete window of", S, "frames found in", frames_dir)
        return

    # Run the model on each window of 8 frames
    for global_step in range(1, max_iters + 1):
        print("step",global_step,"out of",max_iters)

        start = (global_step-1)*S
        sample_id = video_name[-11:]+"_"+str(start)
        # skip generating this sample if already in the log list
        if sample_id in LOG_LIST:
            print(sample_id+" already in log list")
            continue

        try:
            window = filenames[start:start+S]
            print('start frame', window[0])

            rgbs = []
            for fn in window:
                im = imageio.imread(fn)
                im = im.astype(np.uint8)
                # H, W, C -> C, H, W
                rgbs.append(torch.from_numpy(im).permute(2,0,1))
            rgbs = torch.stack(rgbs, dim=0).unsqueeze(0) # 1, S, C, H, W
            print(rgbs.shape)

            with torch.no_grad():
                trajs_e = run_model(pips_model, rgbs, N, split)

            # store the first and last frame of the window and trajs_e
            save_data(sample_id, rgbs[0][0], rgbs[0][-1], trajs_e)

        except FileNotFoundError as e:
            # a frame disappeared between listing and reading; skip this window
            print('error', e)

def img_to_array(tensor_img):
   """Convert a channel-first image tensor to a resized NumPy image.

   The tensor's axes are reordered with ``permute(1, 2, 0)`` and the result is
   resized with ``cv2.resize(..., (240, 120))``; OpenCV takes the target size as
   (width, height), so the output is 120 rows by 240 columns.
   """
   channels_last = tensor_img.permute(1, 2, 0).numpy()
   return cv2.resize(channels_last, (240, 120))

def save_data(sample_id, frame0, frame7, trajs):
  """
  Store one training sample as three .npy files named after sample_id:
  - tracked coordinates in COORDS_DIR
  - the resized first frame of the window in FRAME0_DIR
  - the resized last (8th) frame of the window in FRAME1_DIR
  Afterwards sample_id is appended to the in-memory LOG_LIST; writing that list
  to disk is done separately by save_log.
  """
  first_img = img_to_array(frame0) # first frame as resized array
  last_img = img_to_array(frame7) # last frame (8th) as resized array
  coords = trajs.cpu().numpy()

  # (target directory, array) pairs, written in this order
  outputs = (
      (COORDS_DIR, coords),
      (FRAME0_DIR, first_img),
      (FRAME1_DIR, last_img),
  )
  for out_dir, payload in outputs:
    np.save(out_dir+sample_id, payload)

  # remember the sample so it is skipped on later runs
  LOG_LIST.append(sample_id)

def save_log(log_list):
  """Overwrite LOG_FILE with the given sample ids, one per line (no trailing newline)."""
  with open(LOG_FILE, "w") as log_fh:
    joined_ids = "\n".join(log_list)
    log_fh.write(joined_ids)

In [None]:
# Build the PIPs model once; it is reused by every generate_training_data call below.
# 'reference_model' is a relative path — presumably where get_reference_model.sh put the weights (confirm).
model = create_model('reference_model')

### Creating directories and running generation of data

In [39]:
#create a folder to store training data in
!mkdir training_data
!mkdir training_data/frame0/
!mkdir training_data/frame1/
!mkdir training_data/coords/
!touch training_data/sample_ids.txt

In [40]:
# Output locations; save_data builds file names as <DIR> + sample_id,
# so the directory constants must end with a slash.
TRANING_DATA_DIR = "training_data/"  # (sic) spelling kept so existing references keep working
COORDS_DIR = "training_data/coords/"
FRAME0_DIR="training_data/frame0/"
FRAME1_DIR="training_data/frame1/"
LOG_FILE = "training_data/sample_ids.txt"

# ids of samples generated in earlier runs; generate_training_data skips these
with open(LOG_FILE) as log_file:
    LOG_LIST = log_file.read().splitlines()

In [None]:
# Generate samples for a single video; n=100 tracks a 100 x 100 = 10000 point grid.
generate_training_data(model, 'Tai chiRen Guang Yi - Chen Style Taiji silk reeling part II-Wh3AmDsdQtM', n=100)
# Write the updated list of sample ids to LOG_FILE so finished samples are skipped next time.
save_log(LOG_LIST)

In [None]:
# create a list of video folder names to go through (hidden entries are ignored)
# built from DATA_DIR so it stays in sync with the folder the frames were unzipped to;
# sorted because os.listdir returns entries in arbitrary order
frames_root = os.path.join(DATA_DIR, 'content', 'frames')
videos_list = sorted(f for f in os.listdir(frames_root) if not f.startswith('.'))
print(videos_list)

['Tai chiCurso Chi Kung de los Seis Sonidos Curativos-PvjYVsRK4Dg', 'Tai chi56式夕陽美功夫扇-uOw-z7CR7x8', 'Tai chi陈式太极拳五十六式-vB8XTJfV4rY', 'Tai chiRen Guang Yi - Chen Style Taiji silk reeling part II-Wh3AmDsdQtM', 'Tai chiShaolin Basics Are From Theater!-gyms4lomW50', 'Tai chi10 forms 2009-KX-dEeB47sc', 'Tai chi熊門楊家太極拳111式第一段\u3000李國光老師示範-X_9SJZuSWQU', 'Tai chiYang Family Tai Chi q&a Knee Brush-WPeVwAhTNuU', 'Tai chiCanda - Tai Chi Chuan Yang-Stil - Sanfte Bewegungsformen für Einsteiger-f7NkWPgh1-o', 'Tai chiWee Kee Jin - Keeping Your Structure-7jn9jeAbChE']


In [None]:
# iterate through the videos list and generate training data files
for x in range(len(videos_list)):
  print("Now generating training data for {}, {} out of {}.".format(videos_list[x], x, len(videos_list)))
  generate_training_data(model, videos_list[x], 100)

### Zip data and store in Drive

In [42]:
# zip training data
!zip -r /content/training_data_01122020.zip /content/training_data -x "*/.*"
# cp NumPy zip file into drive
!cp training_data_01122020.zip /content/drive/MyDrive/

  adding: content/training_data/ (stored 0%)
  adding: content/training_data/sample_ids.txt (deflated 86%)
  adding: content/training_data/coords/ (stored 0%)
  adding: content/training_data/coords/Wh3AmDsdQtM_400.npy (deflated 11%)
  adding: content/training_data/coords/Wh3AmDsdQtM_2792.npy (deflated 11%)
  adding: content/training_data/coords/Wh3AmDsdQtM_2056.npy (deflated 11%)
  adding: content/training_data/coords/Wh3AmDsdQtM_568.npy (deflated 11%)
  adding: content/training_data/coords/Wh3AmDsdQtM_2184.npy (deflated 11%)
  adding: content/training_data/coords/Wh3AmDsdQtM_2488.npy (deflated 11%)
  adding: content/training_data/coords/Wh3AmDsdQtM_1664.npy (deflated 11%)
  adding: content/training_data/coords/Wh3AmDsdQtM_1112.npy (deflated 12%)
  adding: content/training_data/coords/Wh3AmDsdQtM_2296.npy (deflated 11%)
  adding: content/training_data/coords/Wh3AmDsdQtM_1712.npy (deflated 11%)
  adding: content/training_data/coords/Wh3AmDsdQtM_144.npy (deflated 11%)
  adding: content/t

## Testing

In [34]:
# see the stored coordinates of one sample for verification
# file names follow save_data's "<video id>_<start frame>" pattern;
# plain numeric arrays do not need allow_pickle
data = np.load('training_data/coords/Wh3AmDsdQtM_0.npy')
print(data.shape)

(1, 10000, 2)
