# DUI DATA
Deep Urban Interaction - Data Preparation  
Ryan Yan Zhang <ryanz@mit.edu>  
City Science, MIT Media Lab  

In [7]:
# General Imports
from pprint import pprint
import random
import datetime
import time

from IPython.core.debugger import set_trace

import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# Data Generation

## Image from video with OpenCV

## Pose json from image with OpenPose

## People Bounding box from OpenPose

# Data Editing

## labeled data to DUI training data   

In [None]:
import numpy as np
import os
import json
import cv2


# This script takes the raw images and the fully labelled JSON files (which
# include an interaction matrix), then builds and saves one image for each
# pair of people: either the two people copied onto a black background, or
# the resized full frame stitched with the two resized person crops.
# The images are split into the "yes" and "no" folders depending on
# whether the pair interacts or not.
# This generates the training set for the DUI (Deep Urban
# Interaction) DNN (Deep Neural Network).
# by Ryan Yan Zhang <ryanz@mit.edu>


# constant parameters
# indices into a [x, y] coordinate pair such as person['tl_coord']
xx = 0
yy = 1


# input parameters
# Paths are assembled with os.path.join instead of hard-coded backslashes so
# that they resolve on every platform (the values are unchanged on Windows).
# folder that holds the raw images and the labelled jsons
root_path = os.path.join(os.pardir, 'data', 'raw', 'DUI', 'input')
# sub-folder of root_path with the raw images
raw_image_dir = 'images_original_1080p_jpg'
# output folders, given relative to root_path (i.e. siblings of 'input')
save_image_yes_dir = os.path.join(os.pardir, 'yes')
save_image_no_dir = os.path.join(os.pardir, 'no')
image_ext = '.jpg'
# sub-folder of root_path with the fully labelled json files
json_dir = 'jsons_fully_labeled'
json_ext = '.json'
# non-stitch mode only: crop the result to the extent of the two boxes
crop_to_ext = False
# True: stitch [resized full frame | person 0 | person 1] horizontally
# False: copy the two person regions onto a black full-size image
resize_stitch = True
# edge length (pixels) of each square tile in stitch mode
resize_size = 512


# main code
# define paths
LABELED_JSON_PATH = os.path.join(root_path, json_dir)
RAW_IMAGE_PATH = os.path.join(root_path, raw_image_dir)
# The save folders are given relative to root_path with a parent-directory
# component; normpath collapses it so that makedirs gets a plain path.
SAVE_IMAGE_YES_PATH = os.path.normpath(os.path.join(root_path, save_image_yes_dir))
SAVE_IMAGE_NO_PATH = os.path.normpath(os.path.join(root_path, save_image_no_dir))

# make the output folders if they do not exist yet
# (exist_ok avoids the race between checking for and creating the folder)
os.makedirs(SAVE_IMAGE_YES_PATH, exist_ok=True)
os.makedirs(SAVE_IMAGE_NO_PATH, exist_ok=True)

# get list of filenames
# sorted, because os.listdir returns entries in arbitrary order and a stable
# order makes runs reproducible
labeled_json_filenames = sorted(
    f for f in os.listdir(LABELED_JSON_PATH)
    if f.endswith(json_ext) and os.path.isfile(os.path.join(LABELED_JSON_PATH, f))
)
# raw_image_filenames[k] is the image that belongs to labeled_json_filenames[k]
raw_image_filenames = []
for fj in labeled_json_filenames:
    with open(os.path.join(LABELED_JSON_PATH, fj)) as f:
        fi = json.load(f)['filename']
    # The json may name a .png file while the raw images use image_ext
    # (presumably the labelling was done on png copies - TODO confirm).
    # Swap only the extension, never a "png" that is part of the name itself.
    stem, ext = os.path.splitext(fi)
    if ext.lower() == '.png':
        fi = stem + image_ext
    raw_image_filenames.append(fi)
#print(f'raw_image_filenames: {raw_image_filenames}')

# loop all the image files, json files, and save result
def _clamped_box(person, width, height):
    """Return a person's bounding box as (x_min, y_min, x_max, y_max).

    The box is read from person['tl_coord'] and person['br_coord'] and every
    coordinate is clamped to the image, so the result is always safe to use
    as slice bounds (a box fully outside the image comes back empty).
    """
    # int() is a no-op for integer labels and keeps the values usable as
    # slice indices should a labelling tool ever emit floats
    xs = (int(person['tl_coord'][xx]), int(person['br_coord'][xx]))
    ys = (int(person['tl_coord'][yy]), int(person['br_coord'][yy]))
    x_min = min(max(min(xs), 0), width)
    x_max = min(max(max(xs), 0), width)
    y_min = min(max(min(ys), 0), height)
    y_max = min(max(max(ys), 0), height)
    return x_min, y_min, x_max, y_max


for json_filename, image_filename in zip(labeled_json_filenames, raw_image_filenames):

    # load json data
    with open(os.path.join(LABELED_JSON_PATH, json_filename)) as f:
        json_data = json.load(f)

    # pairs only exist when the count is larger than 1
    if json_data['count'] <= 1:
        continue

    # load img; cv2.imread returns None instead of raising when it fails
    img = cv2.imread(os.path.join(RAW_IMAGE_PATH, image_filename))
    if img is None:
        print(f'{image_filename} could not be read, skipped. ')
        continue
    height, width = img.shape[:2]

    interaction_matrix = json_data['interaction_index']
    people = json_data['people']
    num_ppl = len(people)
    boxes = [_clamped_box(person, width, height) for person in people]
    base_name, ext = os.path.splitext(image_filename)

    # the resized full frame is the same for every pair of this image
    if resize_stitch:
        img_resize = cv2.resize(img, (resize_size, resize_size))

    # visit every unordered pair once: j is person_0, i is person_1, i > j
    for j in range(num_ppl):
        for i in range(j + 1, num_ppl):
            x0, y0, x1, y1 = boxes[j]
            x2, y2, x3, y3 = boxes[i]

            # an empty crop carries no pixels of that person and would make
            # cv2.resize fail, so such a pair is skipped
            if x0 >= x1 or y0 >= y1 or x2 >= x3 or y2 >= y3:
                print(f'{base_name}_{i}_{j} skipped: empty bounding box. ')
                continue

            if not resize_stitch:
                # create a black image
                new_img = np.zeros((height, width, 3), np.uint8)
                # copy pixels of the regions of two persons
                new_img[y0:y1, x0:x1] = img[y0:y1, x0:x1]
                new_img[y2:y3, x2:x3] = img[y2:y3, x2:x3]
                # optionally crop to the common extent of both boxes
                if crop_to_ext:
                    new_img = new_img[min(y0, y2):max(y1, y3), min(x0, x2):max(x1, x3)]
            else:  # if resize and stitch 3 images horizontally
                # create a black image
                new_img = np.zeros((resize_size, resize_size * 3, 3), np.uint8)
                # left tile: the resized full frame
                new_img[:, :resize_size] = img_resize
                # middle and right tiles: the resized crops of person 0 and 1
                new_img[:, resize_size:resize_size * 2] = cv2.resize(
                    img[y0:y1, x0:x1], (resize_size, resize_size))
                new_img[:, resize_size * 2:resize_size * 3] = cv2.resize(
                    img[y2:y3, x2:x3], (resize_size, resize_size))

            # save new image to the "yes" or "no" folder depending on
            # whether the pair interacts or not
            if interaction_matrix[j][i] == 0:
                label, save_path = 'no', SAVE_IMAGE_NO_PATH
            else:
                label, save_path = 'yes', SAVE_IMAGE_YES_PATH
            result_image_filename = f'{base_name}_{i}_{j}_{label}{ext}'
            # cv2.imwrite reports failure through its return value
            if cv2.imwrite(os.path.join(save_path, result_image_filename), new_img):
                print(f'{result_image_filename} saved. ')
            else:
                print(f'{result_image_filename} could not be saved. ')

# Train valid set split  
## copy files by chance  

In [None]:
import os
import shutil
import random


# This script goes through all files in the source folder and copies a random,
# non-overlapping selection of them to the save folders.
# by Ryan Yan Zhang <ryanz@mit.edu>


# input parameters
# Paths are assembled with os.path.join instead of hard-coded backslashes so
# that they resolve on every platform (the values are unchanged on Windows).
_dui_dir = os.path.join(os.pardir, 'data', 'raw', 'DUI')
_train_dir = os.path.join(_dui_dir, 'train_512x3_jpg')
_valid_dir = os.path.join(_dui_dir, 'valid_512x3_jpg')
# yes set
src_dir_yes = os.path.join(_dui_dir, 'yes')
save_dir_yes_train = os.path.join(_train_dir, 'yes')
num_yes_train = 3787
save_dir_yes_valid = os.path.join(_valid_dir, 'yes')
num_yes_valid = 421
# no set
src_dir_no = os.path.join(_dui_dir, 'no')
save_dir_no_train = os.path.join(_train_dir, 'no')
num_no_train = 10000
save_dir_no_valid = os.path.join(_valid_dir, 'no')
num_no_valid = 1000
# NOTE: for each set, train + valid must not exceed the number of files with
# file_ext in the source folder, otherwise the random sampling fails.
# file type
file_ext = '.jpg'


# functions
def copy_by_chance(src_dir, save_dir_0, num_0, save_dir_1, num_1, file_ext):
    """Copy two random, non-overlapping selections of files out of a folder.

    Picks ``num_0 + num_1`` distinct files ending in ``file_ext`` from
    ``src_dir`` at random, copies the first ``num_0`` of them to
    ``save_dir_0`` and the remaining ``num_1`` to ``save_dir_1``. The save
    folders are created when missing. Each copied path is printed.

    Args:
        src_dir: folder to pick files from (sub-folders are ignored).
        save_dir_0: destination folder of the first selection.
        num_0: number of files to copy to ``save_dir_0``.
        save_dir_1: destination folder of the second selection.
        num_1: number of files to copy to ``save_dir_1``.
        file_ext: only filenames ending with this string are considered.

    Raises:
        ValueError: if a count is negative, or if ``src_dir`` holds fewer
            matching files than ``num_0 + num_1``. Nothing is created or
            copied in that case.
    """
    if num_0 < 0 or num_1 < 0:
        raise ValueError(f'num_0 and num_1 must not be negative, got {num_0} and {num_1}')

    # get list of filenames; sorted because os.listdir order is arbitrary, so
    # that seeding the random module gives a reproducible split
    filenames = sorted(
        f for f in os.listdir(src_dir)
        if f.endswith(file_ext) and os.path.isfile(os.path.join(src_dir, f))
    )

    # validate before touching the file system
    num_needed = num_0 + num_1
    if num_needed > len(filenames):
        raise ValueError(
            f'{src_dir} holds {len(filenames)} "{file_ext}" files, '
            f'but {num_needed} were requested ({num_0} + {num_1})'
        )

    # make path if not existed
    os.makedirs(save_dir_0, exist_ok=True)
    os.makedirs(save_dir_1, exist_ok=True)

    # random.sample never repeats an element, so the two slices are disjoint
    chosen = random.sample(filenames, num_needed)
    batches = ((save_dir_0, chosen[:num_0]), (save_dir_1, chosen[num_0:]))

    for save_dir, batch in batches:
        for file_copying in batch:
            full_path_src = os.path.join(src_dir, file_copying)
            full_path_save = os.path.join(save_dir, file_copying)
            shutil.copyfile(full_path_src, full_path_save)
            print(f'{full_path_save} copied. ')
        

# main code
# run the same random split twice: first for the yes set, then for the no set
for split_args in (
    (src_dir_yes, save_dir_yes_train, num_yes_train, save_dir_yes_valid, num_yes_valid),
    (src_dir_no, save_dir_no_train, num_no_train, save_dir_no_valid, num_no_valid),
):
    copy_by_chance(*split_args, file_ext)