In [1]:
import os
import os.path as osp
import numpy as np
import math
import pandas as pd
import cv2
import re
from datetime import datetime
from tqdm import tqdm
import shutil
import random
from PIL import Image, ImageOps
def to_pil(cv_img):
    """Convert an OpenCV image array from BGR channel order into an RGB PIL image."""
    return Image.fromarray(cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB))
# Root directory that holds the per-branch image folders and label CSVs.
src = "/media/acer/5f45949f-0fc7-4475-965b-e61989afcc10/FlowInc_data"
# Branch to process; the number after the last underscore is used as camera id.
branch = "Yilan_03"
cam = int(branch.split("_")[-1])
# Every JPEG below <src>/<branch>. Match on the file suffix instead of a
# substring so names that merely contain "jpg" are not collected.
img_list = [
    osp.join(root, f)
    for root, _, files in os.walk(osp.join(src, branch))
    for f in files
    if f.endswith('.jpg')
]

In [2]:
# Output directory for this camera's images.
dst = osp.join(src, f"images_{cam:02}")
# exist_ok=True replaces the check-then-create pair and also creates any
# missing parent directory.
os.makedirs(dst, exist_ok=True)

In [13]:
# Output directory for the annotated sample images.
# NOTE(review): this rebinds `dst`; every later cell that writes to `dst`
# targets this folder until `dst` is assigned again.
dst = osp.join(src, f"sample_images_{cam:02}")
# exist_ok=True replaces the check-then-create pair and also creates any
# missing parent directory.
os.makedirs(dst, exist_ok=True)

In [3]:
# Load the label table for this branch from <src>/<branch>.csv.
df = pd.read_csv(osp.join(src, f"{branch}.csv"))

In [4]:
# Preview the first rows of the label table.
df.head()

Unnamed: 0,file_name,x1_label,y1_label,x2_label,y2_label,occlude_label,tracking_id_label,uniform_label,stand_x,stand_y
0,03_20200419170000.jpg,634,219,717,305,1,03_0001,1,698,341
1,03_20200419170000.jpg,1097,166,1175,276,1,03_0002,1,1116,317
2,03_20200419170000.jpg,203,1229,321,1341,0,03_0003,0,308,1246
3,03_20200419170000.jpg,203,1286,317,1341,0,03_0004,0,298,1288
4,03_20200419170000.jpg,559,1626,627,1719,0,03_0005,0,615,1638


In [5]:
# Show every row that has a missing value in at least one column.
df[df.isna().any(axis=1)]

Unnamed: 0,file_name,x1_label,y1_label,x2_label,y2_label,occlude_label,tracking_id_label,uniform_label,stand_x,stand_y


In [6]:
# Drop rows with any missing value. This rebinds `df`, so the unfiltered
# table is no longer reachable under this name afterwards.
df = df.dropna()

In [15]:
# Draw a reproducible random subset of 264 label rows.
np.random.seed(0)
# randint samples positions in [0, len(df)) WITH replacement, so the same row
# can be picked more than once.
# NOTE(review): confirm duplicates are acceptable for the sample.
sample = np.random.randint(0, len(df), 264)
# The drawn values are positions, not index labels: select with .iloc.
# Label-based .loc raises KeyError (or returns the wrong rows) as soon as the
# index is no longer 0..n-1, e.g. after dropna() or when this cell is re-run
# on the already-sampled frame.
df = df.iloc[sample]

In [7]:
def get_rotate_angle(dx, dy):
    """Return the signed angle, in degrees, of the offset (dx, dy) measured
    from the negative-y direction.

    The result is positive when dx > 0 and negative when dx < 0, and lies in
    (-180, 180]:

    * (dx > 0, dy < 0) ->  atan(|dx/dy|)
    * (dx < 0, dy < 0) -> -atan(|dx/dy|)
    * (dx > 0, dy > 0) ->  180 - atan(|dx/dy|)
    * (dx < 0, dy > 0) -> -(180 - atan(|dx/dy|))

    Axis-aligned offsets follow from continuity: (0, dy < 0) -> 0,
    (dx > 0, 0) -> 90, (dx < 0, 0) -> -90 and (0, dy > 0) -> 180 (the same
    rotation as -180). A zero offset has no direction and returns 0.

    Parameters
    ----------
    dx, dy : int or float
        Horizontal and vertical components of the offset.

    Returns
    -------
    int or float
        The angle in degrees.
    """
    if dx == 0 and dy == 0:
        # No direction to measure; also avoids atan2(0, -0.0) == pi.
        return 0
    # atan2(dx, -dy) measures from the negative-y axis and picks the right
    # quadrant itself, so no division by dy and no per-quadrant branching.
    return math.degrees(math.atan2(dx, -dy))

def get_normalized_img(crop, pos, center=None):
    """Pad a crop vertically and rotate it by the angle of its box centre
    relative to a reference point.

    Parameters
    ----------
    crop : array
        Image crop in OpenCV BGR order; converted with ``to_pil``.
    pos : tuple
        ``(x1, y1, x2, y2)`` of the crop's box; only the box centre is used.
    center : tuple, optional
        ``(cx, cy)`` reference point in the same coordinate system as ``pos``.
        When omitted, the module-level ``cx`` and ``cy`` are used, which is
        what existing callers rely on.

    Returns
    -------
    PIL.Image.Image
        The padded, rotated crop.
    """
    if center is None:
        # Backward-compatible fallback to the globals set by the calling loop.
        center = (cx, cy)
    ref_x, ref_y = center

    pil_img = to_pil(crop)
    crop_w, crop_h = pil_img.size
    x1, y1, x2, y2 = pos
    box_cx, box_cy = (x1 + x2) / 2, (y1 + y2) / 2

    # Grow the height towards the crop's diagonal length; the width is left
    # untouched (border tuple is left, top, right, bottom).
    diag_len = np.sqrt(crop_w**2 + crop_h**2)
    pad = int((diag_len - crop_h) // 2)
    pil_img = ImageOps.expand(pil_img, (0, pad, 0, pad))

    angle = get_rotate_angle(box_cx - ref_x, box_cy - ref_y)
    return pil_img.rotate(angle)

In [8]:
# Display the output directory currently bound to `dst`.
dst

'/media/acer/5f45949f-0fc7-4475-965b-e61989afcc10/FlowInc_data/images_03'

In [9]:
# Splits a 14-digit YYYYMMDDhhmmss stamp into its six numeric fields.
pattern = re.compile(r'(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})')

# For every labelled frame: write one line per label to <src>/<branch>.txt and
# save a rotated crop of each usable box into `dst`.
# NOTE(review): `dst` and `df` are whatever the kernel currently holds. In
# file order an earlier cell rebinds `dst` to sample_images_XX and replaces
# `df` with a random sample, while the recorded output shows images_XX here.
# Confirm both before a Restart & Run All.
with open(osp.join(src, f'{branch}.txt'), 'w') as f:  # closed even on error
    for path in tqdm(sorted(img_list)):
        img = cv2.imread(path)
        if img is None:
            # cv2.imread returns None instead of raising for unreadable files.
            print(f"unreadable image: {path}")
            continue
        h, w = img.shape[:2]
        # Frame centre; get_normalized_img reads cx / cy as globals.
        cx, cy = w * 0.5, h * 0.5
        fname = osp.basename(path)
        labels = df[df['file_name']==fname]
        fname = osp.splitext(fname)[0]
        if len(labels) == 0:
            continue

        for i in range(len(labels)):
            row = labels.iloc[i]  # one positional lookup per label
            x1 = int(row['x1_label'])
            y1 = int(row['y1_label'])
            x2 = int(row['x2_label'])
            y2 = int(row['y2_label'])
            sx = int(row['stand_x'])
            sy = int(row['stand_y'])
            # Every label is logged, including ones whose crop is skipped below.
            f.write(f"{fname} -1 {x1} {y1} {x2} {y2} {sx} {sy}\n")
            uniform = int(row['uniform_label'])
            occlude = int(row['occlude_label'])
            pid = row['tracking_id_label'].split("_")[-1]

            # Require a strictly positive box: zero-size and inverted boxes
            # both yield an empty slice that cannot be converted.
            if not (x2 > x1 and y2 > y1):
                print(fname)
                continue
            # Labels with occlude_label == 2 are never saved.
            if occlude == 2:
                continue

            normalized_img = get_normalized_img(img[y1:y2, x1:x2, :], (x1, y1, x2, y2))
            crop_w, crop_h = normalized_img.size
            # Size is measured on the padded image; a crop is dropped only when
            # it is both narrower than 50 and shorter than 100.
            # NOTE(review): confirm `and` (not `or`) is intended.
            if crop_w < 50 and crop_h < 100:
                continue

            date_str = fname.split("_")[-1]
            year, month, day, hour, minute, second = map(int, pattern.search(date_str).groups())
            date = datetime(year, month, day, hour, minute, second)
            # Naive datetime: timestamp() interprets it in the local timezone.
            timestamp = int(date.timestamp())
            img_name = f"{pid}_c{cam}s1_{uniform}_{timestamp}.jpg"
            normalized_img.save(osp.join(dst, img_name))

100%|██████████| 1300/1300 [01:01<00:00, 20.99it/s]


In [10]:
# Every JPEG currently below `dst` (matched on the suffix, not a substring).
crop_img_list = [
    osp.join(root, f)
    for root, _, files in os.walk(dst)
    for f in files
    if f.endswith('.jpg')
]
# Target directory of the first renumbering pass.
clean_dst = osp.join(src, f"images_{cam:02}_1th_clean")
# exist_ok=True replaces the check-then-create pair and creates parents.
os.makedirs(clean_dst, exist_ok=True)

In [11]:
# Copy the crops into `clean_dst` under a running number `num`. The number
# advances when the pid changes, or when the pid stays the same but the
# timestamp jumps by more than 100 from the previous crop.
# -1 marks "no previous crop seen yet" for both trackers.
temp_timestamp = -1
temp_pid = -1
num = 1
for path in tqdm(sorted(crop_img_list)):
    fname = osp.basename(path)
    fname = osp.splitext(fname)[0]
    try:
        pid, _, uniform, timestamp = fname.split("_")
    except ValueError:
        # Name does not have exactly four "_"-separated fields. Only the
        # unpacking error is caught, so Ctrl-C and real bugs still surface.
        print(fname)
        break
    pid = int(pid)
    uniform = int(uniform)
    timestamp = int(timestamp)
    if pid != temp_pid and temp_pid > 0:
        num += 1
    elif abs(timestamp - temp_timestamp) > 100 and temp_timestamp > 0:
        num += 1
    temp_pid = pid
    temp_timestamp = timestamp
    img_name = f"{num:04}_c{cam}s1_{uniform}_{timestamp}.jpg"
    shutil.copy(path, osp.join(clean_dst, img_name))

100%|██████████| 4157/4157 [00:00<00:00, 9045.11it/s]


In [96]:
# Every JPEG produced by the first pass (matched on the suffix).
crop_img_list2 = [
    osp.join(root, f)
    for root, _, files in os.walk(clean_dst)
    for f in files
    if f.endswith('.jpg')
]
# Target directory of the second pass.
clean_dst2 = osp.join(src, f"images_{cam:02}_2th_clean")
# exist_ok=True replaces the check-then-create pair and creates parents.
os.makedirs(clean_dst2, exist_ok=True)

In [97]:
# Hand-written remapping of first-pass numbers onto the id they are merged
# into; numbers that are not listed keep their value.
pid_map = {
    10: 3, 18: 3,
    8: 4,
    25: 5, 11: 5,
    9: 6,
    27: 13, 22: 13,
    29: 15, 23: 15,
    24: 16,
    21: 17,
    26: 7, 20: 7,
}
for path in tqdm(sorted(crop_img_list2)):
    fname = osp.splitext(osp.basename(path))[0]
    pid, _, uniform, timestamp = fname.split("_")
    pid, uniform, timestamp = int(pid), int(uniform), int(timestamp)
    # Fall back to the unchanged number when no merge is defined for it.
    pid = pid_map.get(pid, pid)
    img_name = f"{pid:04}_c{cam}s1_{uniform}_{timestamp}.jpg"
    shutil.copy(path, osp.join(clean_dst2, img_name))


  0%|          | 0/1753 [00:00<?, ?it/s][A
 47%|████▋     | 824/1753 [00:00<00:00, 8233.75it/s][A
 96%|█████████▌| 1682/1753 [00:00<00:00, 8334.55it/s][A
100%|██████████| 1753/1753 [00:00<00:00, 8279.09it/s][A

In [112]:
# Directory that receives the merged crops of all cameras.
final_dst = osp.join(src, "bounding_box_train")
# Create the directory itself: joining `src` onto `final_dst` again only
# resolved to the right path because `src` is absolute.
os.makedirs(final_dst, exist_ok=True)

In [113]:
# Merge the second-pass crops of several cameras into `final_dst`, shifting
# each camera's ids so that ids from different cameras never coincide.
pids = set()   # every un-shifted id seen so far, across all cameras
offset = 0     # amount added to the ids of the camera being processed
# A dedicated loop variable keeps the notebook-level `cam` untouched.
for merge_cam in [1, 2, 4]:
    final_src = osp.join(src, f"images_{merge_cam:02}_2th_clean")
    final_img_list = sorted(
        osp.join(root, f)
        for root, _, files in os.walk(final_src)
        for f in files
        if f.endswith('.jpg')
    )
    cam_max_pid = 0
    for path in final_img_list:
        fname = osp.basename(path)
        _pid = int(fname[:4])  # names start with a 4-digit id
        pids.add(_pid)
        cam_max_pid = max(cam_max_pid, _pid)
        pid = _pid + offset
        fname = f"{pid:04}" + fname[4:]
        shutil.copy(path, osp.join(final_dst, fname))
    # Advance by the highest id used, not by the number of distinct ids:
    # the ids have gaps, so a count-based offset lets a shifted id from the
    # next camera land on an id already used by this one.
    offset += cam_max_pid

In [40]:
# Preview the first entries of `sample_df`.
# NOTE(review): no visible cell defines `sample_df`; this fails on a fresh
# kernel unless it is created elsewhere.
sample_df.head()

1911    01_20200405144152.jpg
3002    01_20200405144453.jpg
820     01_20200405143904.jpg
3698    01_20200405144712.jpg
843     01_20200405143911.jpg
Name: file_name, dtype: object

In [18]:
# Show the path built from the current `dst` and `fname`; both are leftovers
# from whichever cells ran last.
osp.join(dst, f"{fname}.jpg")

'/media/acer/5f45949f-0fc7-4475-965b-e61989afcc10/FlowInc_data/sample_images_03/03_20200419172139.jpg'

In [17]:
pattern = re.compile(r'(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})')
# Draw every label (box, stand point, pid / uniform / occlude captions) onto
# its frame and write the annotated frame into `dst`. Frames without labels
# are not written.
for path in tqdm(sorted(img_list)):
    img = cv2.imread(path)
    if img is None:
        # cv2.imread returns None instead of raising for unreadable files.
        print(f"unreadable image: {path}")
        continue
    h, w = img.shape[:2]
    cx, cy = w * 0.5, h * 0.5
    fname = osp.basename(path)
    labels = df[df['file_name']==fname]
    fname = osp.splitext(fname)[0]
    if len(labels) == 0:
        continue

    for i in range(len(labels)):
        row = labels.iloc[i]  # one positional lookup per label
        x1 = int(row['x1_label'])
        y1 = int(row['y1_label'])
        x2 = int(row['x2_label'])
        y2 = int(row['y2_label'])
        uniform = int(row['uniform_label'])
        occlude = int(row['occlude_label'])
        pid = int(row['tracking_id_label'].split("_")[-1])
        spt_x = int(row['stand_x'])
        spt_y = int(row['stand_y'])
        color = (0,255,0)
        cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
        # Stand point: white disc with a smaller red disc on top.
        cv2.circle(img, (spt_x, spt_y), 7, (255,255,255), -1)
        cv2.circle(img, (spt_x, spt_y), 5, (0,0,255), -1)
        # Captions sit right of the box, stacked by vertical offset.
        captions = [(f"p[{pid}]", 5), (f"u[{uniform}]", 30), (f"o[{occlude}]", 55)]
        # Two passes in this order: thick white outline, then thin black text.
        for text_color, thickness in (((255,255,255), 2), ((0,0,0), 1)):
            for text, y_off in captions:
                cv2.putText(img, text, (x2+5, y1+y_off), cv2.FONT_HERSHEY_DUPLEX, 0.7, text_color, thickness)
    cv2.imwrite(osp.join(dst, f"{fname}.jpg"), img)
    # break

100%|██████████| 1670/1670 [00:31<00:00, 52.66it/s]


In [None]:
import pycocotools.coco as coco
import json

data_path = "/media/acer/5f45949f-0fc7-4475-965b-e61989afcc10/FlowInc_data"
coco_branch = 'flow'

# NOTE(review): this rebinds `src` to <data_path>/flow, so the label CSV and
# the images are now looked up below that folder; cells that ran earlier used
# the previous value of `src`.
src = osp.join(data_path, coco_branch)
train_dst = osp.join(src, 'train2017')
val_dst = osp.join(src, 'val2017')
# makedirs also creates <data_path>/flow when it is missing and does not fail
# when a folder already exists.
os.makedirs(train_dst, exist_ok=True)
os.makedirs(val_dst, exist_ok=True)
label_src = osp.join(src, f"{branch}.csv")
df = pd.read_csv(label_src)

# Every .jpg / .png below <src>/<branch>, matched on the suffix rather than a
# substring.
img_paths = [
    osp.join(root, f)
    for root, _, files in os.walk(osp.join(src, branch))
    for f in files
    if f.endswith(('.jpg', '.png'))
]

In [None]:
# COCO-style container: one "images" entry per copied frame and one
# "annotations" entry per label row of that frame.
dataset = {
    "info": {},
    "licenses": [],
    "images": [],
    "annotations": [],
    "categories": []
}
dataset['categories'].append({
    'id': 1,
    'name': "person",
    'supercategory': "people",
    'keypoints': [],
    'skeleton': []
})

sub_index = 0  # running annotation id
idx = 0        # running image id
for path in tqdm(img_paths):
    img = cv2.imread(path)
    if img is None:
        # cv2.imread returns None instead of raising for unreadable files.
        print(f"unreadable image: {path}")
        continue
    h, w = img.shape[:2]
    img_name = osp.basename(path)
    # Select the labels of THIS image. Filtering on a leftover `fname` from an
    # earlier cell attached the same rows to every image.
    anns = df[df['file_name']==img_name]
    # All images are copied to the train folder; nothing is written to
    # val_dst by this loop.
    shutil.copy(path, osp.join(train_dst, img_name))
    idx += 1
    dataset['images'].append({
        'coco_url': '',
        'date_captured': '',
        'file_name': img_name,
        'flickr_url': '',
        'id': idx,
        'license': 0,
        'width': w,
        'height': h
    })

    for i in range(len(anns)):
        ann = anns.iloc[i]  # one positional lookup per annotation
        sub_index += 1
        x1 = int(ann['x1_label'])
        y1 = int(ann['y1_label'])
        x2 = int(ann['x2_label'])
        y2 = int(ann['y2_label'])
        width = x2 - x1
        height = y2 - y1
        identity = int(ann['tracking_id_label'].split("_")[-1])
        # Read the occlusion flag from this row instead of relying on a
        # variable left behind by another cell.
        occlude = int(ann['occlude_label'])
        # Boxes that are both narrower than 50 and shorter than 100, and
        # labels with occlude_label == 2, keep their box but lose their
        # identity (pid -1).
        if width < 50 and height < 100:
            identity = -1
        if occlude == 2:
            identity = -1

        dataset['annotations'].append({
            'area': int(width*height),
            'bbox': [int(x1),int(y1),int(width),int(height)],
            'category_id': 1,
            'pid': identity,
            'id': sub_index,
            'image_id': idx,
            'iscrowd': 0,
            'segmentation': [],
        })