In [1]:
import os
import os.path as osp
import numpy as np
import math
import pandas as pd
import cv2
import re
from datetime import datetime
from tqdm import tqdm
import shutil
import random
from PIL import Image, ImageOps
def to_pil(cv_img):
    """Convert an OpenCV image (BGR channel order) into a PIL image (RGB order)."""
    rgb_pixels = cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB)
    pil_image = Image.fromarray(rgb_pixels)
    return pil_image
# Dataset root and the camera branch handled by this notebook.
src = "/media/acer/5f45949f-0fc7-4475-965b-e61989afcc10/FlowInc_data"
branch = "Yilan_03"
# The camera number is the last "_"-separated component of the branch name.
cam = int(branch.rsplit("_", 1)[-1])
# Full path of every file below <src>/<branch> whose name contains "jpg".
img_list = [
    osp.join(folder, name)
    for folder, _, names in os.walk(osp.join(src, branch))
    for name in names
    if 'jpg' in name
]

In [2]:
# Directory that receives the cleaned crops of this camera; created on first use.
dst = osp.join(src, f"images_{cam:02}_clean")
if not osp.exists(dst):
    os.mkdir(dst)

In [None]:
# Directory for the annotated sample frames of this camera.
# NOTE: this re-binds `dst`; any cell run afterwards that writes into `dst`
# targets this folder instead of the clean-crop folder.
dst = osp.join(src, f"sample_images_{cam:02}")
if osp.exists(dst):
    pass  # already present from an earlier run
else:
    os.mkdir(dst)

In [3]:
# Load the cleaned label table for this branch.
df = pd.read_csv(filepath_or_buffer=osp.join(src, f"{branch}_clean.csv"))

In [None]:
# df['fname'] = pd.Series(index=df.index, dtype=str)
# Preview the first five rows of the label table.
df.head(n=5)

In [None]:
# Show every row that has a missing value in at least one column.
df.loc[df.isnull().any(axis="columns")]

In [None]:
# Discard rows containing any missing value (the original row labels are kept).
df = df.dropna(axis="index", how="any")

In [None]:
# Draw a reproducible random subset of at most 264 rows.
np.random.seed(0)
# Sample row *positions* without replacement:
#  * after dropna() the index labels have gaps, so positions must be used with
#    .iloc (label-based .loc raises KeyError for labels that were dropped);
#  * sampling with replacement would duplicate index labels, and the later
#    per-row look-ups `labels.loc[i][...]` expect each label to be unique.
sample = np.random.choice(len(df), size=min(264, len(df)), replace=False)
df = df.iloc[sample]

In [4]:
def get_rotate_angle(dx, dy):
    """Return the signed angle, in degrees, of the offset (dx, dy) from image "up".

    The angle is measured from the negative-y direction (towards the top of the
    image) and is positive when ``dx`` is positive:

    * ``dx > 0, dy < 0``  ->  ``(0, 90)``
    * ``dx < 0, dy < 0``  ->  ``(-90, 0)``
    * ``dx > 0, dy > 0``  ->  ``(90, 180)``
    * ``dx < 0, dy > 0``  ->  ``(-180, -90)``

    Args:
        dx: horizontal offset of the point from the reference centre.
        dy: vertical offset of the point from the reference centre.

    Returns:
        The angle in degrees within ``[-180, 180]``; ``0`` when both offsets
        are zero.
    """
    if dx == 0 and dy == 0:
        # No direction can be derived from a zero offset.
        return 0
    # atan2 covers all four quadrants and the axes. The previous quadrant
    # chain returned -180 for a point straight above the centre (dx == 0,
    # dy < 0) and 0 for points level with it (dy == 0), which made the angle
    # jump at both axes.
    return math.degrees(math.atan2(dx, -dy))

def _get_normalized_img(crop, pos):
    """Pad a crop top and bottom up to its diagonal length, then rotate it.

    The rotation angle comes from the offset between the centre of the box
    ``pos`` and the module-level image centre ``(cx, cy)``.

    Args:
        crop: OpenCV (BGR) image array.
        pos: ``(x1, y1, x2, y2)`` box coordinates.

    Returns:
        ``(angle, pil_img)``: the angle in degrees and the padded, rotated
        PIL image.
    """
    padded = to_pil(crop)
    crop_w, crop_h = padded.size
    left, top, right, bottom = pos
    center_x = (left + right) / 2
    center_y = (top + bottom) / 2
    # Extra height needed for the image to be as tall as its own diagonal.
    diagonal = np.sqrt(crop_w**2 + crop_h**2)
    extra_h = diagonal - crop_h
    half_pad = int(extra_h // 2)
    padded = ImageOps.expand(padded, (0, half_pad, 0, half_pad))
    angle = get_rotate_angle(center_x - cx, center_y - cy)
    return angle, padded.rotate(angle)
def get_normalized_img(crop, pos):
    """Rotate an image by the angle of a box centre relative to the image centre.

    The angle comes from the offset between the centre of the box ``pos`` and
    the module-level image centre ``(cx, cy)``.

    Args:
        crop: OpenCV (BGR) image array to rotate.
        pos: ``(x1, y1, x2, y2)`` box coordinates.

    Returns:
        ``(angle, pil_img)``: the angle in degrees and the rotated PIL image.
    """
    source = to_pil(crop)
    left, top, right, bottom = pos
    center_x = (left + right) / 2
    center_y = (top + bottom) / 2
    angle = get_rotate_angle(center_x - cx, center_y - cy)
    return angle, source.rotate(angle)

In [5]:
import math
def rotate_A(level, shape):
    """Build the 2x3 affine matrix of a rotation about the centre of an image.

    Args:
        level: rotation angle in degrees (taken modulo 360).
        shape: ``(width, height)`` of the image being rotated.

    Returns:
        ``numpy.ndarray`` of shape ``(2, 3)`` and dtype ``float64`` holding
        the rotation part and the translation that keeps the centre fixed.
    """
    # copy from https://pillow.readthedocs.io/en/stable/_modules/PIL/Image.html#Image.rotate
    angle = level % 360.0

    w, h = shape

    post_trans = (0, 0)
    rotn_center = (w / 2.0, h / 2.0)
    angle = math.radians(angle)
    matrix = [
        round(math.cos(angle), 15),
        round(math.sin(angle), 15),
        0.0,
        round(-math.sin(angle), 15),
        round(math.cos(angle), 15),
        0.0,
    ]

    def transform(x, y, matrix):
        (a, b, c, d, e, f) = matrix
        return a * x + b * y + c, d * x + e * y + f

    # Translation column: rotate the negated centre, then shift it back.
    matrix[2], matrix[5] = transform(
        -rotn_center[0] - post_trans[0], -rotn_center[1] - post_trans[1], matrix
    )
    matrix[2] += rotn_center[0]
    matrix[5] += rotn_center[1]

    # `np.float` was only an alias of the builtin float and no longer exists in
    # current NumPy releases; float64 is the same dtype.
    return np.array(matrix).reshape(2, 3).astype(np.float64)
def apply_A(pt, A):
    """Map the 2-D point ``pt`` through the 2x3 affine matrix ``A``.

    Returns the transformed ``(x, y)`` as a length-2 array.
    """
    # Homogeneous coordinates (x, y, 1) so the translation column is applied.
    homogeneous = np.array([pt[0], pt[1], 1.], dtype=np.float32).T
    mapped = np.dot(A, homogeneous)
    return mapped[:2]

In [6]:
pattern = re.compile(r'(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})')
# f = open(osp.join(src, f'{branch}.txt'), 'w')
normalized_imgs = []
temp_timestamp = 0
ready = False
# Half-width (pixels) of the square around the image centre inside which
# low-aspect-ratio crops are kept untouched.
radius = 320 #170,240
# For every labelled frame: rotate the whole frame by the angle of each box
# relative to the image centre, cut out the extent of the rotated box and save
# it into `dst` under the name stored in the row's 'fname' column.
for path in tqdm(sorted(img_list)):
    img = cv2.imread(path)
    h, w = img.shape[:2]
    # get_normalized_img() reads these module-level centre coordinates.
    cx, cy = w * 0.5, h * 0.5
    fname = osp.basename(path)
    labels = df[df['file_name']==fname]
    fname = osp.splitext(fname)[0]
    if len(labels) == 0:
        # print(fname)
        continue

    for i in labels.index:
        row = labels.loc[i]
        x1 = int(row['x1_label'])
        y1 = int(row['y1_label'])
        x2 = int(row['x2_label'])
        y2 = int(row['y2_label'])
        sx = int(row['stand_x'])
        sy = int(row['stand_y'])
        occlude = int(row['occlude_label'])
        # f.writelines(f"{fname} -1 {x1} {y1} {x2} {y2} {sx} {sy}\n")
        pid = row['tracking_id_label']
        # Rows whose tracking id is '-1' are not cropped.
        if pid == '-1':
            continue
        pid = pid.split("_")[-1]
        if x2-x1 != 0 and y2-y1 != 0:
            angle, pil_img = get_normalized_img(img, (x1, y1, x2, y2))
            # Map the four box corners with the same rotation that was applied
            # to the frame.
            A = rotate_A(angle, (img.shape[1], img.shape[0]))
            rx1, ry1 = apply_A([x1, y1], A)
            rx2, ry2 = apply_A([x2, y2], A)
            rx3, ry3 = apply_A([x2, y1], A)
            rx4, ry4 = apply_A([x1, y2], A)
            np_img = np.array(pil_img)
            # Axis-aligned extent of the rotated corners (min / max corner).
            tr_x = int(min(rx1, rx2, rx3, rx4))
            tr_y = int(min(ry1, ry2, ry3, ry4))
            bl_x = int(max(rx1, rx2, rx3, rx4))
            bl_y = int(max(ry1, ry2, ry3, ry4))
            crop_w = bl_x - tr_x
            crop_h = bl_y - tr_y
            if crop_w <= 0 or crop_h <= 0:
                # Nothing to crop; also avoids dividing by zero below.
                continue
            if crop_w < 80 and crop_h < 80:
                continue
            if crop_h / crop_w < 1.3:
                # Centre of the crop. The x centre previously mixed in bl_y,
                # which put the in-radius test on a wrong x coordinate.
                bcx = (tr_x + bl_x) / 2
                bcy = (tr_y + bl_y) / 2
                if bcx > cx-radius and bcx < cx+radius and bcy > cy-radius and bcy < cy+radius:
                    pass
                else:
                    # Outside the central square: skip occluded boxes and trim
                    # the crop on both sides.
                    if occlude != 0:
                        continue
                    tr_x += int(crop_w/4.5)
                    bl_x -= int(crop_w/4.5)
            
            crop = Image.fromarray(np_img[tr_y:bl_y, tr_x:bl_x,:])
            img_name = row['fname']
            crop.save(osp.join(dst, img_name))
        else:
            # Zero-width or zero-height box: report the frame name.
            print(fname)
    # break

100%|██████████| 1300/1300 [02:53<00:00,  7.51it/s]


In [None]:
# Matches a 14-digit YYYYMMDDhhmmss timestamp.
pattern = re.compile(r'(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})')
# f = open(osp.join(src, f'{branch}.txt'), 'w')
normalized_imgs = []
temp_timestamp = 0
ready = False
# Derive an output file name for every usable box and record it, together with
# the tracking id, in `df`. Boxes that are too small or have occlude == 2 get
# the tracking id "-1".
for path in tqdm(sorted(img_list)):
    img = cv2.imread(path)
    h, w = img.shape[:2]
    # get_normalized_img() reads these module-level centre coordinates.
    cx, cy = w * 0.5, h * 0.5
    fname = osp.basename(path)
    labels = df[df['file_name']==fname]
    fname = osp.splitext(fname)[0]
    if len(labels) == 0:
        # print(fname)
        continue

    for i in labels.index:
        row = labels.loc[i]
        x1 = int(row['x1_label'])
        y1 = int(row['y1_label'])
        x2 = int(row['x2_label'])
        y2 = int(row['y2_label'])
        sx = int(row['stand_x'])
        sy = int(row['stand_y'])
        # f.writelines(f"{fname} -1 {x1} {y1} {x2} {y2} {sx} {sy}\n")
        uniform = int(row['uniform_label'])
        occlude = int(row['occlude_label'])
        pid = row['tracking_id_label'].split("_")[-1]
        color = (0,255,0)
        if uniform:
            color = (0,0,255)
        if occlude == 2:
            color = (0,0,0)
        if x2-x1 != 0 and y2-y1 != 0:
            ready = True
            # cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
            _, normalized_img = get_normalized_img(img[y1:y2, x1:x2, :], (x1, y1, x2, y2))
            crop_w, crop_h = normalized_img.size
            # Write through df.loc[row, column]: the chained form
            # df[column].loc[row] = value may act on a temporary copy and then
            # leaves `df` unchanged.
            if crop_w < 50 and crop_h < 100:
                df.loc[i, "tracking_id_label"] = "-1"
                continue
            if occlude == 2:
                df.loc[i, "tracking_id_label"] = "-1"
                continue
            # The capture time is parsed from the last "_" field of the frame name.
            date_str = fname.split("_")[-1]
            year, month, day, hour, minute, second = map(int, pattern.search(date_str).groups())
            date = datetime(year, month, day, hour, minute, second)
            timestamp = int(datetime.timestamp(date))
            img_name = f"{pid}_c{cam}s1_{uniform}_{timestamp}_{i:04}.jpg"
            df.loc[i, "tracking_id_label"] = f"01_{pid}"
            df.loc[i, 'fname'] = img_name
            # break
            # normalized_img.save(osp.join(dst, img_name))
        else:
            # Zero-width or zero-height box: report the frame name.
            print(fname)
    # Stop after the first frame that contained a non-degenerate box.
    if ready:
        break
# f.close()

In [None]:
# Visual check: outline the extent of the most recently rotated box on the
# most recently rotated frame (uses variables left over from the loops above).
np_img = np.array(pil_img)
tr_x, bl_x = int(min(rx1, rx2, rx3, rx4)), int(max(rx1, rx2, rx3, rx4))
tr_y, bl_y = int(min(ry1, ry2, ry3, ry4)), int(max(ry1, ry2, ry3, ry4))
cv2.rectangle(np_img, pt1=(tr_x, tr_y), pt2=(bl_x, bl_y), color=color, thickness=2)
Image.fromarray(np_img)

In [None]:
# Crops currently stored under `dst`: every file whose name contains "jpg".
crop_img_list = [
    osp.join(folder, name)
    for folder, _, names in os.walk(dst)
    for name in names
    if 'jpg' in name
]
# Target folder of the first renumbering pass; created on first use.
clean_dst = osp.join(src, f"images_{cam:02}_1th_clean")
if not osp.exists(clean_dst):
    os.mkdir(clean_dst)

In [None]:
# Sanity check: one crop on disk per row that has a file name.
len(df['fname'].dropna()) == len(crop_img_list)

In [None]:
# First renumbering pass over the crops in sorted name order. A new running
# identity number is started whenever the parsed pid changes, or when the pid
# is unchanged but the timestamp differs from the previous crop by more than
# 50. Each crop is copied to `clean_dst` under its new name and `df` is updated.
temp_timestamp = -1
temp_pid = -1
num = 1
for path in tqdm(sorted(crop_img_list)):
    fname = osp.basename(path)
    fname = osp.splitext(fname)[0]
    try:
        pid, _, uniform, timestamp, index = fname.split("_")
        index = int(index)
    except ValueError:
        # Name does not have five "_" fields ending in an integer index:
        # report it and stop. Other exception types are no longer swallowed.
        print(fname)
        break
    pid = int(pid)
    uniform = int(uniform)
    timestamp = int(timestamp)
    if pid != temp_pid and temp_pid > 0:
        num += 1
    elif np.abs(timestamp - temp_timestamp) > 50 and temp_timestamp > 0:
        num += 1
    temp_pid = pid
    temp_timestamp = timestamp
    img_name = f"{num:04}_c{cam}s1_{uniform}_{timestamp}_{index}.jpg"
    # Write through df.loc[row, column]: the chained form
    # df[column].loc[row] = value may act on a temporary copy and then leaves
    # `df` unchanged.
    df.loc[index, "tracking_id_label"] = f"01_{num:04}"
    df.loc[index, 'fname'] = img_name
    shutil.copy(path, osp.join(clean_dst, img_name))

In [None]:
# Preview the rows up to and including index label 10.
df.loc[:10, :]

In [None]:
# Manual merge table: every id in a list is replaced by the key in front of it
# (presumably the same person seen under several ids - verify against the crops).
pids = {
    1: [18, 6, 81],
    2: [31, 78],
    4: [15, 36, 69],
    5: [14, 26, 59, 70],
    7: [13, 27, 39, 65, 75],
    8: [11, 42, 72],
    9: [19, 32, 62, 73],
    10: [12],
    16: [23, 24, 66, 74],
}
# Inverted lookup: id to replace -> id to keep.
pid_map = {}
for pid, merged_ids in pids.items():
    pid_map.update(dict.fromkeys(merged_ids, pid))

In [None]:
# Manual merge table: every id in a list is replaced by the key in front of it
# (presumably the same person seen under several ids - verify against the crops).
pids = {
    1: [4, 12, 21, 22, 37],
    2: [3, 13],
    5: [6, 17],
    8: [29],
    38: [74],
}
# Inverted lookup: id to replace -> id to keep.
pid_map = {}
for pid, merged_ids in pids.items():
    pid_map.update(dict.fromkeys(merged_ids, pid))

In [None]:
# Manual merge table: every id in a list is replaced by the key in front of it
# (presumably the same person seen under several ids - verify against the crops).
pids = {
    1: [2, 3, 4],
    6: [30],
    9: [15, 17, 35],
    10: [28],
    12: [25],
    14: [27],
}
# Inverted lookup: id to replace -> id to keep.
pid_map = {}
for pid, merged_ids in pids.items():
    pid_map.update(dict.fromkeys(merged_ids, pid))

In [None]:
# Crops produced by the first renumbering pass: every file under `clean_dst`
# whose name contains "jpg".
crop_img_list2 = [
    osp.join(folder, name)
    for folder, _, names in os.walk(clean_dst)
    for name in names
    if 'jpg' in name
]
# Target folder of the second (merge) pass; created on first use.
clean_dst2 = osp.join(src, f"images_{cam:02}_2th_clean")
if not osp.exists(clean_dst2):
    os.mkdir(clean_dst2)

In [None]:
# pid_map = {10:3, 18:3, 8:4, 25:5, 11:5, 9:6, 27:13, 22:13, 29:15, 23:15, 24:16, 21:17, 26:7, 20:7}
# Second pass: replace every id listed in `pid_map` by the id it maps to, copy
# the crop to `clean_dst2` under the resulting name and update `df`.
for path in tqdm(sorted(crop_img_list2)):
    fname = osp.basename(path)
    fname = osp.splitext(fname)[0]
    pid, _, uniform, timestamp, index = fname.split("_")
    index = int(index)
    pid = int(pid)
    uniform = int(uniform)
    timestamp = int(timestamp)
    if pid in pid_map:
        pid = pid_map[pid]
    # Keep the "_" between timestamp and index so the name still splits into
    # the five fields parsed above (the separator was missing here before).
    img_name = f"{pid:04}_c{cam}s1_{uniform}_{timestamp}_{index}.jpg"
    # Write through df.loc[row, column]: the chained form
    # df[column].loc[row] = value may act on a temporary copy and then leaves
    # `df` unchanged.
    df.loc[index, "tracking_id_label"] = f"01_{pid:04}"
    df.loc[index, 'fname'] = img_name
    shutil.copy(path, osp.join(clean_dst2, img_name))

In [None]:
# Preview the rows up to and including index label 10.
df.loc[:10, :]

In [None]:
# Write the updated labels back to the file this notebook reads at the top.
# index=False: the table is read back without an index column, so writing the
# index would add a new "Unnamed: 0" column on every load/save round trip.
df.to_csv(osp.join(src, f"{branch}_clean.csv"), index=False)

In [None]:
# List the distinct tracking ids currently present in the table.
df.loc[:, 'tracking_id_label'].unique()

In [None]:
# NOTE(review): `sample_df` is not assigned in any cell visible here, so this
# line raises NameError on a fresh kernel - confirm where it should come from.
sample_df.head(n=5)

In [None]:
# Show the output path built from the current `dst` and the last `fname`.
os.path.join(dst, f"{fname}.jpg")

In [None]:
pattern = re.compile(r'(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})')
# Draw every labelled box, its stand point and its p/u/o captions onto the
# frame and write the annotated frame into `dst`.
for path in tqdm(sorted(img_list)):
    img = cv2.imread(path)
    h, w = img.shape[:2]
    cx, cy = w * 0.5, h * 0.5
    fname = osp.basename(path)
    labels = df[df['file_name']==fname]
    fname = osp.splitext(fname)[0]
    if len(labels) == 0:
        continue

    for i in range(len(labels)):
        row = labels.iloc[i]
        x1 = int(row['x1_label'])
        y1 = int(row['y1_label'])
        x2 = int(row['x2_label'])
        y2 = int(row['y2_label'])
        uniform = int(row['uniform_label'])
        occlude = int(row['occlude_label'])
        pid = int(row['tracking_id_label'].split("_")[-1])
        spt_x = int(row['stand_x'])
        spt_y = int(row['stand_y'])
        color = (0,255,0)
        cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
        # Stand point: red dot on a slightly larger white disc.
        for dot_radius, dot_color in ((7, (255,255,255)), (5, (0,0,255))):
            cv2.circle(img, (spt_x, spt_y), dot_radius, dot_color, -1)
        # Captions to the right of the box: a thick white pass first, then a
        # thin black pass on top of it.
        captions = ((f"p[{pid}]", 5), (f"u[{uniform}]", 30), (f"o[{occlude}]", 55))
        for text_color, thickness in (((255,255,255), 2), ((0,0,0), 1)):
            for text, offset_y in captions:
                cv2.putText(img, text, (x2+5, y1+offset_y), cv2.FONT_HERSHEY_DUPLEX, 0.7, text_color, thickness)
    cv2.imwrite(osp.join(dst, f"{fname}.jpg"), img)
    # break

In [None]:
import pycocotools.coco as coco
import json
from collections import defaultdict

data_path = "/media/acer/5f45949f-0fc7-4475-965b-e61989afcc10/FlowInc_data"
coco_branch = 'flow_03'

# Output layout: <data_path>/<coco_branch>/{train2017,val2017}.
coco_src = osp.join(data_path, coco_branch)
train_dst = osp.join(coco_src, 'train2017')
val_dst = osp.join(coco_src, 'val2017')
for folder in (coco_src, train_dst, val_dst):
    if not osp.exists(folder):
        os.mkdir(folder)
# NOTE: re-binds `df` to the branch's "<branch>.csv" label table.
label_src = osp.join(src, f"{branch}.csv")
df = pd.read_csv(label_src)

# Every file below <src>/<branch> whose name contains ".jpg" or ".png".
img_paths = [
    osp.join(folder, name)
    for folder, _, names in os.walk(osp.join(src, branch))
    for name in names
    if '.jpg' in name or '.png' in name
]

In [None]:
# Re-identification output folders below `coco_src`; created on first use.
reid_train_dst = osp.join(coco_src, f"bounding_box_train")
reid_test_dst = osp.join(coco_src, f"bounding_box_test")
reid_query_dst = osp.join(coco_src, f"query")
for folder in (reid_train_dst, reid_test_dst, reid_query_dst):
    if not osp.exists(folder):
        os.mkdir(folder)

# Source crops: output folder of the second cleaning pass for camera 3.
cam = 3
reid_src = osp.join(data_path, f"images_{cam:02}_2th_clean")
reid_img_list = [
    osp.join(folder, name)
    for folder, _, names in os.walk(reid_src)
    for name in names
    if 'jpg' in name
]

In [None]:
# Copy crops into the re-id training folder. A crop qualifies when it is at
# least 80 px wide and 80 px high; an identity is kept only when it has at
# least 4 qualifying crops. The identity is the first four characters of the
# file name.
pids = defaultdict(int)
eligible = []
for path in reid_img_list:
    # Only the size is needed; the context manager closes the file handle at
    # once. Each image is now opened a single time instead of twice.
    with Image.open(path) as img:
        w, h = img.size
    if w < 80 or h < 80:
        continue
    fname = osp.basename(path)
    pid = int(fname[:4])
    pids[pid] += 1
    eligible.append((path, fname, pid))
for path, fname, pid in eligible:
    if pids[pid] < 4:
        continue
    fname = f"{pid:04}" + fname[4:]
    shutil.copy(path, osp.join(reid_train_dst, fname))
    # break

In [None]:
# Preview the first ten collected image paths.
img_paths[0:10]

In [None]:
# COCO-style container with a single "person" category.
dataset = {
    "info": {},
    "licenses": [],
    "images": [],
    "annotations": [],
    "categories": []
}
dataset['categories'].append({
    'id': 1,
    'name': "person",
    'supercategory': "people",
    'keypoints': [0],
    'skeleton': []
})

# anno_paths = [osp.join(root, f) for root, _, files in os.walk(src) for f in files if '.txt' in f]

sub_index = 0  # running annotation id
idx = 0        # running image id
for path in tqdm(img_paths):
    file_name = osp.basename(path)
    anns = df[df['file_name']==file_name]
    # Images without labels are skipped before they are decoded.
    if len(anns) == 0:
        continue
    img = cv2.imread(path)
    h, w = img.shape[:2]
    # A dedicated name is used for the copy target so the notebook-wide `dst`
    # directory variable is not overwritten with a file path.
    shutil.copy(path, osp.join(train_dst, file_name))
    idx += 1
    dataset['images'].append({
        'coco_url': '',
        'date_captured': '',
        'file_name': file_name,
        'flickr_url': '',
        'id': idx,
        'license': 0,
        'width': w,
        'height': h
    })

    for i in range(len(anns)):
        sub_index += 1
        ann = anns.iloc[i]
        x1 = int(ann['x1_label'])
        y1 = int(ann['y1_label'])
        x2 = int(ann['x2_label'])
        y2 = int(ann['y2_label'])
        sx = int(ann['stand_x'])
        sy = int(ann['stand_y'])
        uniform = int(ann['uniform_label'])
        occlude = int(ann['occlude_label'])
        width = x2 - x1
        height = y2 - y1
        # The identity is the number after the last "_" of the tracking id.
        identity = int(ann['tracking_id_label'].split("_")[-1])
        # if width < 50 and height < 100:
        #     identity = -1
        # if occlude == 2:
        #     identity = -1

        dataset['annotations'].append({
            'area': int(width*height),
            'bbox': [int(x1),int(y1),int(width),int(height)],
            'category_id': 1,
            'pid': identity,
            'id': sub_index,
            'image_id': idx,
            'iscrowd': 0,
            'num_keypoints':1,
            'keypoints': [[sx, sy]],
            'segmentation': [],
        })
    # shutil.copy(path, osp.join(val_dst, osp.basename(path)))
    # break

In [None]:
# Serialise the assembled dataset to <coco_src>/original/instances_train2017.json.
json_dst = osp.join(coco_src, 'original')
if not osp.exists(json_dst):
    os.mkdir(json_dst)
with open(osp.join(json_dst, 'instances_train2017.json'), 'w') as f:
    f.write(json.dumps(dataset))

In [None]:
# Display the directory the annotation JSON was written to.
json_dst