In [2]:
import os
import os.path as osp
import numpy as np
import math
import pandas as pd
import cv2
import re
from datetime import datetime
from tqdm import tqdm
import shutil
import random
from PIL import Image, ImageOps
def to_pil(cv_img):
    img = cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB)
    return Image.fromarray(img)
src = "/media/acer/5f45949f-0fc7-4475-965b-e61989afcc10/FlowInc_data"
branch = "Kaohsiung_02"
cam = int(branch.split("_")[-1])
img_list = [osp.join(root, f) for root, _, files in os.walk(osp.join(src, branch)) for f in files if 'jpg' in f]

In [2]:
dst = osp.join(src, f"images_{cam:02}")
if not osp.exists(dst):
    os.mkdir(osp.join(src, f"images_{cam:02}"))

In [2]:
dst = osp.join(src, f"sample_images_{cam:02}")
if not osp.exists(dst):
    os.mkdir(dst)

In [3]:
df = pd.read_csv(osp.join(src, f"{branch}.csv"))

In [4]:
df['fname'] = pd.Series(index=df.index, dtype=str)
df.head()

Unnamed: 0,file_name,x1_label,y1_label,x2_label,y2_label,occlude_label,tracking_id_label,uniform_label,stand_x,stand_y,fname
0,02_20200402140000.jpg,1530,554,1711,746,0,02_0001,1,1534,728,
1,02_20200402140000.jpg,1626,483,1772,642,1,02_0002,1,1624,652,
2,02_20200402140000.jpg,403,707,490,761,0,02_0003,0,478,740,
3,02_20200402140000.jpg,404,701,484,759,2,02_0004,0,470,736,
4,02_20200402140001.jpg,1530,554,1711,746,0,02_0001,1,1534,728,


In [5]:
df[df.isna().any(axis=1)]

Unnamed: 0,file_name,x1_label,y1_label,x2_label,y2_label,occlude_label,tracking_id_label,uniform_label,stand_x,stand_y,fname
0,02_20200402140000.jpg,1530,554,1711,746,0,02_0001,1,1534,728,
1,02_20200402140000.jpg,1626,483,1772,642,1,02_0002,1,1624,652,
2,02_20200402140000.jpg,403,707,490,761,0,02_0003,0,478,740,
3,02_20200402140000.jpg,404,701,484,759,2,02_0004,0,470,736,
4,02_20200402140001.jpg,1530,554,1711,746,0,02_0001,1,1534,728,
...,...,...,...,...,...,...,...,...,...,...,...
5328,02_20200402141259.jpg,483,651,598,732,0,02_0004,0,583,721,
5329,02_20200402141259.jpg,414,726,487,766,1,02_0012,0,484,761,
5330,02_20200402141259.jpg,372,1097,427,1125,0,02_0013,0,423,1106,
5331,02_20200402141259.jpg,410,744,494,811,0,02_0016,0,490,782,


In [6]:
df = df.dropna()

In [7]:
np.random.seed(0)
sample = np.random.randint(0, len(df), 264)
df = df.loc[sample]

In [5]:
def get_rotate_angle(dx, dy):
    if dy == 0:
        return 0
    angle = np.arctan(np.abs(dx/dy)) / math.pi * 180
    if dx > 0 and dy < 0:
        return angle
    elif dx < 0 and dy < 0:
        return -1 * angle
    elif dx > 0 and dy > 0:
        return 180 - angle
    else:
        return -1 * (180 - angle)

def get_normalized_img(crop, pos):
    pil_img = to_pil(crop)
    crop_w, crop_h = pil_img.size
    x1, y1, x2, y2 = pos
    x, y = (x1+x2) / 2, (y1+y2) / 2
    diag_len = np.sqrt(crop_w**2 + crop_h**2)
    delta_h = diag_len - crop_h
    padding = (0, int(delta_h//2), 0, int(delta_h//2))
    pil_img = ImageOps.expand(pil_img, padding)
    angle = get_rotate_angle(x-cx, y-cy)
    pil_img = pil_img.rotate(angle)
    return pil_img

In [6]:
dst

'/media/acer/5f45949f-0fc7-4475-965b-e61989afcc10/FlowInc_data/images_02'

In [7]:
pattern = re.compile(r'(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})')
f = open(osp.join(src, f'{branch}.txt'), 'w')
normalized_imgs = []
temp_timestamp = 0
for path in tqdm(sorted(img_list)):
    img = cv2.imread(path)
    h, w = img.shape[:2]
    cx, cy = w * 0.5, h * 0.5
    fname = osp.basename(path)
    labels = df[df['file_name']==fname]
    fname = osp.splitext(fname)[0]
    if len(labels) == 0:
        # print(fname)
        continue

    for i in labels.index:
        x1 = int(labels.loc[i]['x1_label'])
        y1 = int(labels.loc[i]['y1_label'])
        x2 = int(labels.loc[i]['x2_label'])
        y2 = int(labels.loc[i]['y2_label'])
        sx = int(labels.loc[i]['stand_x'])
        sy = int(labels.loc[i]['stand_y'])
        f.writelines(f"{fname} -1 {x1} {y1} {x2} {y2} {sx} {sy}\n")
        uniform = int(labels.loc[i]['uniform_label'])
        occlude = int(labels.loc[i]['occlude_label'])
        pid = labels.loc[i]['tracking_id_label'].split("_")[-1]
        color = (0,255,0)
        if uniform:
            color = (0,0,255)
        if occlude == 2:
            color = (0,0,0)
        if x2-x1 != 0 and y2-y1 != 0:
            normalized_img = get_normalized_img(img[y1:y2, x1:x2, :], (x1, y1, x2, y2))
            crop_w, crop_h = normalized_img.size
            if crop_w < 50 and crop_h < 100:
                df["tracking_id_label"].loc[i] = "-1"
                continue
            if occlude == 2:
                df["tracking_id_label"].loc[i] = "-1"
                continue
            date_str = fname.split("_")[-1]
            year, month, day, hour, minute, second = map(int, pattern.search(date_str).groups())
            date = datetime(year, month, day, hour, minute, second)
            timestamp = int(datetime.timestamp(date))
            img_name = f"{pid}_c{cam}s1_{uniform}_{timestamp}_{i:04}.jpg"
            df["tracking_id_label"].loc[i] = f"01_{pid}"
            df['fname'].loc[i] = img_name
            # break
            normalized_img.save(osp.join(dst, img_name))
        else:
            print(fname)
f.close()

100%|██████████| 1700/1700 [01:35<00:00, 22.28it/s]


In [8]:
crop_img_list = [osp.join(root, f) for root, _, files in os.walk(dst) for f in files if 'jpg' in f]
clean_dst = osp.join(src, f"images_{cam:02}_1th_clean")
if not osp.exists(clean_dst):
    os.mkdir(osp.join(src, f"images_{cam:02}_1th_clean"))

In [9]:
len(crop_img_list) == len(df['fname'].dropna())

True

In [10]:
temp_timestamp = -1
temp_pid = -1
num = 1
for path in tqdm(sorted(crop_img_list)):
    fname = osp.basename(path)
    fname = osp.splitext(fname)[0]
    try:
        pid, _, uniform, timestamp, index = fname.split("_")
        index = int(index)
    except:
        print(fname)
        break
    pid = int(pid)
    uniform = int(uniform)
    timestamp = int(timestamp)
    if pid != temp_pid and temp_pid > 0:
        num += 1
    else:
        if np.abs(timestamp - temp_timestamp) > 50 and temp_timestamp > 0:
            num += 1
    temp_pid = pid
    temp_timestamp = timestamp
    img_name = f"{num:04}_c{cam}s1_{uniform}_{timestamp}_{index}.jpg"
    df["tracking_id_label"].loc[index] = f"01_{num:04}"
    df['fname'].loc[index] = img_name
    shutil.copy(path, osp.join(clean_dst, img_name))

100%|██████████| 3997/3997 [00:03<00:00, 1267.94it/s]


In [11]:
df.loc[:10]

Unnamed: 0,file_name,x1_label,y1_label,x2_label,y2_label,occlude_label,tracking_id_label,uniform_label,stand_x,stand_y,fname
0,02_20200402140000.jpg,1530,554,1711,746,0,01_0002,1,1534,728,0002_c2s1_1_1585807200_0.jpg
1,02_20200402140000.jpg,1626,483,1772,642,1,01_0005,1,1624,652,0005_c2s1_1_1585807200_1.jpg
2,02_20200402140000.jpg,403,707,490,761,0,01_0007,0,478,740,0007_c2s1_0_1585807200_2.jpg
3,02_20200402140000.jpg,404,701,484,759,2,-1,0,470,736,
4,02_20200402140001.jpg,1530,554,1711,746,0,01_0002,1,1534,728,0002_c2s1_1_1585807201_4.jpg
5,02_20200402140001.jpg,1626,483,1772,642,1,01_0005,1,1624,652,0005_c2s1_1_1585807201_5.jpg
6,02_20200402140001.jpg,436,629,514,672,0,01_0007,0,507,664,0007_c2s1_0_1585807201_6.jpg
7,02_20200402140001.jpg,436,627,511,685,2,-1,0,499,660,
8,02_20200402140002.jpg,1530,554,1711,746,0,01_0002,1,1534,728,0002_c2s1_1_1585807202_8.jpg
9,02_20200402140002.jpg,1679,525,1776,642,1,01_0005,1,1624,652,0005_c2s1_1_1585807202_9.jpg


In [10]:
pids = {
    1:[18,6,81],
    2:[31,78],
    4:[15,36,69],
    5:[14,26,59,70],
    7:[13,27,39,65,75],
    8:[11,42,72],
    9:[19,32,62,73],
    10:[12],
    16:[23,24,66,74],
    }
pid_map = {}
for pid in pids:
    for copy in pids[pid]:
        pid_map[copy] = pid

In [12]:
pids = {
    1:[4,12,21,22,37],
    2:[3,13],
    5:[6,17],
    8:[29],
    38:[74]
    }
pid_map = {}
for pid in pids:
    for copy in pids[pid]:
        pid_map[copy] = pid

In [13]:
crop_img_list2 = [osp.join(root, f) for root, _, files in os.walk(clean_dst) for f in files if 'jpg' in f]
clean_dst2 = osp.join(src, f"images_{cam:02}_2th_clean")
if not osp.exists(clean_dst2):
    os.mkdir(osp.join(src, f"images_{cam:02}_2th_clean"))

In [14]:
# pid_map = {10:3, 18:3, 8:4, 25:5, 11:5, 9:6, 27:13, 22:13, 29:15, 23:15, 24:16, 21:17, 26:7, 20:7}
for path in tqdm(sorted(crop_img_list2)):
    fname = osp.basename(path)
    fname = osp.splitext(fname)[0]
    pid, _, uniform, timestamp, index = fname.split("_")
    index = int(index)
    pid = int(pid)
    uniform = int(uniform)
    timestamp = int(timestamp)
    if pid in pid_map:
        pid = pid_map[pid]
    img_name = f"{pid:04}_c{cam}s1_{uniform}_{timestamp}{index}.jpg"
    df["tracking_id_label"].loc[index] = f"01_{pid:04}"
    df['fname'].loc[index] = img_name
    shutil.copy(path, osp.join(clean_dst2, img_name))

100%|██████████| 3997/3997 [00:02<00:00, 1665.75it/s]


In [15]:
df.loc[:10]

Unnamed: 0,file_name,x1_label,y1_label,x2_label,y2_label,occlude_label,tracking_id_label,uniform_label,stand_x,stand_y,fname
0,02_20200402140000.jpg,1530,554,1711,746,0,01_0002,1,1534,728,0002_c2s1_1_15858072000.jpg
1,02_20200402140000.jpg,1626,483,1772,642,1,01_0005,1,1624,652,0005_c2s1_1_15858072001.jpg
2,02_20200402140000.jpg,403,707,490,761,0,01_0007,0,478,740,0007_c2s1_0_15858072002.jpg
3,02_20200402140000.jpg,404,701,484,759,2,-1,0,470,736,
4,02_20200402140001.jpg,1530,554,1711,746,0,01_0002,1,1534,728,0002_c2s1_1_15858072014.jpg
5,02_20200402140001.jpg,1626,483,1772,642,1,01_0005,1,1624,652,0005_c2s1_1_15858072015.jpg
6,02_20200402140001.jpg,436,629,514,672,0,01_0007,0,507,664,0007_c2s1_0_15858072016.jpg
7,02_20200402140001.jpg,436,627,511,685,2,-1,0,499,660,
8,02_20200402140002.jpg,1530,554,1711,746,0,01_0002,1,1534,728,0002_c2s1_1_15858072028.jpg
9,02_20200402140002.jpg,1679,525,1776,642,1,01_0005,1,1624,652,0005_c2s1_1_15858072029.jpg


In [16]:
df.to_csv(osp.join(src, f"{branch}_clean.csv"))

In [17]:
df['tracking_id_label'].unique()

array(['01_0002', '01_0005', '01_0007', '-1', '01_0010', '01_0014',
       '01_0018', '01_0001', '01_0025', '01_0008', '01_0030', '01_0032',
       '01_0035', '01_0038', '01_0047', '01_0042', '01_0051', '01_0053',
       '01_0056', '01_0033', '01_0043', '01_0039', '01_0058', '01_0054',
       '01_0048', '01_0061', '01_0063', '01_0065', '01_0068', '01_0071',
       '01_0088', '01_0090', '01_0101', '01_0102', '01_0105', '01_0094',
       '01_0112', '01_0114', '01_0126', '01_0125', '01_0124', '01_0129',
       '01_0130', '01_0131', '01_0128', '01_0127', '01_0132', '01_0133',
       '01_0118', '01_0119', '01_0120', '01_0019', '01_0015', '01_0011',
       '01_0059', '01_0036', '01_0026', '01_0040', '01_0044', '01_0024',
       '01_0136', '01_0075', '01_0072', '01_0077', '01_0079', '01_0083',
       '01_0085', '01_0086', '01_0081', '01_0087', '01_0092', '01_0091',
       '01_0093', '01_0137', '01_0135', '01_0095', '01_0134', '01_0097',
       '01_0098', '01_0100', '01_0099', '01_0031', '01_0

In [40]:
sample_df.head()

1911    01_20200405144152.jpg
3002    01_20200405144453.jpg
820     01_20200405143904.jpg
3698    01_20200405144712.jpg
843     01_20200405143911.jpg
Name: file_name, dtype: object

In [18]:
osp.join(dst, f"{fname}.jpg")

'/media/acer/5f45949f-0fc7-4475-965b-e61989afcc10/FlowInc_data/sample_images_03/03_20200419172139.jpg'

In [10]:
pattern = re.compile(r'(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})')
for path in tqdm(sorted(img_list)):
    img = cv2.imread(path)
    h, w = img.shape[:2]
    cx, cy = w * 0.5, h * 0.5
    fname = osp.basename(path)
    labels = df[df['file_name']==fname]
    fname = osp.splitext(fname)[0]
    if len(labels) == 0:
        continue

    for i in range(len(labels)):
        x1 = int(labels.iloc[i]['x1_label'])
        y1 = int(labels.iloc[i]['y1_label'])
        x2 = int(labels.iloc[i]['x2_label'])
        y2 = int(labels.iloc[i]['y2_label'])
        uniform = int(labels.iloc[i]['uniform_label'])
        occlude = int(labels.iloc[i]['occlude_label'])
        pid = int(labels.iloc[i]['tracking_id_label'].split("_")[-1])
        spt_x = int(labels.iloc[i]['stand_x']) 
        spt_y = int(labels.iloc[i]['stand_y'])
        color = (0,255,0)
        cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
        cv2.circle(img, (spt_x, spt_y), 7, (255,255,255), -1)
        cv2.circle(img, (spt_x, spt_y), 5, (0,0,255), -1)
        cv2.putText(img, f"p[{pid}]", (x2+5, y1+5), cv2.FONT_HERSHEY_DUPLEX, 0.7, (255,255,255), 2)
        cv2.putText(img, f"u[{uniform}]", (x2+5, y1+30), cv2.FONT_HERSHEY_DUPLEX, 0.7, (255,255,255), 2)
        cv2.putText(img, f"o[{occlude}]", (x2+5, y1+55), cv2.FONT_HERSHEY_DUPLEX, 0.7, (255,255,255), 2)
        cv2.putText(img, f"p[{pid}]", (x2+5, y1+5), cv2.FONT_HERSHEY_DUPLEX, 0.7, (0,0,0), 1)
        cv2.putText(img, f"u[{uniform}]", (x2+5, y1+30), cv2.FONT_HERSHEY_DUPLEX, 0.7, (0,0,0), 1)
        cv2.putText(img, f"o[{occlude}]", (x2+5, y1+55), cv2.FONT_HERSHEY_DUPLEX, 0.7, (0,0,0), 1)
    cv2.imwrite(osp.join(dst, f"{fname}.jpg"), img)
    # break

100%|██████████| 1300/1300 [00:50<00:00, 30.44it/s]


In [10]:
import pycocotools.coco as coco
import json
from collections import defaultdict

data_path = "/media/acer/5f45949f-0fc7-4475-965b-e61989afcc10/FlowInc_data"
coco_branch = 'flow_01'

coco_src = osp.join(data_path, coco_branch)
train_dst = osp.join(coco_src, 'train2017')
val_dst = osp.join(coco_src, 'val2017')
if not osp.exists(coco_src):
    os.mkdir(coco_src)
if not osp.exists(train_dst):
    os.mkdir(train_dst)
if not osp.exists(val_dst):
    os.mkdir(val_dst)
label_src = osp.join(src, f"{branch}.csv")
df = pd.read_csv(label_src)

img_paths = [osp.join(root, f) for root, _, files in os.walk(osp.join(src, branch)) for f in files if '.jpg' in f or '.png' in f]

In [11]:
reid_train_dst = osp.join(coco_src, f"bounding_box_train")
if not osp.exists(reid_train_dst):
    os.mkdir(reid_train_dst)
reid_test_dst = osp.join(coco_src, f"bounding_box_test")
if not osp.exists(reid_test_dst):
    os.mkdir(reid_test_dst)
reid_query_dst = osp.join(coco_src, f"query")
if not osp.exists(reid_query_dst):
    os.mkdir(reid_query_dst)

cam = 1
reid_src = osp.join(data_path, f"images_{cam:02}_2th_clean")
reid_img_list = [osp.join(root, f) for root, _, files in os.walk(reid_src) for f in files if 'jpg' in f]

In [9]:
pids = d
for path in reid_img_list:
    img = Image.open(path)
    w, h = img.size
    if w < 80 or h < 80:
        continue
    fname = osp.basename(path)
    pid = int(fname[:4])
    fname = f"{pid:04}" + fname[4:]
    shutil.copy(path, osp.join(reid_train_dst, fname))
    # break

In [3]:
img_paths[:10]

['/media/acer/5f45949f-0fc7-4475-965b-e61989afcc10/FlowInc_data/Yilan_03/03_20200419170035.jpg',
 '/media/acer/5f45949f-0fc7-4475-965b-e61989afcc10/FlowInc_data/Yilan_03/03_20200419170258.jpg',
 '/media/acer/5f45949f-0fc7-4475-965b-e61989afcc10/FlowInc_data/Yilan_03/03_20200419170329.jpg',
 '/media/acer/5f45949f-0fc7-4475-965b-e61989afcc10/FlowInc_data/Yilan_03/03_20200419171230.jpg',
 '/media/acer/5f45949f-0fc7-4475-965b-e61989afcc10/FlowInc_data/Yilan_03/03_20200419170403.jpg',
 '/media/acer/5f45949f-0fc7-4475-965b-e61989afcc10/FlowInc_data/Yilan_03/03_20200419170208.jpg',
 '/media/acer/5f45949f-0fc7-4475-965b-e61989afcc10/FlowInc_data/Yilan_03/03_20200419170233.jpg',
 '/media/acer/5f45949f-0fc7-4475-965b-e61989afcc10/FlowInc_data/Yilan_03/03_20200419170405.jpg',
 '/media/acer/5f45949f-0fc7-4475-965b-e61989afcc10/FlowInc_data/Yilan_03/03_20200419170802.jpg',
 '/media/acer/5f45949f-0fc7-4475-965b-e61989afcc10/FlowInc_data/Yilan_03/03_20200419171322.jpg']

In [4]:
dataset = {
    "info": {},
    "licenses": [],
    "images": [],
    "annotations": [],
    "categories": []
}
dataset['categories'].append({
    'id': 1,
    'name': "person",
    'supercategory': "people",
    'keypoints': [0],
    'skeleton': []
})

# anno_paths = [osp.join(root, f) for root, _, files in os.walk(src) for f in files if '.txt' in f]

sub_index = 0
idx = 0
for path in tqdm(img_paths):
    img = cv2.imread(path)
    h, w = img.shape[:2]
    dst = osp.basename(path)
    anns = df[df['file_name']==dst]
    dst = osp.join(train_dst, dst)
    if len(anns) > 0:
        shutil.copy(path, dst)
    else:
        continue
    idx += 1
    dataset['images'].append({
        'coco_url': '',
        'date_captured': '',
        'file_name': osp.basename(dst),
        'flickr_url': '',
        'id': idx,
        'license': 0,
        'width': w,
        'height': h
    })

    for i in range(len(anns)):
        sub_index += 1
        x1 = int(anns.iloc[i]['x1_label'])
        y1 = int(anns.iloc[i]['y1_label'])
        x2 = int(anns.iloc[i]['x2_label'])
        y2 = int(anns.iloc[i]['y2_label'])
        sx = int(anns.iloc[i]['stand_x'])
        sy = int(anns.iloc[i]['stand_y'])
        uniform = int(anns.iloc[i]['uniform_label'])
        occlude = int(anns.iloc[i]['occlude_label'])
        width = x2 - x1
        height = y2 - y1
        identity = int(anns.iloc[i]['tracking_id_label'].split("_")[-1])
        # if width < 50 and height < 100:
        #     identity = -1
        # if occlude == 2:
        #     identity = -1

        dataset['annotations'].append({
            'area': int(width*height),
            'bbox': [int(x1),int(y1),int(width),int(height)],
            'category_id': 1,
            'pid': identity,
            'id': sub_index,
            'image_id': idx,
            'iscrowd': 0,
            'num_keypoints':1,
            'keypoints': [[sx, sy]],
            'segmentation': [],
        })
    # shutil.copy(path, osp.join(val_dst, osp.basename(path)))
    # break

100%|██████████| 1300/1300 [00:53<00:00, 24.28it/s]


In [5]:
json_dst = osp.join(coco_src, 'original')
if not osp.exists(json_dst):
    os.mkdir(json_dst)
with open(osp.join(json_dst, 'instances_train2017.json'), 'w') as f:
    json.dump(dataset, f)

In [13]:
json_dst

'/media/acer/5f45949f-0fc7-4475-965b-e61989afcc10/FlowInc_data/Banqiao_01/original'