In [144]:
from glob import glob
import pandas as pd
import os
import traceback
import yaml
import numpy as np
import shutil
from IPython.display import Image, display

In [2]:
img_path = pd.Series(glob("../../data/anh/*.jpg"))

In [3]:
yml_path = pd.Series(glob("../../data/anh_labeled/*_anh_labeled*/*.yaml"))

In [4]:
img_path_df = pd.DataFrame({
    "img_path" :img_path,
    "img_id": img_path.map(lambda x: os.path.basename(x).replace(".jpg", ""))
})

In [5]:
yml_df = pd.DataFrame({
    "yml_path": yml_path
})

In [6]:
def parse_yaml(file_path):
    with open(file_path) as f:
        metadata = yaml.safe_load(f)
        metadata['yml_path'] = file_path
        if "region_code_gt" not in metadata:
            metadata['region_code_gt'] = ""
        return pd.Series(metadata)

In [7]:
parse_yaml(yml_path[0])

image_file                               TimePhoto_20210504_150732.jpg
image_height                                                       480
image_width                                                        720
plate_corners_gt                       163 235 442 254 458 296 196 269
plate_inverted_gt                                                False
plate_number_gt                                               20C00022
region_code_gt                                                        
yml_path             ../../data/anh_labeled/Thang_anh_labeled/TimeP...
dtype: object

In [8]:
yml_df = yml_df.yml_path.apply(parse_yaml)

In [9]:
yml_df['img_id'] = yml_df.image_file.str.replace(".jpg", "")

In [10]:
yml_df['yml_file_name'] = yml_df.yml_path.apply(lambda x: os.path.basename(x))

In [11]:
yml_df['worker'] = yml_df.yml_path.apply(lambda x: "Thang" if "Thang" in x else "Thanh")

In [12]:
yml_df.region_code_gt = yml_df.region_code_gt.str.lower()

In [13]:
df = pd.merge(img_path_df, yml_df, on="img_id")

In [27]:
duplicate_ids = df[df.img_id.duplicated()].img_id.to_list()

In [28]:
duplicate_df =  df[df.img_id.isin(duplicate_ids)]

In [29]:
duplicate_df

Unnamed: 0,img_id,img_path,image_file,image_height,image_width,plate_corners_gt,plate_inverted_gt,plate_number_gt,region_code_gt,yml_path,yml_file_name,worker
2,12a-092.41t,../../data/anh/12a-092.41t.jpg,12a-092.41t.jpg,480,720,526 230 568 222 576 268 534 279,False,12A09241,,../../data/anh_labeled/Thang_anh_labeled/12a-0...,12a-092.41t-0.yaml,Thang
3,12a-092.41t,../../data/anh/12a-092.41t.jpg,12a-092.41t.jpg,480,720,524 230 568 222 575 269 533 281,False,12A09241,,../../data/anh_labeled/Thanh_anh_labeled_21052...,12a-092.41t-0.yaml,Thanh
16,20a-001.69x,../../data/anh/20a-001.69x.jpg,20a-001.69x.jpg,480,720,192 228 234 236 234 283 191 272,True,20A00169,,../../data/anh_labeled/Thang_anh_labeled/20a-0...,20a-001.69x-0.yaml,Thang
17,20a-001.69x,../../data/anh/20a-001.69x.jpg,20a-001.69x.jpg,480,720,189 226 239 235 239 287 190 275,False,20A00169,,../../data/anh_labeled/Thanh_anh_labeled_21052...,20a-001.69x-0.yaml,Thanh
21,20a-015.79t,../../data/anh/20a-015.79t.jpg,20a-015.79t.jpg,480,720,485 234 528 227 537 270 494 280,False,20A01579,,../../data/anh_labeled/Thang_anh_labeled/20a-0...,20a-015.79t-0.yaml,Thang
22,20a-015.79t,../../data/anh/20a-015.79t.jpg,20a-015.79t.jpg,480,720,482 233 529 224 540 273 490 284,False,20A01579,,../../data/anh_labeled/Thanh_anh_labeled_21052...,20a-015.79t-0.yaml,Thanh
31,20a-045.15t,../../data/anh/20a-045.15t.jpg,20a-045.15t.jpg,480,720,411 316 492 304 492 322 411 339,False,20A04515,?,../../data/anh_labeled/Thang_anh_labeled/20a-0...,20a-045.15t-0.yaml,Thang
32,20a-045.15t,../../data/anh/20a-045.15t.jpg,20a-045.15t.jpg,480,720,409 315 492 303 493 323 410 340,False,20A04515,,../../data/anh_labeled/Thanh_anh_labeled_21052...,20a-045.15t-0.yaml,Thanh
38,20a-075.63t,../../data/anh/20a-075.63t.jpg,20a-075.63t.jpg,480,720,215 232 267 242 257 290 210 278,False,20A07563,,../../data/anh_labeled/Thang_anh_labeled/20a-0...,20a-075.63t-0.yaml,Thang
39,20a-075.63t,../../data/anh/20a-075.63t.jpg,20a-075.63t.jpg,480,720,215 232 265 242 258 290 208 277,False,20A07563,,../../data/anh_labeled/Thanh_anh_labeled_21052...,20a-075.63t-0.yaml,Thanh


In [30]:
def show_img(input_df, img_id):
    utt_df = input_df[input_df.img_id == img_id]
    display(utt_df)
    display(Image(utt_df.iloc[0].img_path))

In [31]:
def check_conflict(dup_df):
    return (dup_df.iloc[0].plate_number_gt == dup_df.iloc[1].plate_number_gt) and (dup_df.iloc[0].region_code_gt == dup_df.iloc[1].region_code_gt)

In [32]:
tmp_df = pd.DataFrame(duplicate_df.groupby("img_id").apply(check_conflict)).reset_index()

In [33]:
conflict_ids = tmp_df[~tmp_df[0]].img_id.to_list()

In [34]:
conflict_ids

['20a-045.15t',
 '20a-478.82t',
 '20l-3334',
 '29u-9953',
 '30a-025.62t',
 '30y-5835']

In [26]:
# show_img(duplicate_df, conflict_utts[0])

In [62]:
filtered_ids = duplicate_df[((duplicate_df.worker == 'Thanh') | duplicate_df.img_id.isin(conflict_ids))].img_id.to_list()

In [64]:
filtered_df = df[~df.img_id.isin(filtered_ids)]

## Statistics for filtered DF

In [66]:
filtered_df.worker.value_counts()

Thanh    489
Thang    110
Name: worker, dtype: int64

In [67]:
filtered_df.region_code_gt.value_counts()

     552
x     47
Name: region_code_gt, dtype: int64

In [72]:
filtered_df.plate_inverted_gt.value_counts()

False    592
True       7
Name: plate_inverted_gt, dtype: int64

In [80]:
filtered_df.plate_number_gt.describe()

count     599
unique    298
top         ?
freq       10
Name: plate_number_gt, dtype: object

In [89]:
# show_img(filtered_df, "12a-006.10t")

## Convert to training format

In [138]:
train_df = filtered_df[filtered_df.worker=="Thanh"]

In [179]:
test_df = filtered_df[filtered_df.worker!='Thanh']

In [180]:
test_df.to_pickle("test_df.pkl")

In [181]:
train_df.to_pickle("train_df.pkl")

In [97]:
dim = np.array((720, 480))

In [113]:
def normalize_plate_corner(plate_corners_gt):
    plate_corners = np.fromstring(plate_corners_gt, dtype=int, sep=' ').reshape(-1, 2).astype(float)
    return plate_corners / dim

In [140]:
train_df['normalized_plate_corner'] = train_df.plate_corners_gt.apply(normalize_plate_corner)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [141]:
def convert_to_alpr_uncontrained(normalized_plate_corner):
    tl_x, tl_y, tr_x, tr_y, br_x, br_y, bl_x, bl_y = normalized_plate_corner.flatten()
    return np.array([tl_x,tr_x,br_x,bl_x,tl_y,tr_y,br_y,bl_y])

In [142]:
train_df['alpr_uncontrained_format'] = train_df.normalized_plate_corner.apply(convert_to_alpr_uncontrained)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [143]:
train_df.iloc[0].alpr_uncontrained_format

array([0.71805556, 0.76111111, 0.75972222, 0.71805556, 0.675     ,
       0.66666667, 0.7125    , 0.72708333])

In [145]:
train_df

Unnamed: 0,img_id,img_path,image_file,image_height,image_width,plate_corners_gt,plate_inverted_gt,plate_number_gt,region_code_gt,yml_path,yml_file_name,worker,normalized_plate_corner,alpr_uncontrained_format
0,1,../../data/anh/1.jpg,1.jpg,480,720,517 324 548 320 547 342 517 349,False,20K6266,,../../data/anh_labeled/Thanh_anh_labeled_21052...,1-0.yaml,Thanh,"[[0.7180555555555556, 0.675], [0.7611111111111...","[0.7180555555555556, 0.7611111111111111, 0.759..."
1,12a-006.10t,../../data/anh/12a-006.10t.jpg,12a-006.10t.jpg,480,720,430 238 483 237 486 280 432 284,False,12A00610,,../../data/anh_labeled/Thanh_anh_labeled_21052...,12a-006.10t-0.yaml,Thanh,"[[0.5972222222222222, 0.49583333333333335], [0...","[0.5972222222222222, 0.6708333333333333, 0.675..."
4,12c-018.76t,../../data/anh/12c-018.76t.jpg,12c-018.76t.jpg,480,720,454 347 488 337 491 373 455 387,False,12C01876,,../../data/anh_labeled/Thanh_anh_labeled_21052...,12c-018.76t-0.yaml,Thanh,"[[0.6305555555555555, 0.7229166666666667], [0....","[0.6305555555555555, 0.6777777777777778, 0.681..."
5,14a-130.02t,../../data/anh/14a-130.02t.jpg,14a-130.02t.jpg,480,720,501 241 551 232 559 282 508 291,False,14A13002,,../../data/anh_labeled/Thanh_anh_labeled_21052...,14a-130.02t-0.yaml,Thanh,"[[0.6958333333333333, 0.5020833333333333], [0....","[0.6958333333333333, 0.7652777777777777, 0.776..."
6,14a-265.81t,../../data/anh/14a-265.81t.jpg,14a-265.81t.jpg,480,720,505 208 555 200 560 250 511 263,False,14A26581,,../../data/anh_labeled/Thanh_anh_labeled_21052...,14a-265.81t-0.yaml,Thanh,"[[0.7013888888888888, 0.43333333333333335], [0...","[0.7013888888888888, 0.7708333333333334, 0.777..."
7,14b-026.34t,../../data/anh/14b-026.34t.jpg,14b-026.34t.jpg,480,720,209 311 245 314 244 349 208 344,False,14B02634,,../../data/anh_labeled/Thanh_anh_labeled_21052...,14b-026.34t-0.yaml,Thanh,"[[0.2902777777777778, 0.6479166666666667], [0....","[0.2902777777777778, 0.3402777777777778, 0.338..."
8,15c-146.66t,../../data/anh/15c-146.66t.jpg,15c-146.66t.jpg,480,720,189 312 215 316 215 349 188 343,False,15C14666,,../../data/anh_labeled/Thanh_anh_labeled_21052...,15c-146.66t-0.yaml,Thanh,"[[0.2625, 0.65], [0.2986111111111111, 0.658333...","[0.2625, 0.2986111111111111, 0.298611111111111..."
9,15c-232.92t,../../data/anh/15c-232.92t.jpg,15c-232.92t.jpg,480,720,420 381 462 373 462 410 419 418,False,15C23292,,../../data/anh_labeled/Thanh_anh_labeled_21052...,15c-232.92t-0.yaml,Thanh,"[[0.5833333333333334, 0.79375], [0.64166666666...","[0.5833333333333334, 0.6416666666666667, 0.641..."
10,16m-9732,../../data/anh/16m-9732.jpg,16m-9732.jpg,480,720,414 263 473 261 471 308 413 313,False,16M9732,,../../data/anh_labeled/Thanh_anh_labeled_21052...,16m-9732-0.yaml,Thanh,"[[0.575, 0.5479166666666667], [0.6569444444444...","[0.575, 0.6569444444444444, 0.6541666666666667..."
11,16n-3384,../../data/anh/16n-3384.jpg,16n-3384.jpg,480,720,211 284 251 291 252 337 210 327,False,16N3384,,../../data/anh_labeled/Thanh_anh_labeled_21052...,16n-3384-0.yaml,Thanh,"[[0.29305555555555557, 0.5916666666666667], [0...","[0.29305555555555557, 0.3486111111111111, 0.35..."


In [175]:
outdir = "my_label"
def create_train_data_dir(row):
    shutil.copyfile(row.img_path, outdir + "/" + row.image_file)
    with open(outdir + "/" + row.img_id + ".txt", "w") as f:
        corners_str = ','.join(str(v) for v in row.alpr_uncontrained_format)
        f.write("4," + corners_str + ",,")

In [176]:
train_df.loc[[0]]

Unnamed: 0,img_id,img_path,image_file,image_height,image_width,plate_corners_gt,plate_inverted_gt,plate_number_gt,region_code_gt,yml_path,yml_file_name,worker,normalized_plate_corner,alpr_uncontrained_format
0,1,../../data/anh/1.jpg,1.jpg,480,720,517 324 548 320 547 342 517 349,False,20K6266,,../../data/anh_labeled/Thanh_anh_labeled_21052...,1-0.yaml,Thanh,"[[0.7180555555555556, 0.675], [0.7611111111111...","[0.7180555555555556, 0.7611111111111111, 0.759..."


In [178]:
train_df.apply(create_train_data_dir, axis=1)

0      None
1      None
4      None
5      None
6      None
7      None
8      None
9      None
10     None
11     None
12     None
13     None
14     None
15     None
18     None
19     None
20     None
23     None
24     None
25     None
26     None
27     None
28     None
29     None
30     None
33     None
34     None
35     None
36     None
37     None
       ... 
633    None
634    None
635    None
636    None
639    None
640    None
641    None
642    None
643    None
644    None
645    None
646    None
647    None
648    None
649    None
650    None
651    None
652    None
653    None
654    None
655    None
656    None
661    None
662    None
665    None
666    None
667    None
668    None
669    None
670    None
Length: 489, dtype: object