In [1]:
import sys
sys.path.insert(0,'insectrec/')
import numpy as np
seed = 42
np.random.seed(seed)
import pandas as pd
import os, shutil, glob, cv2, argparse
from natsort import natsorted
from utils import clean_folder, get_plate_names, export_labels, SAVE_DIR, read_plate, save_insect_crops
from tqdm import tqdm

# parser = argparse.ArgumentParser()
# parser.add_argument('--datadir', help="directory of sticky plate images")
# parser.add_argument('--years', nargs='+')
# parser.add_argument('--clean', dest='clean', action='store_true')
# parser.add_argument('--no-clean', dest='clean', action='store_false')
# parser.add_argument('--yolo_to_voc', dest='yolo_to_voc', action='store_true', 
#                     help='In the end of the script, yolo annotations get converted to voc')
# parser.add_argument('--save_extractions', dest='save_extractions', action='store_true', 
#                     help='Whether to save the extracted insect crops ')
# parser.add_argument('--nb_classes', type=int, choices=[3,6,9,21], default=6)

# parser.set_defaults(clean=True, yolo_to_voc=True, save_extractions=True)

# args = parser.parse_args()
# assert isinstance(args.datadir, str) and os.path.isdir(args.datadir), 'Provide a valid path'
# if not len(args.datadir): # /home/kalfasyan/data/images/sticky_plates/
# 	raise ValueError("Please provide a datadir argument.")

datadir = '/home/kalfasyan/data/images/sticky_plates/'
created_data_path = f'{datadir}created_data'

# CREATING NECESSARY DIRECTORIES FOR THE PROJECT
path_annotations = f'{created_data_path}/annotations/'
path_images = f'{created_data_path}/images/'
path_voc_annotations = f'{created_data_path}/voc_annotations/'
path_crops_export = f'{created_data_path}/crops_export/'
path_images_augmented = f'{created_data_path}/images_augmented/'
path_weights = f'{created_data_path}/weights/'
path_logs = f'{created_data_path}/logs/'
for path in [created_data_path, path_annotations, path_images, path_voc_annotations, 
            path_crops_export, path_weights, path_logs, path_images_augmented]:
    if not os.path.isdir(path):
        os.mkdir(path)	

clean = False

In [3]:
if clean:
    print(f'Cleaning directories..')
    clean_folder(path_annotations)
    clean_folder(path_images)
    clean_folder(path_voc_annotations)
    os.system(f'rm -rf {path_crops_export}*')
    os.system(f'rm -rf {path_images_augmented}*')
    os.system(f'rm {created_data_path}/df_*')
    os.system(f'rm {created_data_path}/class_mapping.csv')
    assert len(os.listdir(path_crops_export)) <= 0, "Wrong"

# Get name data from the sticky plates (their names)
BASE_DATA_DIR = f"{datadir}"
years = ['2019','2020']
assert all([y in ['2019','2020'] for y in years]), 'Wrong year given or in wrong format.'
plates = []
for y in years:
	y_plates = get_plate_names(y, base_dir=BASE_DATA_DIR)
	plates += y_plates
	print(f"Number of plates: {len(y_plates)} for year: {y}")

	# Create classes.txt for yolo annotations 
	# and a class_mapping.csv with the human readable labels

print(f"Number of ALL plates: {len(plates)}")

export_labels(created_data_dir=created_data_path, years=years)
class_map = pd.read_csv(f'{created_data_path}/class_mapping.csv')
assert len(class_map), "Couldn't read class mapping"
sub = class_map[['class', 'class_encoded']].drop_duplicates()
nan_code = sub[sub['class'].isnull()]['class_encoded'].iloc[0]

# Create a dataframe to save some statistics about the plates
# such as the number of nans and number of unique insects per plate
short_platenames = pd.Series(plates).apply(lambda x: x.split("/")[-1][:-4])
df_stats = pd.DataFrame(columns=['nr_nans','unique_insects','annotated'], index=short_platenames)
all_specs = []

annotated_plates, incomplete_plates = [], []

# Plates to ignore, since they were found to contain bad data (blurred/misclassified etc.)
bad_plates = ["beauvech_w38_B_F10_51 mm_ISO160_1-15 s",
            "brainlal_w27_A_58_160_1-15 s_11_48 mm_Manual_Manual_6240 x 4160",
            "kampen_w36_B_F10_51 mm_ISO160_1-15 s",
            "brainelal_8719_B_81_160_1-15 s_11_48 mm_Manual_Manual_6240 x 4160"]

labview_cols = ['Center of Mass X.1', 'Center of Mass Y.1', 'Bounding Rect Left.1',
       'Bounding Rect Top.1', 'Bounding Rect Right.1',
       'Bounding Rect Bottom.1', 'Equivalent Ellipse Major Axis.1',
       'Equivalent Ellipse Minor Axis.1', 'Area.1', 'Convex Hull Area.1',
       'Orientation.1', 'Ratio of Equivalent Ellipse Axes.1',
       'Ratio of Equivalent Rect Sides.1', 'Elongation Factor.1',
       'Compactness Factor.1', 'Heywood Circularity Factor.1', 'Type Factor.1',
       'R', 'G', 'B']

In [4]:
nb_classes = 21
# Defining wan
if nb_classes == 21:
    wanted_classes = ['m','v','bl','c','wmv','v(cy)','bv','gaasvlieg',
                    'grv','k','kever','nl','psylloidea','sp','sst','sw',
                    't','vlieg','weg','wnv','wswl']
else:
    raise ValueError(f"Number of classes not accepted: {args.nb_classes} ")
#['m','v','bl','c','wmv','v(cy)','bv','sw','t']
# ['m','v','bl','c','wmv','v(cy)','bv','gaasvlieg','grv','k','kever','nl','psylloidea','sp','sst','sw','t','vlieg','weg','wnv','wswl']
print(f"\nInsect classes selected: {wanted_classes}\n")


Insect classes selected: ['m', 'v', 'bl', 'c', 'wmv', 'v(cy)', 'bv', 'gaasvlieg', 'grv', 'k', 'kever', 'nl', 'psylloidea', 'sp', 'sst', 'sw', 't', 'vlieg', 'weg', 'wnv', 'wswl']



In [12]:
# Loop through all plates and nested loop through all insects in the plates
for p, platename in tqdm(enumerate(plates)):
    # Defining the platename
    pname = platename.split('/')[-1][:-4] 
    if 'empty' in pname:
        continue

    # Skip some plates that you define in bad_plates
    if pname in bad_plates:
        print(f"SKIPPING BAD PLATE: {pname}")
        continue
        
    if pname.startswith('beauvech_w39_A_F10_5'):
        break
    else:
        continue

    # Reading the specifications of the plate
    spec = pd.read_csv(plates[p][:-4] + '.txt', sep="\t") 
    # Fetching column names (only needed once)
    if p == 0: 
        columns = [ii for ii in spec.columns if ii.endswith('.1')]
        colextensions = ['index', 'name plate', 'R','G','B']
        columns.extend(colextensions) # adding extra columns
    spec = spec[columns]
    spec.rename(columns={'index': 'insect_idx'}, inplace=True)
    spec.dropna(axis=0, how='any', inplace=True)

    # ADDING YOLO AND HUMAN-READABLE ANNOTATION TO COLUMNS
    cmap = class_map[class_map['platename'] == pname].drop_duplicates(subset='idx', keep='first')
    if not len(cmap):
        print(f"Class mapping is empty for {platename}\nSkipping..")
        continue
    sub_map = cmap[['idx','class_encoded']].set_index('idx')
    sub_map2 = cmap[['idx','class']].set_index('idx')
    spec['yolo_class'] = sub_map
    spec['normal_class'] = sub_map2

    # REMOVING UNWANTED CLASSES 
    spec = spec[spec.normal_class != 'st'] # removing "stuk" class
    spec = spec[spec.normal_class != 'vuil'] # removing "vuil" class
    spec = spec[spec.normal_class.apply(lambda x: '+' not in str(x))]

    # SELECTING WANTED CLASSES
    spec = spec[spec.normal_class.isin(wanted_classes)]

    # Replacing commas from labview columns with dots
    for col in labview_cols:
        spec[col] = spec[col].str.replace(",",".").astype(float)

    spec_nr_classes = spec['yolo_class'].unique().shape[0]
    condition1 = (spec_nr_classes >= 0)
    condition2 = True # (spec['yolo_class'].unique()[0] not in [nan_code, np.nan])
    condition3 = (spec['yolo_class'].isnull().sum() != spec['yolo_class'].shape[0])

    df_stats.loc[pname] = pd.Series({'nr_nans': spec[spec['yolo_class'] == nan_code].shape[0], 
                                        'unique_insects': spec['yolo_class'][spec['yolo_class'] != nan_code].unique().shape[0],
                                        'annotated': False})



302it [00:00, 526248.36it/s]

SKIPPING BAD PLATE: brainelal_8719_B_81_160_1-15 s_11_48 mm_Manual_Manual_6240 x 4160
SKIPPING BAD PLATE: brainlal_w27_A_58_160_1-15 s_11_48 mm_Manual_Manual_6240 x 4160
SKIPPING BAD PLATE: kampen_w36_B_F10_51 mm_ISO160_1-15 s
SKIPPING BAD PLATE: beauvech_w38_B_F10_51 mm_ISO160_1-15 s





In [48]:
spec = pd.read_csv(plates[p][:-4] + '.txt', sep="\t") 
spec = spec[columns]
spec.rename(columns={'index': 'insect_idx'}, inplace=True)
spec.dropna(axis=0, how='any', inplace=True)
# ADDING YOLO AND HUMAN-READABLE ANNOTATION TO COLUMNS
cmap = class_map[class_map['platename'] == pname].drop_duplicates(subset='idx', keep='first')
sub_map = cmap[['idx','class_encoded']].set_index('idx')
sub_map2 = cmap[['idx','class']].set_index('idx')
spec['yolo_class'] = sub_map
spec['normal_class'] = sub_map2
# REMOVING UNWANTED CLASSES 
spec = spec[spec.normal_class != 'st'] # removing "stuk" class
spec = spec[spec.normal_class != 'vuil'] # removing "vuil" class
spec = spec[spec.normal_class.apply(lambda x: '+' not in str(x))]

# SELECTING WANTED CLASSES
spec = spec[spec.normal_class.isin(wanted_classes)]
# Replacing commas from labview columns with dots
for col in labview_cols:
    spec[col] = spec[col].str.replace(",",".").astype(float)

spec_nr_classes = spec['yolo_class'].unique().shape[0]
spec['pname'] = pname
spec['year'] = platename.split('/')[len(BASE_DATA_DIR.split('/'))]  
# Making extracted boxes squares (to avoid distortions in future resizing)
spec['width'] = 150
spec['height'] = 150
# Creating specifications according to 'YOLO' format
spec['yolo_class'].fillna(0, inplace=True)
spec['yolo_class'] = spec['yolo_class'].astype(int)
spec['yolo_x'] = np.abs(spec['Bounding Rect Right.1'] - np.abs(spec['Bounding Rect Left.1'] - spec['Bounding Rect Right.1']) /2) / W
spec['yolo_y'] = np.abs(spec['Bounding Rect Bottom.1'] - np.abs(spec['Bounding Rect Top.1'] - spec['Bounding Rect Bottom.1']) /2) / H
spec['yolo_width'] = pd.concat([spec['width'], spec['height']], axis=1).max(axis=1) / W 
spec['yolo_height'] = pd.concat([spec['width'], spec['height']], axis=1).max(axis=1) / H


spec.iloc[:,17:].head(14)

Unnamed: 0,insect_idx,name plate,R,G,B,yolo_class,normal_class,pname,year,width,height,yolo_x,yolo_y,yolo_width,yolo_height
9,9,beauvech_w39_A_F10_51 mm_ISO160_1-15 s,86.289773,81.420455,56.420455,321,m,beauvech_w39_A_F10_51 mm_ISO160_1-15 s,20191003,150,150,0.473797,0.057001,0.023749,0.035478
10,10,beauvech_w39_A_F10_51 mm_ISO160_1-15 s,83.029478,73.429327,52.023432,186,bv,beauvech_w39_A_F10_51 mm_ISO160_1-15 s,20191003,150,150,0.124367,0.063505,0.023749,0.035478
11,11,beauvech_w39_A_F10_51 mm_ISO160_1-15 s,61.247649,55.15674,39.075235,735,wmv,beauvech_w39_A_F10_51 mm_ISO160_1-15 s,20191003,150,150,0.76322,0.068117,0.023749,0.035478
13,13,beauvech_w39_A_F10_51 mm_ISO160_1-15 s,62.114458,58.0,38.60241,735,wmv,beauvech_w39_A_F10_51 mm_ISO160_1-15 s,20191003,150,150,0.83336,0.116958,0.023749,0.035478
16,16,beauvech_w39_A_F10_51 mm_ISO160_1-15 s,64.493562,59.922747,41.390558,735,wmv,beauvech_w39_A_F10_51 mm_ISO160_1-15 s,20191003,150,150,0.465405,0.135643,0.023749,0.035478
19,19,beauvech_w39_A_F10_51 mm_ISO160_1-15 s,82.238889,76.938889,54.372222,321,m,beauvech_w39_A_F10_51 mm_ISO160_1-15 s,20191003,150,150,0.866688,0.144158,0.023749,0.035478
23,23,beauvech_w39_A_F10_51 mm_ISO160_1-15 s,58.650909,53.770909,39.727273,735,wmv,beauvech_w39_A_F10_51 mm_ISO160_1-15 s,20191003,150,150,0.86835,0.152436,0.023749,0.035478
24,24,beauvech_w39_A_F10_51 mm_ISO160_1-15 s,69.03677,64.8279,45.897951,735,wmv,beauvech_w39_A_F10_51 mm_ISO160_1-15 s,20191003,150,150,0.460418,0.172658,0.023749,0.035478
26,26,beauvech_w39_A_F10_51 mm_ISO160_1-15 s,55.954023,51.435824,36.017241,735,wmv,beauvech_w39_A_F10_51 mm_ISO160_1-15 s,20191003,150,150,0.878167,0.191343,0.023749,0.035478
27,27,beauvech_w39_A_F10_51 mm_ISO160_1-15 s,72.68325,67.664461,49.002384,735,wmv,beauvech_w39_A_F10_51 mm_ISO160_1-15 s,20191003,150,150,0.101647,0.202578,0.023749,0.035478


In [77]:
spec.normal_class.shift(+1)

9      NaN
10       m
11      bv
13     wmv
16     wmv
      ... 
131     bv
132    wmv
133    wmv
134      m
135     sw
Name: normal_class, Length: 62, dtype: object

In [75]:
spec.normal_class

9        m
10      bv
11     wmv
13     wmv
16     wmv
      ... 
131    wmv
132    wmv
133      m
134     sw
135    wmv
Name: normal_class, Length: 62, dtype: object

In [52]:
#     save_insect_crops(spec, path_crops_export, plate_img)

specifications = spec
plate_img = read_plate(platename) 
H,W,_ = plate_img.shape

for _, row in specifications.iterrows():
    left  = int((row.yolo_x-row.yolo_width/2.)*W)
    right = int((row.yolo_x+row.yolo_width/2.)*W)
    top   = int((row.yolo_y-row.yolo_height/2.)*H)
    bot   = int((row.yolo_y+row.yolo_height/2.)*H)

    if(left < 0): left = 0;
    if(right > W-1): right = W-1;
    if(top < 0): top = 0;
    if(bot > H-1): bot = H-1;

    # print(f"left: {left}, right: {right}, top: {top}, bot: {bot}")
    crop = plate_img[top:bot, left:right]

    savepath = f"{path_crops_export}/{row.normal_class}/"
    if row.yolo_class == 735:
        break
#     if not os.path.isdir(savepath):
#         os.makedirs(savepath)
#     cv2.imwrite(f"{savepath}/{row.year}_{row.pname}_{row.insect_idx}.jpg", crop)


In [55]:
f"{savepath}/{row.year}_{row.pname}_{row.insect_idx}.jpg"

'/home/kalfasyan/data/images/sticky_plates/created_data/crops_export//wmv//20191003_beauvech_w39_A_F10_51 mm_ISO160_1-15 s_11.jpg'

In [69]:
platename.split('/')[len(BASE_DATA_DIR.split('/'))-1]

'2019'

In [61]:
len(BASE_DATA_DIR.split('/'))

7

In [66]:
len(BASE_DATA_DIR.split('/'))

7

In [67]:
platename

'/home/kalfasyan/data/images/sticky_plates/2019/20191003/plates/results/beauvech_w39_A_F10_51 mm_ISO160_1-15 s.png'

In [None]:
    # finding the annotated plates - i.e the ones that don't have all nans in 'class'
    if condition1 and condition2 and condition3:

        # Reading the plate image
        plate_img = read_plate(platename) 
        H,W,_ = plate_img.shape

        print(f'\nFound annotated data for plate: {condition1 and condition2} ----> Copying plate')
        annotated_plates.append(platename)
        print(f"Platename: {platename.split('/')[-1]}")
        spec['pname'] = pname
        spec['year'] = platename.split('/')[len(BASE_DATA_DIR.split('/'))]  
        # Making extracted boxes squares (to avoid distortions in future resizing)
        spec['width'] = 150
        spec['height'] = 150

        # Creating specifications according to 'YOLO' format
        spec['yolo_class'].fillna(0, inplace=True)
        spec['yolo_class'] = spec['yolo_class'].astype(int)
        spec['yolo_x'] = np.abs(spec['Bounding Rect Right.1'] - np.abs(spec['Bounding Rect Left.1'] - spec['Bounding Rect Right.1']) /2) / W
        spec['yolo_y'] = np.abs(spec['Bounding Rect Bottom.1'] - np.abs(spec['Bounding Rect Top.1'] - spec['Bounding Rect Bottom.1']) /2) / H
        spec['yolo_width'] = pd.concat([spec['width'], spec['height']], axis=1).max(axis=1) / W 
        spec['yolo_height'] = pd.concat([spec['width'], spec['height']], axis=1).max(axis=1) / H

        ann_full_new = os.path.join( path_annotations , f"{pname}.txt" )
        img_full_new = os.path.join( path_images , pname ) + '.jpg'

        # SAVING IMAGES
        if not os.path.isfile( img_full_new ):
            cv2.imwrite(img_full_new, plate_img)
        # SAVING ANNOTATIONS
        if not len(spec) and not os.path.isfile( ann_full_new ):
            print('Empty file', ann_full_new)
            break
        else:#if not os.path.isfile( ann_full_new ):
            spec[['yolo_class','yolo_x','yolo_y','yolo_width','yolo_height']].to_csv(ann_full_new, sep=' ', index=False, header=False)

        df_stats.loc[pname] = pd.Series({'nr_nans': spec[spec['yolo_class'] == nan_code].shape[0], 
                                            'unique_insects': spec['yolo_class'][spec['yolo_class'] != nan_code].unique().shape[0],
                                            'annotated': True})

        all_specs.append(spec)
        if args.save_extractions:
            save_insect_crops(spec, path_crops_export, plate_img)

    else:
        incomplete_plates.append(platename)

df_specs = pd.concat(all_specs, axis=0)

In [97]:
import os
import glob
import pandas as pd
from sklearn.preprocessing import LabelEncoder

years = ['2019']
messed_up_index_plates = ["beauvech_w38_A_F10_51 mm_ISO160_1-15 s",
                        "beauvech_w39_A_F10_51 mm_ISO160_1-15 s",
                        "beauvech_w39_C_F10_51 mm_ISO160_1-15 s",
                        "brainelal_w38_A_F10_51 mm_ISO160_1-15 s",
                        "brainelal_w38_C_F10_51 mm_ISO160_1-15 s",
                        "brainelal_w38_B_F10_51 mm_ISO160_1-15 s",
                        "brainelal_w39_A_F10_51 mm_ISO160_1-15 s",
                        "brainelal_w39_B_F10_51 mm_ISO160_1-15 s",]
z=0
dataframes = []
for year in years:
    print(f"\n-- Processing expert labels for year: {year} --\n")
    labels_dir = f"{created_data_path}/expert_labels/{year}"
    if not os.path.isdir(labels_dir):
        os.mkdir(labels_dir)

    xlsx_files = [fname for fname in glob.iglob(labels_dir + '/**/*.xlsx', recursive=True)]
    assert len(xlsx_files), "No expert labels found. (excel files provided by a Proefcentrum)"

    print(f'Number of excel annotation files found: {len(xlsx_files)}')
    wanted_columns_set = set(['name plate', 'index', 'Klasse', 'klasse'])
    df_labeldata = []
    for f in xlsx_files:
        print(f"Processing annotation file: {f.split('/')[-1]}")
        if f.endswith('w00.xlsx'):
            print(f"Skipping file: {f.split('/')[-1]}")
            continue

        sub = pd.read_excel(f)
        assert sub.iloc[:,1].name == 'index'
        assert sub.iloc[:,1].iloc[0] == 0., 'Check if excel file index starts with 1 instead of 0.'        
        df = sub[list(wanted_columns_set.intersection(sub.columns))]
        assert len(df.columns) == 3, 'Check excel file columns.'
        df.columns = map(str.lower, df.columns)
        df.rename(columns={'name plate': 'platename', 'klasse': 'class', 'index': 'idx'}, inplace=True)

        problematic_inds = []
        fixed_subdfs = []
        for i,q in df.groupby('platename'):

            if q.idx.iloc[0] != 0:
                print(f"{q.idx.iloc[0]} found instead of 0 in first index of plate. Discarding it")
                problematic_inds.append(q.iloc[0].name)

            if i in messed_up_index_plates:
                z +=1
            #    q.idx = q.idx + 1.
                q['class'] = q['class'].shift(+1)
                break
            fixed_subdfs.append(q)
        if z>2:
            break
#         df.drop(problematic_inds, axis=0, inplace=True)
#         assert df.idx.isna().sum() == 0
#         df_labeldata.append(df)

#     sub = pd.concat(df_labeldata, axis=0)
#     sub['class'] = sub['class'].apply(lambda x: str(x).replace(" ","").lower())
#     # sub['class'] = sub['class'].apply(lambda x: str(x).replace("2",""))
#     # sub['class'] = sub['class'].apply(lambda x: str(x).replace("3",""))
#     # sub['class'] = sub['class'].apply(lambda x: str(x).replace("4",""))
#     dataframes.append(sub)


-- Processing expert labels for year: 2019 --

Number of excel annotation files found: 14
Processing annotation file: results_20200212_W40 .xlsx
Processing annotation file: results_datageleplaten_w00.xlsx
Skipping file: results_datageleplaten_w00.xlsx
Processing annotation file: results_20190730_20190806.xlsx
Processing annotation file: results_2019_annotations_yannis.xlsx
Processing annotation file: results_20190821.xlsx
Processing annotation file: results_20191004_W38_en_W39.xlsx


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Processing annotation file: results_20191001_W35.xlsx
Processing annotation file: results_20200129_W41 .xlsx
Processing annotation file: results_20190809_ W31.xlsx
Processing annotation file: results_20191001_W36.xlsx
Processing annotation file: results_may_june.xlsx
Processing annotation file: results_20191001_W34.xlsx
Processing annotation file: results_20191001_W37.xlsx
Processing annotation file: results_20180806_W29.xlsx


Unnamed: 0,platename,idx,class
0,BEAUV_W29_A_37_F11_51 mm_ISO160_1-15 s,0,vuil
1,BEAUV_W29_A_37_F11_51 mm_ISO160_1-15 s,1,vuil
2,BEAUV_W29_A_37_F11_51 mm_ISO160_1-15 s,2,vuil
3,BEAUV_W29_A_37_F11_51 mm_ISO160_1-15 s,3,vul
4,BEAUV_W29_A_37_F11_51 mm_ISO160_1-15 s,4,vuil
...,...,...,...
11556,her_262719_16_27_F11_51 mm_ISO160_1-15 s,59,v
11557,her_262719_16_27_F11_51 mm_ISO160_1-15 s,60,c
11558,her_262719_16_27_F11_51 mm_ISO160_1-15 s,61,m
11559,her_262719_16_27_F11_51 mm_ISO160_1-15 s,62,vuil


In [60]:
# q.drop(q.iloc)
# df = df.iloc[3:]
df.idx.loc[2682]

nan

In [58]:
q.iloc[0].name

2682

In [69]:
df.idx.isna().sum()

1

In [119]:
for i, subdf in sub.groupby(['platename']):
    if subdf.idx.iloc[0] != 0:
        print(sub.platename.unique()[0])

        print("ERROR")
        print(subdf.idx.iloc[0])
        break

beauvech_w38_A_F10_51 mm_ISO160_1-15 s
ERROR
nan


In [120]:
subdf

Unnamed: 0,class,idx,platename
2682,vuil,,beauvech_w39_B_F10_51 mm_ISO160_1-15 s
2683,vuil,0.0,beauvech_w39_B_F10_51 mm_ISO160_1-15 s
2684,vuil,1.0,beauvech_w39_B_F10_51 mm_ISO160_1-15 s
2685,st,2.0,beauvech_w39_B_F10_51 mm_ISO160_1-15 s
2686,vuil,3.0,beauvech_w39_B_F10_51 mm_ISO160_1-15 s
...,...,...,...
2849,vuil,166.0,beauvech_w39_B_F10_51 mm_ISO160_1-15 s
2850,vuil,167.0,beauvech_w39_B_F10_51 mm_ISO160_1-15 s
2851,vuil,168.0,beauvech_w39_B_F10_51 mm_ISO160_1-15 s
2852,vuil,169.0,beauvech_w39_B_F10_51 mm_ISO160_1-15 s


In [189]:
sub[sub.platename == 'beauvech_w39_B_F10_51 mm_ISO160_1-15 s']

Unnamed: 0,class,idx,platename


In [99]:
sub.platename.apply(lambda x: '_'.join(x.split('_')[:3]))

0         BEAUV_W29_A
1         BEAUV_W29_A
2         BEAUV_W29_A
3         BEAUV_W29_A
4         BEAUV_W29_A
             ...     
12480    KAMPEN_W29_C
12481    KAMPEN_W29_C
12482    KAMPEN_W29_C
12483    KAMPEN_W29_C
12484    KAMPEN_W29_C
Name: platename, Length: 12485, dtype: object

In [107]:
sub[sub.platename == "beauvech_w38_C_F10_51 mm_ISO160_1-15 s"].head(10)

Unnamed: 0,class,idx,platename
923,vuil,0.0,beauvech_w38_C_F10_51 mm_ISO160_1-15 s
924,vuil,1.0,beauvech_w38_C_F10_51 mm_ISO160_1-15 s
925,vuil,2.0,beauvech_w38_C_F10_51 mm_ISO160_1-15 s
926,vuil,3.0,beauvech_w38_C_F10_51 mm_ISO160_1-15 s
927,vuil,4.0,beauvech_w38_C_F10_51 mm_ISO160_1-15 s
928,vuil,5.0,beauvech_w38_C_F10_51 mm_ISO160_1-15 s
929,st,6.0,beauvech_w38_C_F10_51 mm_ISO160_1-15 s
930,vuil,7.0,beauvech_w38_C_F10_51 mm_ISO160_1-15 s
931,vuil,8.0,beauvech_w38_C_F10_51 mm_ISO160_1-15 s
932,bl,9.0,beauvech_w38_C_F10_51 mm_ISO160_1-15 s


In [109]:
sub = pd.read_excel(f)

In [113]:
sub[sub['name plate'] == "beauvech_w38_C_F10_51 mm_ISO160_1-15 s"].head(10)

Unnamed: 0,name plate,index,Center of Mass X,Center of Mass Y,Bounding Rect Left,Bounding Rect Top,Bounding Rect Right,Bounding Rect Bottom,Equivalent Ellipse Major Axis,Equivalent Ellipse Minor Axis,...,Elongation Factor.1,Compactness Factor.1,Heywood Circularity Factor.1,Type Factor.1,R,G,B,Index,Klasse,Unnamed: 41
923,beauvech_w38_C_F10_51 mm_ISO160_1-15 s,0.0,4843020,-3086825,4530861,-3581114,5294415,-2779383,1103129,285991,...,2648402.0,404762,1434655.0,547137,93235294.0,85905882.0,58805882.0,1,vuil,
924,beauvech_w38_C_F10_51 mm_ISO160_1-15 s,1.0,8524276,-2769907,8272276,-3046626,8768586,-2512139,634819,406344,...,1718310.0,763736,1049372.0,982825,90755396.0,83762590.0,58625899.0,2,vuil,
925,beauvech_w38_C_F10_51 mm_ISO160_1-15 s,2.0,4242761,-2472193,4072729,-2779383,4416328,-2168539,651936,307432,...,2601852.0,750000,1138454.0,973558,73768519.0,65685185.0,42527778.0,3,vuil,
926,beauvech_w38_C_F10_51 mm_ISO160_1-15 s,3.0,5059826,-2389369,4492683,-2703027,5485303,-2092184,1082669,337676,...,4097674.0,473558,1326298.0,684026,97111675.0,89025381.0,62964467.0,4,vuil,
927,beauvech_w38_C_F10_51 mm_ISO160_1-15 s,4.0,131736289,-1809016,131319005,-2283072,132044381,-1290452,1516860,236125,...,2898026.0,390688,1813787.0,446693,79782383.0,73637306.0,49119171.0,5,vuil,
928,beauvech_w38_C_F10_51 mm_ISO160_1-15 s,5.0,141633228,-1885061,140939786,-2015828,142199650,-1748585,1251585,158655,...,8784000.0,463203,2001936.0,800285,103570093.0,98074766.0,68439252.0,6,vuil,
929,beauvech_w38_C_F10_51 mm_ISO160_1-15 s,6.0,110277790,-1762636,109824960,-1977651,110779402,-1481341,1249997,213788,...,3320197.0,443077,1734637.0,667655,102909722.0,95541667.0,66423611.0,7,st,
930,beauvech_w38_C_F10_51 mm_ISO160_1-15 s,7.0,39012150,-1654529,38241771,-1939473,39616168,-908675,1881344,162759,...,4665899.0,169753,2413045.0,164176,76133333.0,69327273.0,46739394.0,8,vuil,
931,beauvech_w38_C_F10_51 mm_ISO160_1-15 s,8.0,143304578,-1756899,142543249,-1939473,143994002,-1557696,1521182,198855,...,6224576.0,428947,1972363.0,626519,105840491.0,99226994.0,70355828.0,9,vuil,
932,beauvech_w38_C_F10_51 mm_ISO160_1-15 s,9.0,69397744,-156652,68974820,-1175919,69814729,1000210,2530634,362999,...,5949914.0,394737,1886121.0,557334,89252525.0,82363636.0,57949495.0,10,bl,


In [None]:
df = pd.concat(dataframes, axis=0)
le = LabelEncoder()
df['class_encoded'] = le.fit_transform(df['class'].tolist())

path_annotations = f'{created_data_path}/annotations/'
assert os.path.isdir(path_annotations), "Annotations path not created."

mapped = dict(zip(le.transform(le.classes_), le.classes_))
# Saving class mapping to use as yolo annotation classes
pd.Series(mapped).to_csv(f'{path_annotations}/classes.txt', sep=' ')
# Saving class mapping to use when processing each plate
df.to_csv(f'{created_data_path}/class_mapping.csv')


-- Processing expert labels for year: 2019 --

Number of excel annotation files found: 14
Processing annotation file: results_20200212_W40 .xlsx
Processing annotation file: results_datageleplaten_w00.xlsx
Skipping file: results_datageleplaten_w00.xlsx
Processing annotation file: results_20190730_20190806.xlsx


KeyboardInterrupt: 