In [1]:
import numpy as np, pandas as pd
from glob import glob
import shutil, os
from tqdm.notebook import tqdm

In [2]:
# Image metadata for the held-out 10% split. The file may list the same
# image_id on several rows, so only the first row per image is kept
# (the original row index is preserved).
test_df = (
    pd.read_csv('/kaggle/input/train-90-10-split/new_df_10.csv')
      .drop_duplicates(subset='image_id', keep='first')
)

test_df.head()

Unnamed: 0,image_id,width,height
0,000ae00eb3942d27e0b97903dd563a6e,3072,3072
3,001d127bad87592efe45a5c7678f8b8d,3072,3072
9,00291f7aff0123ea76a59998effef229,3072,3072
12,003cfe5ce5c0ec5163138eb3b740e328,1994,2430
15,004dc2a50591fb5f1aaf012bffa95fd9,1994,2430


In [3]:
def yolo2voc(image_height, image_width, bboxes):
    """
    Convert normalized YOLO boxes into absolute corner-format (VOC) boxes.

    yolo => [xmid, ymid, w, h] (normalized by image size)
    voc  => [x1, y1, x2, y2]   (same units as image_width / image_height)

    Parameters
    ----------
    image_height : number
        Height used to scale the y-centre and box-height columns.
    image_width : number
        Width used to scale the x-centre and box-width columns.
    bboxes : array-like, shape (..., 4)
        Boxes whose last axis is [xmid, ymid, w, h]. Any array-like is
        accepted (ndarray of any numeric dtype, nested lists, ...).

    Returns
    -------
    numpy.ndarray
        New float array of the same shape holding [x1, y1, x2, y2].
        The input is never modified. Values are neither rounded nor
        clipped to the image bounds.
    """
    # np.array(..., dtype=float) makes exactly one copy and forces a float
    # dtype; with an integer input the scaled values would otherwise be
    # truncated when written back.
    bboxes = np.array(bboxes, dtype=float)

    # Scale from normalized coordinates to absolute units.
    bboxes[..., [0, 2]] = bboxes[..., [0, 2]] * image_width
    bboxes[..., [1, 3]] = bboxes[..., [1, 3]] * image_height

    # centre - size / 2 -> top-left corner; top-left + size -> bottom-right.
    # Order matters: the second line relies on columns 0/1 already holding
    # the top-left corner while columns 2/3 still hold width/height.
    bboxes[..., [0, 1]] = bboxes[..., [0, 1]] - bboxes[..., [2, 3]] / 2
    bboxes[..., [2, 3]] = bboxes[..., [0, 1]] + bboxes[..., [2, 3]]

    return bboxes



In [4]:
# Parallel lists: one entry per predicted box across all label files.
image_ids = []
PredictionStrings = []

label_paths = glob('/kaggle/input/infer-10percent-train-as-test-512images/yolov5/runs/detect/exp/labels/*txt')

for file_path in tqdm(label_paths):
    # The file name (without extension) is the image id.
    image_id = os.path.basename(file_path).split('.')[0]
    # Original image size is needed to turn normalized boxes into pixels.
    w, h = test_df.loc[test_df.image_id == image_id, ['width', 'height']].values[0]

    # Use a context manager so the handle is closed even if parsing fails.
    # str.split() with no argument tolerates any run of whitespace and
    # yields an empty list for an empty file.
    with open(file_path, 'r') as f:
        tokens = f.read().split()

    if not tokens:
        # Empty label file: nothing was predicted for this image.
        continue

    # Each row of the label file is expected to hold 6 numbers.
    data = np.array(tokens).astype(np.float32).reshape(-1, 6)

    # Move columns 0 and 5 to the front; later cells read them as
    # label and confidence_score, followed by the 4 box values.
    data = data[:, [0, 5, 1, 2, 3, 4]]

    # Keep the first two columns as-is and append the rounded pixel boxes.
    bboxes_voc = np.concatenate(
        (data[:, :2].astype(float), np.round(yolo2voc(h, w, data[:, 2:]))),
        axis=1,
    )

    for box in bboxes_voc:
        image_ids.append(image_id)
        PredictionStrings.append(box)


  0%|          | 0/1482 [00:00<?, ?it/s]

In [5]:
# One row per predicted box; 'PredictionString' holds the 6-value row
# built in the parsing loop.
pred_df = pd.DataFrame({'image_id': image_ids,
                        'PredictionString': PredictionStrings})

# (output column, position inside the 6-value row), listed in the order
# the columns should appear in the frame.
field_positions = [
    ('x_min', 2),
    ('y_min', 3),
    ('x_max', 4),
    ('y_max', 5),
    ('label', 0),
    ('confidence_score', 1),
]

for column_name, position in field_positions:
    pred_df[column_name] = pred_df['PredictionString'].str.get(position)

In [6]:
# The packed row is no longer needed once its fields have been unpacked.
# errors='ignore' keeps this cell idempotent: re-running it after the
# column is already gone no longer raises KeyError.
pred_df = pred_df.drop(columns=['PredictionString'], errors='ignore')

In [7]:
# Preview the first rows of the per-box prediction table.
pred_df.head()

Unnamed: 0,image_id,x_min,y_min,x_max,y_max,label,confidence_score
0,693008c77b426c4afc45e990f5f07287,996.0,747.0,1372.0,1313.0,5.0,0.010429
1,693008c77b426c4afc45e990f5f07287,980.0,669.0,1372.0,1313.0,7.0,0.01062
2,693008c77b426c4afc45e990f5f07287,1152.0,879.0,1392.0,1323.0,9.0,0.015495
3,693008c77b426c4afc45e990f5f07287,712.0,986.0,1308.0,1323.0,3.0,0.024445
4,693008c77b426c4afc45e990f5f07287,1080.0,830.0,1392.0,1313.0,4.0,0.027206


In [8]:
# Summary statistics for every numeric column of the prediction table.
# NOTE(review): yolo2voc does not clip boxes, so extreme values reported
# here (e.g. the max of y_max) may fall outside the image -- confirm
# against the image sizes in test_df before using the boxes downstream.
pred_df.describe()

Unnamed: 0,x_min,y_min,x_max,y_max,label,confidence_score
count,14006.0,14006.0,14006.0,14006.0,14006.0,14006.0
mean,1041.427103,1028.563687,1392.419891,1378.572969,7.654005,0.134524
std,588.894008,589.914209,606.95246,644.789205,3.95674,0.2103
min,-0.0,0.0,15.0,43.0,0.0,0.01001
25%,567.0,546.0,890.0,858.25,5.0,0.016575
50%,963.5,911.0,1399.0,1353.5,9.0,0.035095
75%,1488.0,1442.75,1876.0,1889.0,11.0,0.13388
max,2784.0,3074.0,2988.0,3368.0,13.0,0.905273


In [9]:
# Placeholder box for images that received no prediction at all. These are
# the six numbers of the "14 1 0 0 1 1" placeholder, assigned one per column
# (label, confidence, x_min, y_min, x_max, y_max) instead of writing the
# whole string into every column, which turned the numeric columns into
# mixed string/float columns.
no_prediction_fill = {
    'label': 14,
    'confidence_score': 1,
    'x_min': 0,
    'y_min': 0,
    'x_max': 1,
    'y_max': 1,
}

# Left merge keeps every image in test_df, including those with no boxes;
# their prediction columns come back as NaN and are filled per column.
sub_df = pd.merge(test_df, pred_df, on='image_id', how='left').fillna(no_prediction_fill)
print(len(sub_df))

# Image size was only needed for the box conversion, not for the output.
sub_df = sub_df.drop(columns=['width', 'height'])
sub_df.to_csv('/kaggle/working/predictions_train_10.csv', index=False)

sub_df.tail()

14034


Unnamed: 0,image_id,x_min,y_min,x_max,y_max,label,confidence_score
14029,ffeffc54594debf3716d6fcd2402a99f,1396.0,333.0,1747.0,414.0,11.0,0.011871
14030,ffeffc54594debf3716d6fcd2402a99f,19.0,2852.0,284.0,2944.0,13.0,0.012192
14031,ffeffc54594debf3716d6fcd2402a99f,837.0,328.0,1107.0,414.0,11.0,0.015343
14032,ffeffc54594debf3716d6fcd2402a99f,857.0,1518.0,1867.0,1989.0,3.0,0.191284
14033,ffeffc54594debf3716d6fcd2402a99f,1251.0,759.0,1554.0,1162.0,0.0,0.782227


In [10]:
# Switch the notebook's working directory to /kaggle/working so that the
# relative file name passed to FileLink below resolves to the CSV saved there.
%cd /kaggle/working

# FileLink renders a clickable link to the file in the cell output.
from IPython.display import FileLink 
FileLink(r'predictions_train_10.csv')

/kaggle/working
