In [148]:
import os
from glob import glob
import pandas as pd
from functools import reduce
from xml.etree import ElementTree as et

In [149]:
#load all xml files and store in a list
xmlfiles = glob('./data_images/annotations/*.xml')

# data cleaning . replace \\ with /
replace_text = lambda x: x.replace('\\','/')
xmlfiles = list(map(replace_text,xmlfiles))

In [150]:
xmlfiles

['./data_images/annotations/02.xml',
 './data_images/annotations/2007_000032_jpg.rf.453cf71521fb73718369a7f07a41433c.jpg.xml',
 './data_images/annotations/2007_000033_jpg.rf.83ab0d65cbcc0be92b649082c8a21ffb.jpg.xml',
 './data_images/annotations/2007_000061_jpg.rf.46ef80849da87d8113c0330be6ea5beb.jpg.xml',
 './data_images/annotations/2007_000068_jpg.rf.59f7be9df94d9d96fbe68b8d25df36f2.jpg.xml',
 './data_images/annotations/2007_000121_jpg.rf.807519c5d839ae8e10504ffb0c132f39.jpg.xml',
 './data_images/annotations/2007_000175_jpg.rf.89dd3b1705cf32b3b67f325b8b782a36.jpg.xml',
 './data_images/annotations/2007_000187_jpg.rf.2ab27aab673e7d40a76a53440f177c98.jpg.xml',
 './data_images/annotations/2007_000241_jpg.rf.e14a4e70f555b90ac0e90134b6021bf2.jpg.xml',
 './data_images/annotations/2007_000243_jpg.rf.637dab3bc3adf92ba84fcbe8f37805ee.jpg.xml',
 './data_images/annotations/2007_000250_jpg.rf.f0fe3dea5cb311aa77a48bdb159ceba3.jpg.xml',
 './data_images/annotations/2007_000256_jpg.rf.02cb96ee20db070c

In [172]:
# Read xml file
# from each xml file we need to extract
# filename, size(width, height), object(name, xmin, xmax, ymin, ymax)
def extract_text(filename):
    tree = et.parse(filename)
    root = tree.getroot()
    
    # etract filename
    image_name = root.find('filename').text
    
    # width and height of the image
    width = root.find('size').find('width').text
    height = root.find('size').find('height').text
    objs = root.findall('object')
    parser = []
    # obj = objs[0]
    for obj in objs:
        name = obj.find('name').text
        bndbox = obj.find('bndbox')
        xmin = bndbox.find('xmin').text
        xmax = bndbox.find('xmax').text
        ymin = bndbox.find('ymin').text
        ymax = bndbox.find('ymax').text
        parser.append([image_name,width,height,name,xmin,xmax,ymin,ymax])
    return parser

In [173]:
parser

[['2007_000032_jpg.rf.453cf71521fb73718369a7f07a41433c.jpg',
  '500',
  '281',
  'aeroplane',
  '130',
  '196',
  '90',
  '122'],
 ['2007_000032_jpg.rf.453cf71521fb73718369a7f07a41433c.jpg',
  '500',
  '281',
  'human',
  '21',
  '52',
  '189',
  '236'],
 ['2007_000032_jpg.rf.453cf71521fb73718369a7f07a41433c.jpg',
  '500',
  '281',
  'human',
  '197',
  '211',
  '186',
  '234'],
 ['2007_000032_jpg.rf.453cf71521fb73718369a7f07a41433c.jpg',
  '500',
  '281',
  'aeroplane',
  '96',
  '381',
  '79',
  '180']]

In [174]:
parser_all = list(map(extract_text,xmlfiles))

In [175]:
data = reduce(lambda x, y : x+y,parser_all)

In [176]:
df = pd.DataFrame(data,columns = ['filename','width','height','name','xmin','xmax','ymin','ymax'])

In [177]:
df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax
0,02.jpg,1024,768,car,174,871,237,557
1,2007_000032_jpg.rf.453cf71521fb73718369a7f07a4...,500,281,aeroplane,103,374,77,182
2,2007_000032_jpg.rf.453cf71521fb73718369a7f07a4...,500,281,aeroplane,132,196,87,122
3,2007_000032_jpg.rf.453cf71521fb73718369a7f07a4...,500,281,person,194,212,179,228
4,2007_000032_jpg.rf.453cf71521fb73718369a7f07a4...,500,281,person,25,43,188,237


In [178]:
df.shape

(31357, 8)

In [179]:
df['name'].value_counts()

person         13168
chair           2467
car             1989
dog             1234
bottle          1226
bird            1056
cat             1034
pottedplant      951
sheep            858
boat             854
aeroplane        808
tvmonitor        735
bicycle          695
sofa             677
motorbike        640
cow              638
diningtable      618
horse            611
train            561
bus              537
Name: name, dtype: int64

In [180]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31357 entries, 0 to 31356
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   filename  31357 non-null  object
 1   width     31357 non-null  object
 2   height    31357 non-null  object
 3   name      31357 non-null  object
 4   xmin      31357 non-null  object
 5   xmax      31357 non-null  object
 6   ymin      31357 non-null  object
 7   ymax      31357 non-null  object
dtypes: object(8)
memory usage: 1.9+ MB


In [181]:
#type conversion
cols = ['width','height','xmin','xmax','ymin','ymax']
df[cols] = df[cols].astype(int)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31357 entries, 0 to 31356
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   filename  31357 non-null  object
 1   width     31357 non-null  int32 
 2   height    31357 non-null  int32 
 3   name      31357 non-null  object
 4   xmin      31357 non-null  int32 
 5   xmax      31357 non-null  int32 
 6   ymin      31357 non-null  int32 
 7   ymax      31357 non-null  int32 
dtypes: int32(6), object(2)
memory usage: 1.2+ MB


In [182]:
#center x, center y
df['center_x'] = ((df['xmax'] + df['xmin'])/2)/df['width']
df['center_y'] = ((df['ymax'] + df['ymin'])/2)/df['height']

# w
df['w'] = (df['xmax'] - df['xmin'])/df['width']

#h
df['h'] = (df['ymax'] - df['ymin'])/df['height']


In [183]:
df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h
0,02.jpg,1024,768,car,174,871,237,557,0.510254,0.516927,0.680664,0.416667
1,2007_000032_jpg.rf.453cf71521fb73718369a7f07a4...,500,281,aeroplane,103,374,77,182,0.477,0.460854,0.542,0.373665
2,2007_000032_jpg.rf.453cf71521fb73718369a7f07a4...,500,281,aeroplane,132,196,87,122,0.328,0.371886,0.128,0.124555
3,2007_000032_jpg.rf.453cf71521fb73718369a7f07a4...,500,281,person,194,212,179,228,0.406,0.724199,0.036,0.174377
4,2007_000032_jpg.rf.453cf71521fb73718369a7f07a4...,500,281,person,25,43,188,237,0.068,0.756228,0.036,0.174377


In [184]:
# Split data into train and test
images = df['filename'].unique()

In [259]:
len(images)

13300

In [186]:
# 80% train and 20% test
img_df = pd.DataFrame(images,columns = ['filename'])
img_train = tuple(img_df.sample(frac = 0.8)['filename'])  # random shuffle and pick 70% of images


In [260]:
img_train

('2010_004783_jpg.rf.07da048cce6fb9be60290f8774d74502.jpg',
 '2010_005968_jpg.rf.2e6cd930a5156c3b450c96d25aeb41f6.jpg',
 '2011_001111_jpg.rf.0542a7ffd3866f61d8a893606bcc2c2d.jpg',
 '2008_007166_jpg.rf.77edd125cbd870d8613216d123bc3ddf.jpg',
 '2008_006624_jpg.rf.c89e6bf8feb065259dac36a4915ade9a.jpg',
 '2011_003465_jpg.rf.96e17f62f35c4d5dcd1d331095034c40.jpg',
 '2008_002124_jpg.rf.6a9386ba17610e3297e18ee6cc6e2de8.jpg',
 '2008_006065_jpg.rf.1c57c2cb1b559e4f2b9643cc82ce7282.jpg',
 '2012_002125_jpg.rf.088976ab08b513b05663bd9b35183ad8.jpg',
 '2008_002961_jpg.rf.a6baeaec8a3c54b61c1c5300a803d6f1.jpg',
 '2010_006095_jpg.rf.58237c33c7ce03d7df69ddc82784a155.jpg',
 '2011_004263_jpg.rf.cdfe4802edb1f89d4b8a641a64360870.jpg',
 '2007_004663_jpg.rf.4d4b4bbe29cad60b9883e497b4983f44.jpg',
 '2008_008103_jpg.rf.a7ad2127af12008723ca826f834e8b4d.jpg',
 '2012_004210_jpg.rf.ff6a83996610f66c505b6149630a2f89.jpg',
 '2010_005041_jpg.rf.0a55490ee74fee517026d417d56e5d82.jpg',
 '2008_008319_jpg.rf.48b3a73deb84f0a3cd8

In [188]:
len(img_train)

10640

In [189]:
img_test = tuple(img_df.query(f'filename not in {img_train}')['filename'])  # take rest 20% images

In [190]:
img_test

('02.jpg',
 '2007_000032_jpg.rf.453cf71521fb73718369a7f07a41433c.jpg',
 '2007_000256_jpg.rf.02cb96ee20db070c880b4b909ccf0874.jpg',
 '2007_000332_jpg.rf.fcb9e3219402ff64f7784a12ec575f97.jpg',
 '2007_000364_jpg.rf.935cacdb0f22aed817049c51b5da1e6b.jpg',
 '2007_000452_jpg.rf.743b20d564817ed050001950de651abd.jpg',
 '2007_000480_jpg.rf.adf26b6f20506b2b114ecb3c849101a3.jpg',
 '2007_000504_jpg.rf.d0a67292c8850ecf8305402f95c7daa3.jpg',
 '2007_000648_jpg.rf.0e74d108c5398bdc5b8911a62eedd95f.jpg',
 '2007_000676_jpg.rf.dbc2d4532de35a600a5d538892c21e85.jpg',
 '2007_000768_jpg.rf.f3aa7671aaaec5184e2618b17f81d131.jpg',
 '2007_001185_jpg.rf.4e912e5db5c29a4d79b9dda8ac3e0d1f.jpg',
 '2007_001397_jpg.rf.c4057b03dd210a4476af866845bf46d8.jpg',
 '2007_001430_jpg.rf.a98a169e695f2085df64175aa00ebfd8.jpg',
 '2007_001439_jpg.rf.c2a7b78547bf61df2ea758b9a5215860.jpg',
 '2007_001585_jpg.rf.219c8c4b1f2e4ea3f53433de0cdb2642.jpg',
 '2007_001698_jpg.rf.d435f8b2bd8ab45e8179c7bd40aee655.jpg',
 '2007_001709_jpg.rf.3c32d28e

In [191]:
len(img_train),len(img_test)

(10640, 2660)

In [192]:
train_df = df.query(f'filename in {img_train}')
test_df = df.query(f'filename in {img_test}')


In [261]:
train_df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h,id
5,2007_000033_jpg.rf.83ab0d65cbcc0be92b649082c8a...,500,366,aeroplane,8,498,106,262,0.506,0.502732,0.98,0.42623,10
6,2007_000033_jpg.rf.83ab0d65cbcc0be92b649082c8a...,500,366,aeroplane,420,481,199,225,0.901,0.579235,0.122,0.071038,10
7,2007_000033_jpg.rf.83ab0d65cbcc0be92b649082c8a...,500,366,aeroplane,324,410,187,222,0.734,0.558743,0.172,0.095628,10
8,2007_000061_jpg.rf.46ef80849da87d8113c0330be6e...,500,333,boat,273,436,10,278,0.709,0.432432,0.326,0.804805,9
9,2007_000061_jpg.rf.46ef80849da87d8113c0330be6e...,500,333,boat,183,280,213,251,0.463,0.696697,0.194,0.114114,9


In [262]:
test_df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h,id
0,02.jpg,1024,768,car,174,871,237,557,0.510254,0.516927,0.680664,0.416667,2
1,2007_000032_jpg.rf.453cf71521fb73718369a7f07a4...,500,281,aeroplane,103,374,77,182,0.477,0.460854,0.542,0.373665,10
2,2007_000032_jpg.rf.453cf71521fb73718369a7f07a4...,500,281,aeroplane,132,196,87,122,0.328,0.371886,0.128,0.124555,10
3,2007_000032_jpg.rf.453cf71521fb73718369a7f07a4...,500,281,person,194,212,179,228,0.406,0.724199,0.036,0.174377,0
4,2007_000032_jpg.rf.453cf71521fb73718369a7f07a4...,500,281,person,25,43,188,237,0.068,0.756228,0.036,0.174377,0


In [263]:
df['name'].value_counts()

person         13168
chair           2467
car             1989
dog             1234
bottle          1226
bird            1056
cat             1034
pottedplant      951
sheep            858
boat             854
aeroplane        808
tvmonitor        735
bicycle          695
sofa             677
motorbike        640
cow              638
diningtable      618
horse            611
train            561
bus              537
Name: name, dtype: int64

In [264]:
# Assign id number to object names
def label_encoding(x):
    labels = {'person' :0, 'chair':1, 'car':2, 'dog':3,'bottle':4,'bird' :5, 'cat':6, 'pottedplant':7,'sheep':8,'boat':9,'aeroplane':10,'tvmonitor':11,'bicycle':12,
             'sofa':13,'motorbike':14,'cow':15,'diningtable':16,'horse':17,'train':18,'bus':19}
    return labels[x]

In [265]:
train_df['id'] = train_df['name'].apply(label_encoding)
test_df['id'] = test_df['name'].apply(label_encoding)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['id'] = train_df['name'].apply(label_encoding)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df['id'] = test_df['name'].apply(label_encoding)


In [266]:
train_df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h,id
5,2007_000033_jpg.rf.83ab0d65cbcc0be92b649082c8a...,500,366,aeroplane,8,498,106,262,0.506,0.502732,0.98,0.42623,10
6,2007_000033_jpg.rf.83ab0d65cbcc0be92b649082c8a...,500,366,aeroplane,420,481,199,225,0.901,0.579235,0.122,0.071038,10
7,2007_000033_jpg.rf.83ab0d65cbcc0be92b649082c8a...,500,366,aeroplane,324,410,187,222,0.734,0.558743,0.172,0.095628,10
8,2007_000061_jpg.rf.46ef80849da87d8113c0330be6e...,500,333,boat,273,436,10,278,0.709,0.432432,0.326,0.804805,9
9,2007_000061_jpg.rf.46ef80849da87d8113c0330be6e...,500,333,boat,183,280,213,251,0.463,0.696697,0.194,0.114114,9


In [267]:
test_df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h,id
0,02.jpg,1024,768,car,174,871,237,557,0.510254,0.516927,0.680664,0.416667,2
1,2007_000032_jpg.rf.453cf71521fb73718369a7f07a4...,500,281,aeroplane,103,374,77,182,0.477,0.460854,0.542,0.373665,10
2,2007_000032_jpg.rf.453cf71521fb73718369a7f07a4...,500,281,aeroplane,132,196,87,122,0.328,0.371886,0.128,0.124555,10
3,2007_000032_jpg.rf.453cf71521fb73718369a7f07a4...,500,281,person,194,212,179,228,0.406,0.724199,0.036,0.174377,0
4,2007_000032_jpg.rf.453cf71521fb73718369a7f07a4...,500,281,person,25,43,188,237,0.068,0.756228,0.036,0.174377,0


In [268]:
# save Image and Label in text
import os
from shutil import move

In [269]:
# train_folder = 'data_images/train'
# test_folder = 'data_images/test'


# os.mkdir(train_folder)
# os.mkdir(test_folder)

In [270]:
cols = ['filename','id','center_x',	'center_y','w',	'h']
groupby_obj_train = train_df[cols].groupby('filename')
groupby_obj_test = test_df[cols].groupby('filename')


In [271]:
# groupby_obj_train.get_group('02.jpg').set_index('filename').to_csv('sample.txt',index=False,header=False)
#save each image in train/test folder and respective labels in .txt 

def save_data(filename,folder_path,group_obj):
    try:
        group_data = group_obj.get_group(filename)
    except KeyError:
        print(f"Error: Filename '{filename}' not found in the group.")
        return
    # move image
    src = os.path.join('data_images',filename)
    dst = os.path.join(folder_path,filename)
    move(src,dst) 

    # save the labels
    text_filename =os.path.join(folder_path,
                            os.path.splitext(filename)[0]+'.txt')
    # groupby_obj_train.get_group(filename).set_index('filename').to_csv(text_filename, sep=' ', index=False, header=False)
    group_obj.get_group(filename).set_index('filename').to_csv(text_filename,sep=' ',index=False,header=False)



In [272]:
filename_series = pd.Series(groupby_obj_train.groups.keys())

In [273]:
filename_series.apply(save_data,args=(train_folder,groupby_obj_train))


0        None
1        None
2        None
3        None
4        None
         ... 
10635    None
10636    None
10637    None
10638    None
10639    None
Length: 10640, dtype: object

In [274]:
filename_series_test = pd.Series(groupby_obj_test.groups.keys())


In [275]:
filename_series_test

0                                                  02.jpg
1       2007_000032_jpg.rf.453cf71521fb73718369a7f07a4...
2       2007_000256_jpg.rf.02cb96ee20db070c880b4b909cc...
3       2007_000332_jpg.rf.fcb9e3219402ff64f7784a12ec5...
4       2007_000364_jpg.rf.935cacdb0f22aed817049c51b5d...
                              ...                        
2655    2012_004293_jpg.rf.3d4fe35172394c67d21759e499c...
2656    2012_004298_jpg.rf.67d22562ef82b3705800286b087...
2657    2012_004300_jpg.rf.cfc9b270717c3f1c6230ac01969...
2658    2012_004306_jpg.rf.2e0c26d54a3e3fc3bd9da724556...
2659    2012_004315_jpg.rf.c9f36613c4fed684d28163e2518...
Length: 2660, dtype: object

In [276]:
groupby_obj_test.head()

Unnamed: 0,filename,id,center_x,center_y,w,h
0,02.jpg,2,0.510254,0.516927,0.680664,0.416667
1,2007_000032_jpg.rf.453cf71521fb73718369a7f07a4...,10,0.477000,0.460854,0.542000,0.373665
2,2007_000032_jpg.rf.453cf71521fb73718369a7f07a4...,10,0.328000,0.371886,0.128000,0.124555
3,2007_000032_jpg.rf.453cf71521fb73718369a7f07a4...,0,0.406000,0.724199,0.036000,0.174377
4,2007_000032_jpg.rf.453cf71521fb73718369a7f07a4...,0,0.068000,0.756228,0.036000,0.174377
...,...,...,...,...,...,...
31341,2012_004293_jpg.rf.3d4fe35172394c67d21759e499c...,0,0.446667,0.624000,0.893333,0.748000
31343,2012_004298_jpg.rf.67d22562ef82b3705800286b087...,0,0.285000,0.459420,0.230000,0.275362
31345,2012_004300_jpg.rf.cfc9b270717c3f1c6230ac01969...,0,0.518000,0.305333,0.068000,0.082667
31348,2012_004306_jpg.rf.2e0c26d54a3e3fc3bd9da724556...,0,0.166000,0.637333,0.232000,0.720000


In [277]:
filename_series_test.apply(save_data,args=(test_folder,groupby_obj_test))

0       None
1       None
2       None
3       None
4       None
        ... 
2655    None
2656    None
2657    None
2658    None
2659    None
Length: 2660, dtype: object

In [278]:
import torch

In [279]:
torch.cuda.is_available()

True