In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
import json

from tqdm import tqdm_notebook
from glob import glob
from pathlib import Path
#from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img, image

%matplotlib inline                               

In [2]:
# Collect every PKLot image and every annotation JSON reachable from the
# working directory; both results are plain lists of Path objects.
pklot_path_list = [*Path('.').glob('PKLot/PKLot/*/*/*/*.jpg')]
json_path_list = [*Path('.').glob('json/*.json')]

In [6]:
# Number of image paths collected in pklot_path_list.
len(pklot_path_list)

7943

In [7]:
# One-column frames: image paths in pklot_df, annotation paths in json_df.
pklot_df = pd.DataFrame(dict(image_path=pklot_path_list))
json_df = pd.DataFrame(dict(json_path=json_path_list))

In [8]:
# Key both frames by the file stem (file name without its extension) so an
# image and an annotation file sharing a name get the same date_id.
pklot_df['date_id'] = pklot_df['image_path'].apply(lambda path: path.stem)
json_df['date_id'] = json_df['json_path'].apply(lambda path: path.stem)

In [9]:
# Inner join on the shared date_id key: keeps only images that have a
# matching annotation file (inner is pandas' default join type).
new_df = pd.merge(pklot_df, json_df, on='date_id')

# Augmentation preview: build a random-transform generator, load one image,
# and show the first four augmented variants, one matplotlib figure each.
#
# NOTE(review): ImageDataGenerator, load_img, img_to_array and image are not
# imported anywhere in the visible notebook (the keras import in the first
# cell is commented out) and pklot_files is not defined in any visible cell,
# so this snippet raises NameError on a fresh kernel. Restore the import and
# the variable (presumably pklot_path_list was meant — confirm) or drop it.
datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest')

img = load_img(pklot_files[0], target_size=(150, 150))  # this is a PIL image
# NOTE(review): the shapes quoted below assume a channels-first layout; the
# actual axis order depends on the Keras image data format — confirm.
x = img_to_array(img)  # this is a Numpy array with shape (3, 150, 150)
x = x.reshape((1,) + x.shape)  # prepend a batch axis of length 1, e.g. (1, 3, 150, 150)

i = 0
# The loop is only left through the explicit break, after four batches.
for batch in datagen.flow(x, batch_size=1):
    plt.figure(i)
    imgplot = plt.imshow(image.array_to_img(batch[0]))
    i += 1
    if i % 4 == 0:
        break

In [10]:
# Summarise the merged frame (row count, columns, non-null counts, dtypes).
new_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7943 entries, 0 to 7942
Data columns (total 3 columns):
image_path    7943 non-null object
date_id       7943 non-null object
json_path     7943 non-null object
dtypes: object(3)
memory usage: 248.2+ KB


In [11]:
def jsonToList(pklot_file, json_file):
    """Flatten one annotation file into bbox, class and contour records.

    ``json_file`` is read line by line. Every non-blank line is parsed as a
    standalone JSON document that must contain a ``'spaces'`` list, and one
    record per parsed document is appended to each of the three result lists.

    Parameters
    ----------
    pklot_file : object
        Stored unchanged under ``'image_path'`` in every record.
    json_file : str or path-like
        Path of the annotation file to open for reading.

    Returns
    -------
    tuple of (list, list, list)
        ``bbox_list``: dicts with ``'image_path'`` and ``'rotated_bbox'``; the
        latter holds, for each space in order, the rotated rectangle's
        ``center.x center.y size.h size.w angle.d`` joined by single spaces.
        ``class_list``: dicts with ``'image_path'`` and ``'occupied'``, the
        space-separated ``occupied`` value of each space.
        ``contour_list``: dicts with ``'image_path'`` and ``'contours'``, the
        space-separated ``x y`` pairs of every contour point of every space.
        None of the strings carries leading or trailing whitespace; a
        document with no spaces yields empty strings.

    Raises
    ------
    KeyError
        If a parsed document or one of its spaces lacks an expected key.
    ValueError
        If a non-blank line is not valid JSON (raised by ``json.loads``).
    """
    bbox_list = []
    class_list = []
    contour_list = []

    with open(json_file, 'r') as json_data:
        for line in json_data:
            # A blank line (e.g. a trailing newline at end of file) carries
            # no document; skip it instead of letting json.loads fail on it.
            if not line.strip():
                continue
            row = json.loads(line)

            bbox_fields = []
            class_fields = []
            contour_fields = []

            for segmented_data in row['spaces']:
                rect = segmented_data['rotatedRect']
                # Field order (x, y, h, w, d) is part of the output format.
                bbox_fields.extend((rect['center']['x'],
                                    rect['center']['y'],
                                    rect['size']['h'],
                                    rect['size']['w'],
                                    rect['angle']['d']))
                class_fields.append(segmented_data['occupied'])

                for contour in segmented_data['contour']:
                    contour_fields.extend((contour['x'], contour['y']))

            # Joining with ' ' produces no trailing separator. The previous
            # code appended "value " pieces and then called rstrip() without
            # keeping its result, so every string ended in a stray space.
            bbox_list.append({
                'image_path': pklot_file,
                'rotated_bbox': ' '.join('{}'.format(v) for v in bbox_fields),
            })
            class_list.append({
                'image_path': pklot_file,
                'occupied': ' '.join('{}'.format(v) for v in class_fields),
            })
            contour_list.append({
                'image_path': pklot_file,
                'contours': ' '.join('{}'.format(v) for v in contour_fields),
            })
    return bbox_list, class_list, contour_list


In [12]:
# Accumulate the records returned by jsonToList for every
# (image path, annotation path) pair in new_df.
bbox_data = []
class_data = []
contour_data = []

# zip() has no length, so tqdm needs an explicit total. Take it from new_df
# so the bar always matches the rows actually iterated; the previous
# hard-coded 12417 did not match the frame's row count.
for pklot_path, json_path in tqdm_notebook(zip(new_df['image_path'], new_df['json_path']),
                                           total=len(new_df)):
    bbox_list, class_list, contour_list = jsonToList(pklot_path, json_path)

    bbox_data.extend(bbox_list)
    class_data.extend(class_list)
    contour_data.extend(contour_list)

HBox(children=(IntProgress(value=0, max=12417), HTML(value='')))




In [13]:
# Turn each list of per-image record dicts into its own DataFrame.
bbox_df, class_df, contour_df = (
    pd.DataFrame(records) for records in (bbox_data, class_data, contour_data)
)

In [14]:
# Drop trailing whitespace from the space-separated value strings.
bbox_df['rotated_bbox'] = bbox_df['rotated_bbox'].map(str.rstrip)
class_df['occupied'] = class_df['occupied'].map(str.rstrip)
contour_df['contours'] = contour_df['contours'].map(str.rstrip)

In [15]:
# Preview the first rows of the rotated-bbox frame.
bbox_df.head()

Unnamed: 0,image_path,rotated_bbox
0,PKLot/PKLot/UFPR04/Rainy/2013-01-18/2013-01-18...,651 636 141 83 -34 705 563 139 77 -44 732 480 ...
1,PKLot/PKLot/UFPR04/Rainy/2013-01-18/2013-01-18...,651 636 141 83 -34 705 563 139 77 -44 732 480 ...
2,PKLot/PKLot/UFPR04/Rainy/2013-01-18/2013-01-18...,651 636 141 83 -34 705 563 139 77 -44 732 480 ...
3,PKLot/PKLot/UFPR04/Rainy/2013-01-18/2013-01-18...,651 636 141 83 -34 705 563 139 77 -44 732 480 ...
4,PKLot/PKLot/UFPR04/Rainy/2013-01-18/2013-01-18...,651 636 141 83 -34 705 563 139 77 -44 732 480 ...


In [16]:
# Preview the first rows of the occupancy frame.
class_df.head()

Unnamed: 0,image_path,occupied
0,PKLot/PKLot/UFPR04/Rainy/2013-01-18/2013-01-18...,0 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 ...
1,PKLot/PKLot/UFPR04/Rainy/2013-01-18/2013-01-18...,0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 ...
2,PKLot/PKLot/UFPR04/Rainy/2013-01-18/2013-01-18...,1 1 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 ...
3,PKLot/PKLot/UFPR04/Rainy/2013-01-18/2013-01-18...,1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 ...
4,PKLot/PKLot/UFPR04/Rainy/2013-01-18/2013-01-18...,1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 ...


In [17]:
# Preview the first rows of the contour frame.
contour_df.head()

Unnamed: 0,contours,image_path
0,641 570 726 671 650 708 577 602 698 505 782 58...,PKLot/PKLot/UFPR04/Rainy/2013-01-18/2013-01-18...
1,641 570 726 671 650 708 577 602 698 505 782 58...,PKLot/PKLot/UFPR04/Rainy/2013-01-18/2013-01-18...
2,641 570 726 671 650 708 577 602 698 505 782 58...,PKLot/PKLot/UFPR04/Rainy/2013-01-18/2013-01-18...
3,641 570 726 671 650 708 577 602 698 505 782 58...,PKLot/PKLot/UFPR04/Rainy/2013-01-18/2013-01-18...
4,641 570 726 671 650 708 577 602 698 505 782 58...,PKLot/PKLot/UFPR04/Rainy/2013-01-18/2013-01-18...


In [18]:
# Row count, non-null counts and dtypes of the rotated-bbox frame.
bbox_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7943 entries, 0 to 7942
Data columns (total 2 columns):
image_path      7943 non-null object
rotated_bbox    7943 non-null object
dtypes: object(2)
memory usage: 124.2+ KB


In [19]:
# Row count, non-null counts and dtypes of the occupancy frame.
class_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7943 entries, 0 to 7942
Data columns (total 2 columns):
image_path    7943 non-null object
occupied      7943 non-null object
dtypes: object(2)
memory usage: 124.2+ KB


In [20]:
# Row count, non-null counts and dtypes of the contour frame.
contour_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7943 entries, 0 to 7942
Data columns (total 2 columns):
contours      7943 non-null object
image_path    7943 non-null object
dtypes: object(2)
memory usage: 124.2+ KB


In [21]:
# Load the PUCPR table from PUCPR_cropped.csv, resolved relative to the
# working directory.
data_PUCPR = pd.read_csv('PUCPR_cropped.csv')

In [22]:
# Attach each image's rotated-bbox string to its occupancy string by joining
# the two frames on their shared image_path column.
data_UFPRs = pd.merge(class_df, bbox_df, on='image_path')

In [23]:
# Preview the first rows of the PUCPR table.
data_PUCPR.head()

Unnamed: 0,image_path,occupied,rotated_bbox
0,PKLot/AugPKLot/PUCPR/Rainy/2012-11-10/2012-11-...,1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ...,13 16 32 52 -77 45 15 36 54 -79 77 18 31 50 -7...
1,PKLot/AugPKLot/PUCPR/Rainy/2012-11-10/2012-11-...,1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ...,13 16 32 52 -77 45 15 36 54 -79 77 18 31 50 -7...
2,PKLot/AugPKLot/PUCPR/Rainy/2012-11-10/2012-11-...,1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ...,13 16 32 52 -77 45 15 36 54 -79 77 18 31 50 -7...
3,PKLot/AugPKLot/PUCPR/Rainy/2012-11-10/2012-11-...,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...,13 16 32 52 -77 45 15 36 54 -79 77 18 31 50 -7...
4,PKLot/AugPKLot/PUCPR/Rainy/2012-11-10/2012-11-...,1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ...,13 16 32 52 -77 45 15 36 54 -79 77 18 31 50 -7...


In [24]:
# Preview the first rows of the merged occupancy + rotated-bbox table.
data_UFPRs.head()

Unnamed: 0,image_path,occupied,rotated_bbox
0,PKLot/PKLot/UFPR04/Rainy/2013-01-18/2013-01-18...,0 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 ...,651 636 141 83 -34 705 563 139 77 -44 732 480 ...
1,PKLot/PKLot/UFPR04/Rainy/2013-01-18/2013-01-18...,0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 ...,651 636 141 83 -34 705 563 139 77 -44 732 480 ...
2,PKLot/PKLot/UFPR04/Rainy/2013-01-18/2013-01-18...,1 1 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 ...,651 636 141 83 -34 705 563 139 77 -44 732 480 ...
3,PKLot/PKLot/UFPR04/Rainy/2013-01-18/2013-01-18...,1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 ...,651 636 141 83 -34 705 563 139 77 -44 732 480 ...
4,PKLot/PKLot/UFPR04/Rainy/2013-01-18/2013-01-18...,1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 ...,651 636 141 83 -34 705 563 139 77 -44 732 480 ...


In [25]:
# Stack the rows of data_UFPRs and data_PUCPR into one frame with a fresh
# 0..n-1 index. pd.concat is used because DataFrame.append was deprecated in
# pandas 1.4 and removed in pandas 2.0.
data_combined = pd.concat([data_UFPRs, data_PUCPR], ignore_index=True)

In [26]:
# Preview the first rows of the combined table.
data_combined.head()

Unnamed: 0,image_path,occupied,rotated_bbox
0,PKLot/PKLot/UFPR04/Rainy/2013-01-18/2013-01-18...,0 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 ...,651 636 141 83 -34 705 563 139 77 -44 732 480 ...
1,PKLot/PKLot/UFPR04/Rainy/2013-01-18/2013-01-18...,0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 ...,651 636 141 83 -34 705 563 139 77 -44 732 480 ...
2,PKLot/PKLot/UFPR04/Rainy/2013-01-18/2013-01-18...,1 1 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 ...,651 636 141 83 -34 705 563 139 77 -44 732 480 ...
3,PKLot/PKLot/UFPR04/Rainy/2013-01-18/2013-01-18...,1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 ...,651 636 141 83 -34 705 563 139 77 -44 732 480 ...
4,PKLot/PKLot/UFPR04/Rainy/2013-01-18/2013-01-18...,1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 ...,651 636 141 83 -34 705 563 139 77 -44 732 480 ...


In [27]:
# Row count, non-null counts and dtypes of the combined table.
data_combined.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12416 entries, 0 to 12415
Data columns (total 3 columns):
image_path      12416 non-null object
occupied        12416 non-null object
rotated_bbox    12416 non-null object
dtypes: object(3)
memory usage: 291.1+ KB


In [28]:
# Write the combined table to CSV in the working directory, omitting the
# index column.
data_combined.to_csv('pklot_combined_bbox_rbox.csv', index=False)

In [None]:
# bbox_df.to_csv('pklot_bbox_data.csv', index=False)
# class_df.to_csv('pklot_class_data.csv', index=False)
# contour_df.to_csv('pklot_contour_data.csv', index=False)