## Exploratory Data Analysis

Goal: Determine how to read the annotation metadata and which fields are needed to extract the ground truth.

#### Extracting head data from csv file

In [1]:
# Magic Functions
%matplotlib inline

# Import Libraries for data EDA
import pandas as pd
import numpy as np
import os
import json
import matplotlib.pyplot as plt

from skimage import io, data, transform
from skimage.filters import threshold_otsu
from skimage.segmentation import clear_border
from skimage.measure import label, regionprops
from skimage.morphology import closing, square
from skimage.color import label2rgb

In [16]:
# Load the head-position annotation table from its CSV export
head_csv_path = '../dsp_intent_analyzer_dataset/head_data.csv'
head_data = pd.read_csv(head_csv_path)

In [17]:
# Preview the first five annotation rows
head_data.head(n=5)

Unnamed: 0,filename,file_size,file_attributes,region_count,region_id,region_shape_attributes,region_attributes
0,019_gaze_utensils.png,1324677,"{""caption"":"""",""public_domain"":""no"",""image_url""...",1,0,"{""name"":""point"",""cx"":1080,""cy"":147}","{""name"":""head_pos"",""image_quality"":{""good_illu..."
1,019_gaze_undetermined.png,1409888,"{""caption"":"""",""public_domain"":""no"",""image_url""...",1,0,"{""name"":""point"",""cx"":1031,""cy"":73}","{""name"":""head_pos"",""image_quality"":{""good_illu..."
2,019_gaze_spontaneous.png,1338064,"{""caption"":"""",""public_domain"":""no"",""image_url""...",1,0,"{""name"":""point"",""cx"":1245,""cy"":176}","{""name"":""head_pos"",""image_quality"":{""blur"":tru..."
3,019_gaze_food.png,1290376,"{""caption"":"""",""public_domain"":""no"",""image_url""...",1,0,"{""name"":""point"",""cx"":1118,""cy"":147}","{""name"":""head_pos"",""image_quality"":{""good_illu..."
4,019_gaze_bottle.png,1388652,"{""caption"":"""",""public_domain"":""no"",""image_url""...",1,0,"{""name"":""point"",""cx"":996,""cy"":134}","{""name"":""head_pos"",""image_quality"":{""good_illu..."


The ground truth requires the following fields:
- filename
    - Identifies the image file that the annotation belongs to.
    - Use the filename rather than the image itself to make the ground-truth annotation easier to construct.
- cx,cy
    - The location of the head_pos annotation.
    - Stored in region_shape_attributes.

In [18]:
# Get the filename of the first annotated row.
# The column is addressed by name instead of by position so the lookup keeps
# working if the column order of the CSV changes.
filename = head_data['filename'].iloc[0]
filename

'019_gaze_utensils.png'

In [19]:
# Get the cx,cy of the first annotated row.
# The column is addressed by name instead of by the positional index 5.
region_attr = head_data['region_shape_attributes'].iloc[0]

# The cell holds a JSON string; decode it into a plain dict for easy exploration
region = json.loads(region_attr)
cx = region['cx']
cy = region['cy']

In [20]:
idx = 0  # row of head_data that is turned into a ground-truth item
root_dir = '../dsp_intent_analyzer_dataset/head_data'

# Read the filename and the head position from the same row, inside this cell,
# so the image and its annotation always belong together and the cell does not
# depend on a `region` left over from an earlier cell.
row = head_data.iloc[idx]
region = json.loads(row['region_shape_attributes'])

# Format of the Ground Truth
item = {}

img_path = os.path.join(root_dir, row['filename'])
item['image'] = io.imread(img_path)
item['position'] = [region['cx'], region['cy']]

# NOTE: displaying `item` echoes the raw pixel array of the image
item

{'image': Array([[[224, 214, 203],
         [224, 214, 203],
         [224, 214, 203],
         ...,
         [171, 197, 191],
         [170, 196, 190],
         [169, 194, 189]],
 
        [[224, 214, 203],
         [224, 214, 203],
         [224, 214, 203],
         ...,
         [170, 196, 190],
         [169, 195, 189],
         [169, 194, 189]],
 
        [[225, 215, 203],
         [225, 215, 203],
         [225, 214, 203],
         ...,
         [169, 195, 189],
         [168, 195, 189],
         [168, 195, 189]],
 
        ...,
 
        [[ 30,  30,  30],
         [ 30,  30,  30],
         [ 30,  30,  30],
         ...,
         [122, 135, 137],
         [122, 135, 137],
         [122, 135, 137]],
 
        [[ 30,  30,  30],
         [ 30,  30,  30],
         [ 30,  30,  30],
         ...,
         [120, 134, 135],
         [120, 134, 135],
         [120, 134, 135]],
 
        [[ 30,  30,  30],
         [ 30,  30,  30],
         [ 30,  30,  30],
         ...,
         [118, 132,

The head_data exploration is complete; the same approach is applied to object_data below.

In [31]:
# Load the object annotations from their CSV export and preview the first rows
csv_file = '../dsp_intent_analyzer_dataset/object_data.csv'
object_data = pd.read_csv(filepath_or_buffer=csv_file)

object_data.head(n=5)

Unnamed: 0,filename,file_size,file_attributes,region_count,region_id,region_shape_attributes,region_attributes
0,019_gaze_utensils.png,1324677,{},36,0,"{""name"":""rect"",""x"":74,""y"":422,""width"":322,""hei...","{""backpack"":""25""}"
1,019_gaze_utensils.png,1324677,{},36,1,"{""name"":""rect"",""x"":33,""y"":912,""width"":274,""hei...","{""backpack"":""74""}"
2,019_gaze_utensils.png,1324677,{},36,2,"{""name"":""rect"",""x"":527,""y"":773,""width"":24,""hei...","{""backpack"":""80""}"
3,019_gaze_utensils.png,1324677,{},36,3,"{""name"":""rect"",""x"":622,""y"":669,""width"":104,""he...","{""backpack"":""40""}"
4,019_gaze_utensils.png,1324677,{},36,4,"{""name"":""rect"",""x"":787,""y"":849,""width"":92,""hei...","{""backpack"":""68""}"


In [43]:
# Parameters used by the object-data cells:
#   idx        - row of object_data to inspect
#   file_idx   - column position read for the filename
#   cls_idx    - column position read for the class attributes (JSON string)
#   offset_idx - column position read for the bounding-box attributes (JSON string)
idx, file_idx, cls_idx, offset_idx = 1, 0, 6, 5

In [44]:
# Filename of the selected annotation row (scalar lookup by position)
object_data.iat[idx, file_idx]

'019_gaze_utensils.png'

In [49]:
# Decode the JSON attributes of the selected row; the class id is stored as a
# string under the 'backpack' key, so it is cast to int
cls_json = object_data.iat[idx, cls_idx]
region_cls = json.loads(cls_json)
y_cls = int(region_cls['backpack'])
y_cls

74

In [None]:
# Extracting the object: turn the rect's x/y/width/height into its
# (xmin, ymin) and (xmax, ymax) corners
region_offset = json.loads(object_data.iat[idx, offset_idx])
xmin, ymin = int(region_offset['x']), int(region_offset['y'])
xmax, ymax = xmin + int(region_offset['width']), ymin + int(region_offset['height'])
origin, endpt = (xmin, ymin), (xmax, ymax)
y_offset = [origin, endpt]
y_offset

#### Archived

In [48]:
# # # For JSON files
# ## Extract Object Data
# json_path = '../dsp_intent_analyzer_dataset/object_data.json'

# with open(json_path) as json_tmp:
#     json_file = json.load(json_tmp)
#     object_data = pd.DataFrame().from_dict(json_file['_via_img_metadata']).T

# object_data.head()

# # Extracting the offset
# y_offset = json.loads(object_data.iloc[idx,atr])
# def side_len_2_minmax(shape_attr):
#     """
#     Args: shape_attr: bbox metadata with 'x', 'y', 'width' and 'height' keys
#     returns xmin, ymin, xmax, ymax: the (xmin, ymin) and (xmax, ymax) corner pixel locations
#     """
    
#     xmin = shape_attr['x']
#     ymin = shape_attr['y']
#     xmax = shape_attr['x'] + shape_attr['width']
#     ymax = shape_attr['y'] + shape_attr['height']
    
#     return xmin,ymin,xmax,ymax

# side_len_2_minmax(tmp['shape_attributes'])

# # Index
# idx = 1
# attr_idx = 2
# root_dir = '../dsp_intent_analyzer_dataset/object_data'

# # Format the ground truth
# item = {}
# filename = object_data.iloc[idx,0]
# filepath = os.path.join(root_dir, filename)
# item['image'] = io.imread(filepath)

# # Iterate through the objects in the image
# # len(object_data.iloc[idx,attr_idx])
# i = 1

# # object category
# item['cat'] = object_data.iloc[idx,attr_idx][i]['region_attributes']['backpack']

# # Bbox location
# (xmin, ymin, xmax, ymax) = side_len_2_minmax(object_data.iloc[idx,attr_idx][i]['shape_attributes'])
# item['bbox'] = (xmin,ymin,xmax,ymax)
# item

# # View the image
# import matplotlib.patches as mpatches
# import matplotlib.pyplot as plt

# image_label_overlay = item['image']

# fig, ax = plt.subplots(figsize=(10, 6))
# ax.imshow(image_label_overlay)
# rect = mpatches.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
#                                   fill=False, edgecolor='red', linewidth=2)
# ax.add_patch(rect)
