In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# For reading building.yaml as a Python dict
import yaml # If you don't have this package use 'pip install pyyaml' into commandline or terminal


# Matplotlib Config
%matplotlib inline
plt.style.use('fivethirtyeight')
# Turn 'building.yaml' file into a python dictionary using PyYAML 
with open('building.yaml') as f:
    building_data = f.read()
    building_data = yaml.load(building_data)
    f.close()

In [2]:
## See what keys exist in the dictionary
print(building_data.keys())
building = pd.DataFrame(building_data) #building contains only two rows out of which only one(first one) contains actual info
df = pd.DataFrame(building['building1'][0])
df.shape #there are 6461 rows each denoting a different part of the building

dict_keys(['building1'])


(6461, 6)

In [3]:
df.head()

Unnamed: 0,collision_geometry,config,escalator,isElevator,name,visualization_geometry
0,"{'type': 'box', 'material': 'grey', 'dims': [4...","{'position': [114.325, -45.14, 7.5], 'orientat...",,,NoName10,
1,"{'type': 'box', 'material': 'grey', 'dims': [3...","{'position': [116.4, -46.945, 7.5], 'orientati...",,,NoName12,
2,"{'type': 'box', 'material': 'grey', 'dims': [4...","{'position': [92.445, -62.4, 7.5], 'orientatio...",,,NoName19,
3,"{'type': 'box', 'material': 'grey', 'dims': [8...","{'position': [33.085, -63.02, 7.5], 'orientati...",,,NoName26,
4,"{'type': 'box', 'material': 'grey', 'dims': [1...","{'position': [37.31, -68.51, 7.5], 'orientatio...",,,NoName28,


## NoName parts of the building

In [4]:
NoName = df[df['name'].str.contains("NoName")==True] #out of the 6461 rows, 5879 are 'NoName' rows
NoName.head()
walls_ = NoName[NoName.visualization_geometry.isnull()] #out of the 5879 rows, 5024 has viz_geo as NULL
#so presumably they're walls (according to the file BUILDING-README)
NoName_floors = NoName[NoName.visualization_geometry.notnull()]#out of 5879 rows,855 has viz_geo values and are of type polygon
#so they must be floors WITHOUT significant spots like entrance/exit/coffee shop
NoName_floors.head()

Unnamed: 0,collision_geometry,config,escalator,isElevator,name,visualization_geometry
323,"{'type': 'polygon', 'material': 'off_white', '...","{'position': [0, 0, 0], 'orientation': [0, 0, ...",,False,NoName_895,"{'type': 'box', 'material': 'off_white', 'dims..."
327,"{'type': 'polygon', 'material': 'off_white', '...","{'position': [0, 0, 0], 'orientation': [0, 0, ...",,False,NoName_903,"{'type': 'box', 'material': 'off_white', 'dims..."
624,"{'type': 'polygon', 'material': 'off_white', '...","{'position': [0, 0, 0], 'orientation': [0, 0, ...",,False,NoName_1584,"{'type': 'box', 'material': 'off_white', 'dims..."
1140,"{'type': 'polygon', 'material': 'off_white', '...","{'position': [0, 0, 0], 'orientation': [0, 0, ...",,False,NoName_2652,"{'type': 'box', 'material': 'off_white', 'dims..."
1152,"{'type': 'polygon', 'material': 'off_white', '...","{'position': [0, 0, 0], 'orientation': [0, 0, ...",,False,NoName_2677,"{'type': 'box', 'material': 'off_white', 'dims..."


In [5]:
NoName_floors.shape

(855, 6)

In [6]:
#Collision Geometry split of NoName objects
from collections import Counter
type_ = []
material = []
for i in NoName.collision_geometry.get_values():
    type_.append(i['type'])
    material.append(i['material'])
print(Counter(type_),Counter(material))
#the 5024 grey boxes are walls and 855 polygons are NoName_floors

Counter({'box': 5024, 'polygon': 855}) Counter({'grey': 5024, 'off_white': 843, 'black': 12})


## Walls: All parts without viz_geometry

In [7]:

walls_.head()

Unnamed: 0,collision_geometry,config,escalator,isElevator,name,visualization_geometry
0,"{'type': 'box', 'material': 'grey', 'dims': [4...","{'position': [114.325, -45.14, 7.5], 'orientat...",,,NoName10,
1,"{'type': 'box', 'material': 'grey', 'dims': [3...","{'position': [116.4, -46.945, 7.5], 'orientati...",,,NoName12,
2,"{'type': 'box', 'material': 'grey', 'dims': [4...","{'position': [92.445, -62.4, 7.5], 'orientatio...",,,NoName19,
3,"{'type': 'box', 'material': 'grey', 'dims': [8...","{'position': [33.085, -63.02, 7.5], 'orientati...",,,NoName26,
4,"{'type': 'box', 'material': 'grey', 'dims': [1...","{'position': [37.31, -68.51, 7.5], 'orientatio...",,,NoName28,


In [8]:
#decomposing walls
df_list = []
for i in walls_.iterrows():
    data = { 'type' : i[1]['collision_geometry']['type'],
            'material' : i[1]['collision_geometry']['material'],
            'dimension' : i[1]['collision_geometry']['dims'],
            'orientation' : i[1]['config']['orientation'],
            'position' : i[1]['config']['position'],
            'name' : i[1]['name'] }
    df_list.append(data)
walls = pd.DataFrame(df_list)
walls.head()      

Unnamed: 0,dimension,material,name,orientation,position,type
0,"[4.150000000000006, 0.15, 2.99]",grey,NoName10,"[6.123233995736766e-17, 1.0, 0.0, 0.0]","[114.325, -45.14, 7.5]",box
1,"[3.6099999999999994, 0.15, 2.99]",grey,NoName12,"[0.7071067811865476, 0.7071067811865475, 0.0, ...","[116.4, -46.945, 7.5]",box
2,"[47.69000000000001, 0.15, 2.99]",grey,NoName19,"[1.0, 0.0, 0.0, 0.0]","[92.445, -62.4, 7.5]",box
3,"[8.450000000000003, 0.15, 2.99]",grey,NoName26,"[6.123233995736766e-17, 1.0, 0.0, 0.0]","[33.085, -63.02, 7.5]",box
4,"[10.979999999999997, 0.15, 2.99]",grey,NoName28,"[0.7071067811865476, -0.7071067811865475, 0.0,...","[37.31, -68.51, 7.5]",box


## Named parts of the building

In [9]:
floors = df[df['name'].str.contains("NoName")==False] 
floors.head()
#Out of the 6461 rows, 576 has name denoting something - entrance/exit/elevator


Unnamed: 0,collision_geometry,config,escalator,isElevator,name,visualization_geometry
14,"{'type': 'polygon', 'material': 'off_white', '...","{'position': [0, 0, 0], 'orientation': [0, 0, ...",,False,entry__exit_4_58,"{'type': 'box', 'material': 'off_white', 'dims..."
17,"{'type': 'polygon', 'material': 'off_white', '...","{'position': [0, 0, 0], 'orientation': [0, 0, ...",,False,radio_shack_69,"{'type': 'box', 'material': 'off_white', 'dims..."
27,"{'type': 'polygon', 'material': 'off_white', '...","{'position': [0, 0, 0], 'orientation': [0, 0, ...",,False,casa_java_96,"{'type': 'box', 'material': 'off_white', 'dims..."
41,"{'type': 'polygon', 'material': 'off_white', '...","{'position': [0, 0, 0], 'orientation': [0, 0, ...",,False,vacant_2_132,"{'type': 'box', 'material': 'off_white', 'dims..."
44,"{'type': 'polygon', 'material': 'off_white', '...","{'position': [0, 0, 0], 'orientation': [0, 0, ...",,False,vacant_1_139,"{'type': 'box', 'material': 'off_white', 'dims..."


In [10]:
df[df['visualization_geometry'].notnull()].shape

(1431, 6)

In [11]:
floors.shape

(576, 6)

## Further Exploration

In [12]:
## There is only one key, which further has two keys - Type has one value 'obstacle' so we're interested in 'geometries'
print(building_data['building1'].keys())
print(building_data['building1']['type'])


## 'geometries' is a list with 6461 objects
print(type(building_data['building1']['geometries']))
print(type(building_data['building1']['geometries'][0]))
print(len(building_data['building1']['geometries']))
print(len(building_data['building1']['geometries'][0]))

dict_keys(['type', 'geometries'])
obstacle
<class 'list'>
<class 'dict'>
6461
3


In [13]:
## Print first ten elements of the list
print(building_data['building1']['geometries'][:10])

[{'name': 'NoName10', 'collision_geometry': {'type': 'box', 'material': 'grey', 'dims': [4.150000000000006, 0.15, 2.99]}, 'config': {'position': [114.325, -45.14, 7.5], 'orientation': [6.123233995736766e-17, 1.0, 0.0, 0.0]}}, {'name': 'NoName12', 'collision_geometry': {'type': 'box', 'material': 'grey', 'dims': [3.6099999999999994, 0.15, 2.99]}, 'config': {'position': [116.4, -46.945, 7.5], 'orientation': [0.7071067811865476, 0.7071067811865475, 0.0, 0.0]}}, {'name': 'NoName19', 'collision_geometry': {'type': 'box', 'material': 'grey', 'dims': [47.69000000000001, 0.15, 2.99]}, 'config': {'position': [92.445, -62.4, 7.5], 'orientation': [1.0, 0.0, 0.0, 0.0]}}, {'name': 'NoName26', 'collision_geometry': {'type': 'box', 'material': 'grey', 'dims': [8.450000000000003, 0.15, 2.99]}, 'config': {'position': [33.085, -63.02, 7.5], 'orientation': [6.123233995736766e-17, 1.0, 0.0, 0.0]}}, {'name': 'NoName28', 'collision_geometry': {'type': 'box', 'material': 'grey', 'dims': [10.979999999999997, 

In [14]:
## 'type' seems interesting in the 'geometries' dictionary
## Iterate over the elements, add it to a list and use 'set' to get the unique 'types'

type_list = []
for i in building_data['building1']['geometries']:
    type_list.append(i['collision_geometry']['type'])
    
## Set gives you the unique elements in a list
print(set(type_list))

{'polygon', 'box'}


In [15]:
## Look at items that are of type 'box'
box_type = []
for i in building_data['building1']['geometries']:
    if i['collision_geometry']['type'] == 'box':
        box_type.append(i['name'])

len(box_type)
#print(box_type)
## Most of them are NoName objects so let's look at type == 'polygons'

5030

In [16]:
## Let's try another approach where we print out all 'names' that are not 'NoName' to see what different type of
## objects exist.

for i in range(0,len(building_data['building1']['geometries'])):
    if 'NoName' not in str(building_data['building1']['geometries'][i]['name']):
        print(building_data['building1']['geometries'][i]['name'])
        
## Pretty much the same result as the polygon/box differentiation.

entry__exit_4_58
radio_shack_69
casa_java_96
vacant_2_132
vacant_1_139
Hudson_News_Stand_199
men_s_room_256
info_booth_1_306
cake_boss_454
entry_exit_7_455
entry_exit_stairs_544
blue_and_red_stairwell_550
entry_exit_591
hudson_news_2_623
SF_office_1_668
Escalator_Second_7_2_679
Escalator_Second_7_3_712
Munchy_s_724
stair_D_729
_731
_751
vacant_1_805
hudson_news_2_807
sweet_factory_862
mrs__field_s_cookies_866
loading_dock_868
trash_871
exit_entry_1_1002
exit_entry_ramp_1004
Lobby_4_1008
Escalator_Second_5_1_1195
escalator_26_1281
1329
Aunty_Anne_s_1331
storage_room_1338
lottery_1347
passageway_3_1384
_1394
passageway_2_1406
passageway_1_1408
telephones_1423
escalator_and_entry_exit_1442
Bolton_s_1445
_1460
strawberry_1470
wl_stairwell_1471
elec_room_3_1502
Escalator_Second_6_2_1512
Escalator_Second_6_4_1520
Escalator_Second_6_3_1528
Escalator_Second_8_2_1539
cafe_metro_1545
ticket_1555
shortline_tickets_1564
Women_s_room_1594
Men_s_room_2_1621
hudson_news_and_storage_1639
Mcann_s_cafe_

## List of entrances/exits

In [17]:
## For an initial run, let's look at objects that have the words 'exit' 'entry' 'entrance' in their name
## and add their info to a list.
## There could be more than these entrances/exits with other names?? - this is just for a starting point

In [18]:
exit_entry_list = []

for i in range(0,len(building_data['building1']['geometries'])):
    for keyword in ['exit','entry','entrance']:
        if (keyword in str(building_data['building1']['geometries'][i]['name']).lower()):
            exit_entry_list.append(building_data['building1']['geometries'][i])

print(len(exit_entry_list))
print(len(exit_entry_list[0]))

26
5


In [29]:
new_exit_entry_list = []

for i in range(0,len(building_data['building1']['geometries'])):
    for keyword in ['exit','entry','entrance','njt','nj_tr','greyhound']:
        if (keyword in str(building_data['building1']['geometries'][i]['name']).lower()):
            new_exit_entry_list.append(building_data['building1']['geometries'][i])

print(len(new_exit_entry_list))
print(len(new_exit_entry_list[0]))

107
5


In [30]:
## So we'd probably be interested in the triangle coordiantes - Still need to figure out how these work
## Lets take the first element of our entry_exit list as test

In [31]:
test_coordinates = exit_entry_list[0]['collision_geometry']['triangles']

## Entrance/Exits with their coordinates

In [32]:
# Convert list of Entrances/Exit to PANDAS Dataframe

df_list = []

# Iterate over the exit_entry_list to get 'names' and 'triangles' points
# Turn into dictionary and append to df_list for creation of dataframe

for i in new_exit_entry_list:
    xyz_list = []
    coordinates = i['collision_geometry']['triangles']
    for j in range(0,len(coordinates),3):
        xyz_list.append((coordinates[j],coordinates[j+1],coordinates[j+2]))
    x = coordinates[::3]
    y = coordinates[1::3]
    z = coordinates[2::3]
    data = {'name' : i['name'],
           'x' : x,
           'y' : y,
           'z' : z,
            'triangles' : xyz_list, #list of triples
            'no_of_triangles' : len(xyz_list),  #no. of triangles used to represent each entrance/exit varies
           }
    
    df_list.append(data)

# Create the dataframe
df = pd.DataFrame(df_list)
df.head(10)

# Because the way we look for 'exit' , 'entrance' and 'exit' in our list exit_entry_list, some elements appear twice 
# (those that have 2 of those words at the same time in their name)

df = df.drop_duplicates(subset='name',keep='first')
df.count()
df.head()
exits = df

In [33]:
exits.head(100)

Unnamed: 0,name,no_of_triangles,triangles,x,y,z
0,entry__exit_4_58,6,"[(100.6, -17.01, 6.0), (100.6, -11.75, 6.0), (...","[100.6, 100.6, 92.88, 100.6, 92.88, 92.88]","[-17.01, -11.75, -11.75, -17.01, -11.75, -17.01]","[6.0, 6.0, 6.0, 6.0, 6.0, 6.0]"
2,entry_exit_7_455,6,"[(120.12, 60.33, 6.0), (122.93, 64.5, 6.0), (1...","[120.12, 122.93, 120.16, 120.12, 123.0, 122.93]","[60.33, 64.5, 64.5, 60.33, 60.28, 64.5]","[6.0, 6.0, 6.0, 6.0, 6.0, 6.0]"
4,entry_exit_stairs_544,6,"[(46.24, 9.3, 6.0), (53.04, 9.3, 6.0), (53.04,...","[46.24, 53.04, 53.04, 46.24, 53.04, 46.08]","[9.3, 9.3, 17.1, 9.3, 17.1, 16.96]","[6.0, 6.0, 6.0, 6.0, 6.0, 6.0]"
6,entry_exit_591,9,"[(107.77, 10.18, 6.0), (107.42, 16.32, 6.0), (...","[107.77, 107.42, 103.53, 103.54, 107.77, 107.7...","[10.18, 16.32, 16.7, 9.3, 8.9, 10.18, 9.3, 10....","[6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0]"
8,exit_entry_1_1002,6,"[(32.3, -20.64, 6.0), (36.34, -11.79, 6.0), (3...","[32.3, 36.34, 32.3, 32.3, 36.36, 36.34]","[-20.64, -11.79, -11.75, -20.64, -20.75, -11.79]","[6.0, 6.0, 6.0, 6.0, 6.0, 6.0]"
10,exit_entry_ramp_1004,6,"[(32.3, -20.64, 6.0), (30.31, -11.75, 6.0), (3...","[32.3, 30.31, 30.39, 32.3, 32.3, 30.31]","[-20.64, -11.75, -20.67, -20.64, -11.75, -11.75]","[6.0, 6.0, 6.0, 6.0, 6.0, 6.0]"
12,escalator_and_entry_exit_1442,27,"[(89.34, -17.41, 6.0), (89.54, -11.75, 6.0), (...","[89.34, 89.54, 84.15, 89.34, 84.15, 84.15, 84....","[-17.41, -11.75, -14.7, -17.41, -14.7, -17.43,...","[6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, ..."
14,NJT-223_4735,9,"[(-99.34, -23.12, 15.0), (-96.01, -23.12, 15.0...","[-99.34, -96.01, -96.01, -96.01, -96.01, -99.6...","[-23.12, -23.12, -21.47, -21.47, -20.1, -21.74...","[15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15...."
15,NJT-222_4752,9,"[(-101.13, -28.64, 15.0), (-101.15, -27.2, 15....","[-101.13, -101.15, -105.31, -104.93, -101.13, ...","[-28.64, -27.2, -29.03, -30.03, -28.64, -29.03...","[15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15...."
16,Greyhound_Bag_Check_7121,12,"[(82.2, 38.5, 0.0), (82.2, 36.5, 0.0), (83.0, ...","[82.2, 82.2, 83.0, 82.2, 82.2, 83.0, 82.2, 83....","[38.5, 36.5, 35.0, 36.5, 35.0, 35.0, 38.5, 40....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [25]:
# Convert to CSV for usage in other files
df.to_csv('entrance_exit_points.csv',index=False)

### All parts of the building decomposed

In [58]:
#taking the superset of all x,y,z coordinates from the building data
#there are three parts to the building - walls, NoName_floors, Named_floors(consists of entrance/exits/shops etc)

#PART_I - Coordinates of all floors
df_list = []
for i in floors.iterrows():
    xyz_list = []
    coordinates = i[1]['collision_geometry']['triangles']
    for j in range(0,len(coordinates),3):
        xyz_list.append((coordinates[j],coordinates[j+1],coordinates[j+2]))
    data = {'part' : 'floor',
            'name' : i[1]['name'],
            'xyz_list' : xyz_list, #list of triples
            'no_of_xyz' : len(xyz_list),  #no. of triangles used to represent each entrance/exit varies
            'type' : 'triangle'
           }
    
    df_list.append(data)
print(len(df_list))   

#PART-II - Coordinates of all walls
for i in walls.iterrows():
    data = {'part' : 'walls',
           'name' : i[1]['name'],
           'xyz_list' : i[1]['position'],
           'no_of_xyz' : len(xyz_list),
           'type': 'box'}
    df_list.append(data)
print(len(df_list)) 

#PART-III - Coordinates of all NoName_floors
for i in NoName_floors.iterrows():
    xyz_list = []
    coordinates = i[1]['collision_geometry']['triangles']
    for j in range(0,len(coordinates),3):
        xyz_list.append((coordinates[j],coordinates[j+1],coordinates[j+2]))
    data = {'part' : 'floor',
            'name' : i[1]['name'],
            'xyz_list' : xyz_list, #list of triples
            'no_of_xyz' : len(xyz_list),  #no. of triangles used to represent each entrance/exit varies
            'type' : 'triangle'
           }
    
    df_list.append(data)
print(len(df_list))                          
                         
# Create the dataframe
df = pd.DataFrame(df_list)
df.head(10)


576
5600
6455


Unnamed: 0,name,no_of_xyz,part,type,xyz_list
0,entry__exit_4_58,6,floor,triangle,"[(100.6, -17.01, 6.0), (100.6, -11.75, 6.0), (..."
1,radio_shack_69,12,floor,triangle,"[(60.61, -30.29, 6.0), (53.1, -28.36, 6.0), (5..."
2,casa_java_96,12,floor,triangle,"[(28.81, -23.06, 6.0), (26.63, -27.91, 6.0), (..."
3,vacant_2_132,12,floor,triangle,"[(-33.71, -29.62, 6.0), (-33.3, -12.5, 6.0), (..."
4,vacant_1_139,12,floor,triangle,"[(-44.05, -12.5, 6.0), (-49.77, -12.5, 6.0), (..."
5,Hudson_News_Stand_199,6,floor,triangle,"[(60.8, -40.26, 6.0), (80.45, -40.26, 6.0), (8..."
6,men_s_room_256,12,floor,triangle,"[(54.71, -23.0, 9.0), (41.9, -18.78, 9.0), (41..."
7,info_booth_1_306,18,floor,triangle,"[(101.75, -38.4, 6.0), (103.67, -38.39, 6.0), ..."
8,cake_boss_454,24,floor,triangle,"[(106.69, 64.5, 6.0), (120.12, 60.33, 6.0), (1..."
9,entry_exit_7_455,6,floor,triangle,"[(120.12, 60.33, 6.0), (122.93, 64.5, 6.0), (1..."


In [57]:
df.shape

(6455, 5)

In [25]:
!jupyter nbconvert --to script Buildings_Exploratory.ipynb

[NbConvertApp] Converting notebook Buildings_Exploratory.ipynb to script
[NbConvertApp] Writing 8977 bytes to Buildings_Exploratory.py
