In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# For reading building.yaml as a Python dict
import yaml # If you don't have this package use 'pip install pyyaml' into commandline or terminal


# Matplotlib Config
%matplotlib inline
plt.style.use('fivethirtyeight')

In [2]:
# Turn 'building.yaml' file into a python dictionary using PyYAML 
with open('building.yaml') as f:
    building_data = f.read()
    building_data = yaml.load(building_data)
    f.close()

In [5]:
## See what keys exist in the dictionary
print(building_data.keys())

dict_keys(['building1'])


In [116]:
building = pd.DataFrame(building_data) #building contains only two rows out of which only one(first one) contains actual info
df = pd.DataFrame(building['building1'][0])
df.shape #there are 6461 rows each denoting a different part of the building

(6461, 6)

In [117]:
df.head()

Unnamed: 0,collision_geometry,config,escalator,isElevator,name,visualization_geometry
0,"{'material': 'grey', 'type': 'box', 'dims': [4...","{'orientation': [6.123233995736766e-17, 1.0, 0...",,,NoName10,
1,"{'material': 'grey', 'type': 'box', 'dims': [3...","{'orientation': [0.7071067811865476, 0.7071067...",,,NoName12,
2,"{'material': 'grey', 'type': 'box', 'dims': [4...","{'orientation': [1.0, 0.0, 0.0, 0.0], 'positio...",,,NoName19,
3,"{'material': 'grey', 'type': 'box', 'dims': [8...","{'orientation': [6.123233995736766e-17, 1.0, 0...",,,NoName26,
4,"{'material': 'grey', 'type': 'box', 'dims': [1...","{'orientation': [0.7071067811865476, -0.707106...",,,NoName28,


## NoName parts of the building

In [118]:
NoName = df[df['name'].str.contains("NoName")==True] #out of the 6461 rows, 5879 are 'NoName' rows
NoName.head()
walls = NoName[NoName.visualization_geometry.isnull()] #out of the 5879 rows, 5024 has viz_geo as NULL
#so presumably they're walls (according to the file BUILDING-README)
NoName_floors = NoName[NoName.visualization_geometry.notnull()]#out of 5879 rows,855 has viz_geo values and are of type polygon
#so they must be floors WITHOUT significant spots like entrance/exit/coffee shop

In [122]:
#Collision Geometry split of NoName objects
from collections import Counter
type_ = []
material = []
for i in NoName.collision_geometry.get_values():
    type_.append(i['type'])
    material.append(i['material'])
print(Counter(type_),Counter(material))
#the 5024 grey boxes are walls and 855 polygons are NoName_floors

Counter({'box': 5024, 'polygon': 855}) Counter({'grey': 5024, 'off_white': 843, 'black': 12})


In [139]:
#Config column
from collections import Counter
orientation=[]
position=[]
for i in walls.config.get_values():
    orientation.append(i['orientation'])
    position.append(i['position'])
orientation
    

[[6.123233995736766e-17, 1.0, 0.0, 0.0],
 [0.7071067811865476, 0.7071067811865475, 0.0, 0.0],
 [1.0, 0.0, 0.0, 0.0],
 [6.123233995736766e-17, 1.0, 0.0, 0.0],
 [0.7071067811865476, -0.7071067811865475, 0.0, 0.0],
 [0.5011092285551673, -0.8653840425249619, 0.0, 0.0],
 [0.7071067811865476, 0.7071067811865475, 0.0, 0.0],
 [0.7071067811865476, 0.7071067811865475, 0.0, 0.0],
 [6.123233995736766e-17, 1.0, 0.0, 0.0],
 [6.123233995736766e-17, 1.0, 0.0, 0.0],
 [0.7071067811865476, 0.7071067811865475, 0.0, 0.0],
 [6.123233995736766e-17, 1.0, 0.0, 0.0],
 [0.7071067811865476, -0.7071067811865475, 0.0, 0.0],
 [0.7071067811865476, 0.7071067811865475, 0.0, 0.0],
 [1.0, 0.0, 0.0, 0.0],
 [6.123233995736766e-17, 1.0, 0.0, 0.0],
 [0.7071067811865476, -0.7071067811865475, 0.0, 0.0],
 [0.7071067811865476, 0.7071067811865475, 0.0, 0.0],
 [0.8399642038122861, -0.5426418121689411, 0.0, 0.0],
 [0.8296586882683287, 0.5582709565980271, 0.0, 0.0],
 [0.8396349384565422, 0.5431511485057161, 0.0, 0.0],
 [0.4351373703

## Named parts of the building

In [71]:
floors = df[df['name'].str.contains("NoName")==False] 
#Out of the 6461 rows, 576 has name denoting something - entrance/exit/elevator
np.sort(floors.name.unique()) 

array(['3-mer_5493', '3-space_6056', '4-mer_4594', '4-ramp_4092',
       '4-space_13560', '41st_Street_Entranc_e_South_West_10561',
       '41st_Street_Entrance_North_East_10509',
       '41st_Street_Entrance_North_West_10510',
       '41st_Street_Entrance_South_East_10539',
       '42nd_Street_Entrance_14495', '5-box10_3843', '5-box11_3836',
       '5-box12_3830', '5-box13_3826', '5-box17_8218', '5-box18_3804',
       '5-box19_3793', '5-box1_4011', '5-box4_3537', '5-box5_2749',
       '5-box6_3691', '5-box7_3692', '5-box8_3689', '5-box9_2714',
       '5-brown_stairwell_8204', '5-elec_4037', '5-elev3_3972',
       '5-elev_lobby_3980', '5-elev_mach_room_8199', '5-fan_room1_3923',
       '5-fan_room2_9317', '5-fan_room3_3844', '5-fan_room4_3800',
       '5-fan_room4_3825', '5-hatch_room_4025', '5-lobby1_8191',
       '5-lobby3_3921', '5-lobby6_3907', '5-mach_room_3939',
       '5-mech_equip_room_8193', '5-penthouse_3_4046', '5-ramp_2628',
       '5-stairs1_3950', '5-stairs2_3968', '5-sta

In [72]:
floors.head()

Unnamed: 0,collision_geometry,config,escalator,isElevator,name,visualization_geometry
14,"{'material': 'off_white', 'type': 'polygon', '...","{'orientation': [0, 0, 0, 1], 'position': [0, ...",,False,entry__exit_4_58,"{'material': 'off_white', 'type': 'box', 'dims..."
17,"{'material': 'off_white', 'type': 'polygon', '...","{'orientation': [0, 0, 0, 1], 'position': [0, ...",,False,radio_shack_69,"{'material': 'off_white', 'type': 'box', 'dims..."
27,"{'material': 'off_white', 'type': 'polygon', '...","{'orientation': [0, 0, 0, 1], 'position': [0, ...",,False,casa_java_96,"{'material': 'off_white', 'type': 'box', 'dims..."
41,"{'material': 'off_white', 'type': 'polygon', '...","{'orientation': [0, 0, 0, 1], 'position': [0, ...",,False,vacant_2_132,"{'material': 'off_white', 'type': 'box', 'dims..."
44,"{'material': 'off_white', 'type': 'polygon', '...","{'orientation': [0, 0, 0, 1], 'position': [0, ...",,False,vacant_1_139,"{'material': 'off_white', 'type': 'box', 'dims..."


In [98]:
#Collision Geometry split
from collections import Counter
type_ = []
material = []
for i in floors.collision_geometry.get_values():
    type_.append(i['type'])
    material.append(i['material'])
print(Counter(type_),Counter(material))
    

Counter({'polygon': 576}) Counter({'off_white': 514, 'black': 62})


In [99]:
#Visualization Geometry split
from collections import Counter
type_ = []
material = []
for i in floors.visualization_geometry.get_values():
    type_.append(i['type'])
    material.append(i['material'])
print(Counter(type_),Counter(material))

Counter({'box': 576}) Counter({'off_white': 576})


## Further Exploration

In [141]:
## There is only one key, which further has two keys - Type has one value 'obstacle' so we're interested in 'geometries'
print(building_data['building1'].keys())
print(building_data['building1']['type'])


## 'geometries' is a list with 6461 objects
print(type(building_data['building1']['geometries']))
print(type(building_data['building1']['geometries'][0]))
print(len(building_data['building1']['geometries']))
print(len(building_data['building1']['geometries'][0]))

dict_keys(['geometries', 'type'])
obstacle
<class 'list'>
<class 'dict'>
6461
3


In [142]:
#this is how one element of geometries looks like

building_data['building1']['geometries'][0]

#its a list of dictionaries  with 3 keys - collision_geometry, config and name
#collision

{'collision_geometry': {'dims': [4.150000000000006, 0.15, 2.99],
  'material': 'grey',
  'type': 'box'},
 'config': {'orientation': [6.123233995736766e-17, 1.0, 0.0, 0.0],
  'position': [114.325, -45.14, 7.5]},
 'name': 'NoName10'}

In [143]:
## Print first ten elements of the list
print(building_data['building1']['geometries'][:10])

[{'collision_geometry': {'material': 'grey', 'type': 'box', 'dims': [4.150000000000006, 0.15, 2.99]}, 'config': {'orientation': [6.123233995736766e-17, 1.0, 0.0, 0.0], 'position': [114.325, -45.14, 7.5]}, 'name': 'NoName10'}, {'collision_geometry': {'material': 'grey', 'type': 'box', 'dims': [3.6099999999999994, 0.15, 2.99]}, 'config': {'orientation': [0.7071067811865476, 0.7071067811865475, 0.0, 0.0], 'position': [116.4, -46.945, 7.5]}, 'name': 'NoName12'}, {'collision_geometry': {'material': 'grey', 'type': 'box', 'dims': [47.69000000000001, 0.15, 2.99]}, 'config': {'orientation': [1.0, 0.0, 0.0, 0.0], 'position': [92.445, -62.4, 7.5]}, 'name': 'NoName19'}, {'collision_geometry': {'material': 'grey', 'type': 'box', 'dims': [8.450000000000003, 0.15, 2.99]}, 'config': {'orientation': [6.123233995736766e-17, 1.0, 0.0, 0.0], 'position': [33.085, -63.02, 7.5]}, 'name': 'NoName26'}, {'collision_geometry': {'material': 'grey', 'type': 'box', 'dims': [10.979999999999997, 0.15, 2.99]}, 'confi

In [144]:
## 'type' seems interesting in the 'geometries' dictionary
## Iterate over the elements, add it to a list and use 'set' to get the unique 'types'

type_list = []
for i in building_data['building1']['geometries']:
    type_list.append(i['collision_geometry']['type'])
    
## Set gives you the unique elements in a list
print(set(type_list))

{'polygon', 'box'}


In [145]:
## Look at items that are of type 'box'
box_type = []
for i in building_data['building1']['geometries']:
    if i['collision_geometry']['type'] == 'box':
        box_type.append(i['name'])

len(box_type)
#print(box_type)
## Most of them are NoName objects so let's look at type == 'polygons'

5030

### Set of Z values for type polygon

In [146]:
## Look at items that are of type 'polygon'
polygon_type = []
check = []
for i in building_data['building1']['geometries']:
    if i['collision_geometry']['type'] == 'polygon':
        polygon_type.append(i['name'])
        check.append(i['collision_geometry']['triangles'][2])

len(polygon_type)
set(check) #set of Z values
#print(polygon_type)
## These look far more interesting from an entry/exit perspective. A lot more descriptive
## compared to type 'box'.

{0.0,
 1.5219512195121945,
 3.0,
 3.9056603773584877,
 3.9898477157360412,
 4.320000000000002,
 6.0,
 6.831325301204819,
 7.431372549019607,
 8.024999999999999,
 8.149999999999999,
 9.0,
 9.59016393442623,
 10.853932584269664,
 10.954983922829582,
 11.608695652173914,
 12.0,
 15.0,
 18.0,
 21.0,
 24.0}

In [147]:
## Let's try another approach where we print out all 'names' that are not 'NoName' to see what different type of
## objects exist.

for i in range(0,len(building_data['building1']['geometries'])):
    if 'NoName' not in str(building_data['building1']['geometries'][i]['name']):
        print(building_data['building1']['geometries'][i]['name'])
        
## Pretty much the same result as the polygon/box differentiation.

entry__exit_4_58
radio_shack_69
casa_java_96
vacant_2_132
vacant_1_139
Hudson_News_Stand_199
men_s_room_256
info_booth_1_306
cake_boss_454
entry_exit_7_455
entry_exit_stairs_544
blue_and_red_stairwell_550
entry_exit_591
hudson_news_2_623
SF_office_1_668
Escalator_Second_7_2_679
Escalator_Second_7_3_712
Munchy_s_724
stair_D_729
_731
_751
vacant_1_805
hudson_news_2_807
sweet_factory_862
mrs__field_s_cookies_866
loading_dock_868
trash_871
exit_entry_1_1002
exit_entry_ramp_1004
Lobby_4_1008
Escalator_Second_5_1_1195
escalator_26_1281
1329
Aunty_Anne_s_1331
storage_room_1338
lottery_1347
passageway_3_1384
_1394
passageway_2_1406
passageway_1_1408
telephones_1423
escalator_and_entry_exit_1442
Bolton_s_1445
_1460
strawberry_1470
wl_stairwell_1471
elec_room_3_1502
Escalator_Second_6_2_1512
Escalator_Second_6_4_1520
Escalator_Second_6_3_1528
Escalator_Second_8_2_1539
cafe_metro_1545
ticket_1555
shortline_tickets_1564
Women_s_room_1594
Men_s_room_2_1621
hudson_news_and_storage_1639
Mcann_s_cafe_

## List of entrances/exits

In [148]:
## For an initial run, let's look at objects that have the words 'exit' 'entry' 'entrance' in their name
## and add their info to a list.
## There could be more than these entrances/exits with other names?? - this is just for a starting point

In [149]:
exit_entry_list = []

for i in range(0,len(building_data['building1']['geometries'])):
    for keyword in ['exit','entry','entrance']:
        if (keyword in str(building_data['building1']['geometries'][i]['name']).lower()):
            exit_entry_list.append(building_data['building1']['geometries'][i])

print(len(exit_entry_list))
print(len(exit_entry_list[0]))

26
5


In [151]:
## So we'd probably be interested in the triangle coordiantes - Still need to figure out how these work
## Lets take the first element of our entry_exit list as test

In [152]:
test_coordinates = exit_entry_list[0]['collision_geometry']['triangles']

## Entrance/Exits with their coordinates

In [159]:
# Convert list of Entrances/Exit to PANDAS Dataframe

df_list = []

# Iterate over the exit_entry_list to get 'names' and 'triangles' points
# Turn into dictionary and append to df_list for creation of dataframe

for i in exit_entry_list:
    xyz_list = []
    coordinates = i['collision_geometry']['triangles']
    for j in range(0,len(coordinates),3):
        xyz_list.append((coordinates[j],coordinates[j+1],coordinates[j+2]))
    x = coordinates[::3]
    y = coordinates[1::3]
    z = coordinates[2::3]
    data = {'name' : i['name'],
           'x' : x,
           'y' : y,
           'z' : z,
            'triangles' : xyz_list, #list of triples
            'no_of_triangles' : len(xyz_list),  #no. of triangles used to represent each entrance/exit varies
           }
    
    df_list.append(data)

# Create the dataframe
df = pd.DataFrame(df_list)
df.head(10)

# Because the way we look for 'exit' , 'entrance' and 'exit' in our list exit_entry_list, some elements appear twice 
# (those that have 2 of those words at the same time in their name)

df = df.drop_duplicates(subset='name',keep='first')
df.count()
df.head()

Unnamed: 0,name,no_of_triangles,triangles,x,y,z
0,entry__exit_4_58,6,"[(100.6, -17.01, 6.0), (100.6, -11.75, 6.0), (...","[100.6, 100.6, 92.88, 100.6, 92.88, 92.88]","[-17.01, -11.75, -11.75, -17.01, -11.75, -17.01]","[6.0, 6.0, 6.0, 6.0, 6.0, 6.0]"
2,entry_exit_7_455,6,"[(120.12, 60.33, 6.0), (122.93, 64.5, 6.0), (1...","[120.12, 122.93, 120.16, 120.12, 123.0, 122.93]","[60.33, 64.5, 64.5, 60.33, 60.28, 64.5]","[6.0, 6.0, 6.0, 6.0, 6.0, 6.0]"
4,entry_exit_stairs_544,6,"[(46.24, 9.3, 6.0), (53.04, 9.3, 6.0), (53.04,...","[46.24, 53.04, 53.04, 46.24, 53.04, 46.08]","[9.3, 9.3, 17.1, 9.3, 17.1, 16.96]","[6.0, 6.0, 6.0, 6.0, 6.0, 6.0]"
6,entry_exit_591,9,"[(107.77, 10.18, 6.0), (107.42, 16.32, 6.0), (...","[107.77, 107.42, 103.53, 103.54, 107.77, 107.7...","[10.18, 16.32, 16.7, 9.3, 8.9, 10.18, 9.3, 10....","[6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0]"
8,exit_entry_1_1002,6,"[(32.3, -20.64, 6.0), (36.34, -11.79, 6.0), (3...","[32.3, 36.34, 32.3, 32.3, 36.36, 36.34]","[-20.64, -11.79, -11.75, -20.64, -20.75, -11.79]","[6.0, 6.0, 6.0, 6.0, 6.0, 6.0]"


In [25]:
# Convert to CSV for usage in other files
df.to_csv('entrance_exit_points.csv',index=False)

In [304]:
#Z value for type polygon ranges upto 24 whereas Z value of entrance/exits goes only upto 6.
#Are there no entrances/exits in the floors above that??????

In [160]:
!jupyter nbconvert --to script Buildings_Exploratory.ipynb

[NbConvertApp] Converting notebook Buildings_Exploratory.ipynb to script
[NbConvertApp] Writing 8290 bytes to Buildings_Exploratory.py
