# Explore spatial organization dataset

## Import libraries and set up paths

In [8]:
import ast
import os
import sys
import json
from tqdm import tqdm

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

sys.path.append("/math111a_finalproject")
from utils.utils import *

In [22]:
# Resolve the project root (parent of the current working directory) and the
# three sub-directories used by this notebook.
proj_dir = os.path.abspath('..')
data_dir, util_dir, analysis_dir = (
    os.path.join(proj_dir, subdir) for subdir in ('data', 'utils', 'analysis')
)

## Read in data

In [56]:
# Load the per-image metadata table.
df = pd.read_csv(os.path.join(data_dir, 'metadata.csv'))

# The "labels" and "instances" columns are parsed from their string form back
# into Python objects. ast.literal_eval only accepts Python literals, whereas
# the built-in eval would execute any expression embedded in the CSV file.
df["labels"] = df["labels"].apply(ast.literal_eval)
df["instances"] = df["instances"].apply(ast.literal_eval)

# Memory-map the image, depth, label and instance arrays read-only so they are
# not loaded into RAM all at once.
# NOTE(review): offset=128 skips the first 128 bytes of each file, presumably
# the .npy header; confirm the header length for these files (np.load with
# mmap_mode='r' would read the header instead of assuming its size).
imgmat = np.memmap(os.path.join(data_dir, 'image_arr.npy'), dtype='uint8', mode='r',
                   offset=128, shape=(1449,480,640,3))
depmat = np.memmap(os.path.join(data_dir, 'depth_arr.npy'), dtype='<f4', mode='r',
                   offset=128, shape=(1449,480,640))
labmat = np.memmap(os.path.join(data_dir, 'label_arr.npy'), dtype='uint16', mode='r',
                  offset=128, shape=(1449,480,640))
instmat = np.memmap(os.path.join(data_dir, 'instance_arr.npy'), dtype='uint8', mode='r',
                  offset=128, shape=(1449,480,640))

In [57]:
# Preview the first five rows of the metadata table.
df.head(n=5)

Unnamed: 0,index,scene,scene_type,labels,instances
0,0,kitchen_0004,kitchen,"[book, bottle, cabinet, ceiling, chair, cone, ...","[book_1, bottle_1, cabinet_1, cabinet_2, ceili..."
1,1,kitchen_0004,kitchen,"[bottle, bowl, cabinet, ceiling, counter, dish...","[bottle_1, bottle_2, bottle_3, bottle_4, bottl..."
2,2,office_0003,office,"[air vent, book, box, chair, door, door knob, ...","[air vent_1, air vent_2, book_1, box_1, chair_..."
3,3,office_0003,office,"[bottle, cabinet, chair, cork board, cup, desk...","[bottle_1, bottle_10, bottle_11, bottle_2, bot..."
4,4,office_0004,office,"[air duct, basket, bottle, cabinet, camera, ce...","[air duct_1, basket_1, basket_2, bottle_1, cab..."


In [58]:
# Number of rows in the metadata table for each scene type.
df["scene_type"].value_counts()

bedroom            383
kitchen            225
living_room        221
bathroom           121
dining_room        117
office              78
home_office         50
classroom           49
bookstore           36
playroom            31
furniture_store     27
study               25
reception_room      17
office_kitchen      10
study_room           7
basement             7
computer_lab         6
conference_room      5
student_lounge       5
cafe                 5
home_storage         5
foyer                4
dinette              4
excercise_room       3
printer_room         3
laundry_room         3
indoor_balcony       2
Name: scene_type, dtype: int64

In [59]:
# Frequency of each label across all rows.
# NOTE(review): to_1D comes from utils.utils; presumably it flattens the
# per-row label lists into a single Series -- confirm against the helper.
to_1D(df["labels"]).value_counts()

unlabeled            1449
wall                 1409
floor                1246
picture               818
cabinet               664
                     ... 
scenary                 1
shower hose             1
water fountain          1
toy bin                 1
toy cash register       1
Length: 895, dtype: int64

In [73]:
# Build an indicator table from the per-row label lists, with the distinct
# labels as the candidate columns.
# NOTE(review): boolean_df and to_1D come from utils.utils; presumably the
# result has one boolean column per label -- confirm against the helpers.
labs_bool = boolean_df(df.labels, to_1D(df.labels).unique())

# Pearson correlation between every pair of columns of the indicator table.
labs_corr = labs_bool.corr(method="pearson")

# Zero the diagonal so that a label is never reported as its own best match.
# The diagonal is written on an explicit, writable copy of the data and the
# frame is rebuilt from it: mutating `labs_corr.values` in place only works
# when pandas happens to hand back a writable view, and fails with a
# read-only error when Copy-on-Write is enabled.
corr_values = labs_corr.to_numpy(copy=True)
np.fill_diagonal(corr_values, 0)
labs_corr = pd.DataFrame(corr_values, index=labs_corr.index, columns=labs_corr.columns)

In [74]:
# For every label (column), the label with the highest correlation to it.
labs_corr.idxmax(axis=0)

book                bookshelf
bottle                   sink
cabinet               counter
ceiling                 light
chair                   table
                     ...     
sewing machine         candle
rolled up rug     candlestick
doily                decanter
coffee pot             shofar
torah                 platter
Length: 895, dtype: object

In [168]:
# Keep a single row per scene: the first non-null value of each column.
first_per_scene = df.groupby('scene').first().reset_index()

# Apply to_1D to the label lists of each scene type.
# NOTE(review): to_1D comes from utils.utils; presumably it flattens the lists
# into one Series of labels per scene type -- confirm against the helper.
labels_by_type = first_per_scene.groupby('scene_type')['labels'].apply(to_1D)

# Drop the second index level of the applied result so that only the
# 'scene_type' level remains, then regroup by scene type for iteration.
grouped = labels_by_type.droplevel(level=1).groupby('scene_type')

In [187]:
# Map each scene type to the set of distinct labels found in its group.
# The group variable is deliberately not called `df`: a top-level loop variable
# stays bound after the loop, so reusing that name would replace the metadata
# DataFrame with the last group's Series and break any cell re-run afterwards.
unique_objects = {
    scene_type: set(scene_labels.to_list())
    for scene_type, scene_labels in grouped
}

In [197]:
# Jaccard similarity (intersection size over union size) between the label
# sets of the 'bedroom' and 'bathroom' scene types.
(
    len(unique_objects['bedroom'] & unique_objects['bathroom'])
    / len(unique_objects['bedroom'] | unique_objects['bathroom'])
)

0.21739130434782608

In [215]:
# Pairwise Jaccard similarity (intersection size over union size) between the
# label sets of all scene types.
rooms = list(unique_objects.keys())

# Fill a float matrix directly instead of an empty object-dtype DataFrame:
# this keeps the result numeric for idxmax/max, and avoids both mutating the
# frame through `.values` and relying on fillna to convert object columns.
# The diagonal stays 0 so a scene type is never its own best match.
jaccard = np.zeros((len(rooms), len(rooms)), dtype=float)
for i, room1 in enumerate(rooms):
    for j in range(i + 1, len(rooms)):
        room2 = rooms[j]
        union_size = len(unique_objects[room1] | unique_objects[room2])
        if union_size == 0:
            # Two empty label sets: treat similarity as 0 rather than
            # dividing by zero.
            continue
        shared = len(unique_objects[room1] & unique_objects[room2])
        # The measure is symmetric, so each pair is computed once.
        jaccard[i, j] = jaccard[j, i] = shared / union_size

sim_mat = pd.DataFrame(jaccard, index=rooms, columns=rooms)

In [223]:
# For every scene type (column), the other scene type with the highest
# similarity score.
sim_mat.idxmax(axis=0)

basement              home_storage
bathroom                 classroom
bedroom                living_room
bookstore               study_room
cafe                indoor_balcony
classroom                   office
computer_lab            study_room
conference_room           basement
dinette             indoor_balcony
dining_room            living_room
excercise_room      student_lounge
foyer               reception_room
furniture_store     reception_room
home_office                  study
home_storage              basement
indoor_balcony             dinette
kitchen                dining_room
laundry_room       conference_room
living_room            dining_room
office                   classroom
office_kitchen              office
playroom               home_office
printer_room                 foyer
reception_room      student_lounge
student_lounge      reception_room
study                  home_office
study_room            computer_lab
dtype: object

In [202]:
# For every label (column), the highest correlation it has with any label.
labs_corr.max(axis=0)

book              0.202891
bottle            0.450684
cabinet           0.526995
ceiling           0.438108
chair             0.406655
                    ...   
sewing machine    0.183891
rolled up rug     0.127555
doily             0.446595
coffee pot        0.576951
torah             0.315243
Length: 895, dtype: float64