In [15]:
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Generate audio scene observation model

In [22]:
# Row labels: the scenes the observation model is indexed by.
scene_labels = ['campus','courtyard','lab','lobby']
# Column labels: the class names emitted by the audio scene classifier.
audio_obs_labels = ['airport', 'bus', 'metro', 'metro_station', 'park', 'public_square', 'shopping_mall', 'street_pedestrian', 'street_traffic', 'tram']

# Read the model through a context manager so the file handle is closed
# deterministically instead of being left to the garbage collector.
with open('../../results/exp0_obs_models/cpjku_audio_scene_obs_model.json') as audio_model_file:
    cpjku_scene_obs_model_arr = json.load(audio_model_file)

# One row per scene, one column per audio class label.
# NOTE(review): the entries look like raw observation counts -- confirm against the producer of the JSON.
audio_model_df = pd.DataFrame(cpjku_scene_obs_model_arr,columns=audio_obs_labels,index=scene_labels)

# `Styler.set_caption` returns a Styler and does not modify the DataFrame, so the
# caption is only rendered when the Styler itself is the cell's displayed value.
audio_model_df.style.set_caption("Audio model")

Unnamed: 0,airport,bus,metro,metro_station,park,public_square,shopping_mall,street_pedestrian,street_traffic,tram
campus,7.0,16.0,48.0,21.0,190.0,6.0,0.0,0.0,43.0,1947.0
courtyard,3.0,304.0,4.0,7.0,764.0,0.0,0.0,1.0,21.0,1361.0
lab,21.0,155.0,2124.0,32.0,78.0,1.0,0.0,1.0,65.0,20.0
lobby,458.0,46.0,207.0,474.0,970.0,0.0,69.0,18.0,22.0,178.0


# Generate CLIP scene observation models

In [58]:
# Load the CLIP observation models; the context manager guarantees the file
# handle is closed once the JSON has been parsed.
with open('../../results/exp0_obs_models/clip_scene_obs_model.json') as clip_model_file:
    clip_scene_obs_model_dict = json.load(clip_model_file)

# Summary table: one row per CLIP configuration found in the JSON, one column per
# metric (overall accuracy plus precision `p_*` / recall `r_*` for each scene).
# The frame is sized from the loaded keys rather than a hard-coded (6, 9) shape,
# so adding or removing a configuration in the JSON no longer raises a shape error.
# Filling with the scalar 0.0 yields the same float64 zero-initialised frame.
clip_obs_results_df = pd.DataFrame(
    0.0,
    index=list(clip_scene_obs_model_dict.keys()),
    columns=['acc','p_campus','r_campus','p_crtyrd','r_crtyrd','p_lab','r_lab','p_lobby','r_lobby'],
)

In [78]:
row_labels = ['campus_actual','courtyard_actual','lab_actual','lobby_actual']
col_labels = ['campus_est','courtyard_est','lab_est','lobby_est']
# Short scene tags used in the metric column names, in confusion-matrix order.
scene_tags = ['campus', 'crtyrd', 'lab', 'lobby']


def confusion_metrics(confusion):
    """Compute accuracy and per-scene precision/recall from a confusion matrix.

    Parameters
    ----------
    confusion : nested sequence or array
        Square matrix whose rows are the actual scenes and whose columns are
        the estimated scenes, ordered as in ``scene_tags``.

    Returns
    -------
    dict
        ``'acc'`` (trace / total) plus ``'p_<tag>'`` (diagonal / column sum) and
        ``'r_<tag>'`` (diagonal / row sum) for every tag in ``scene_tags``, all
        as Python floats. A zero column or row sum yields NaN with a NumPy
        warning.
    """
    # Convert once instead of rebuilding the array for every metric.
    counts = np.asarray(confusion, dtype=float)
    correct = np.diagonal(counts)
    estimated_totals = counts.sum(axis=0)  # column sums -> precision denominators
    actual_totals = counts.sum(axis=1)     # row sums -> recall denominators

    metrics = {'acc': float(correct.sum() / counts.sum())}
    for class_idx, tag in enumerate(scene_tags):
        metrics['p_' + tag] = float(correct[class_idx] / estimated_totals[class_idx])
        metrics['r_' + tag] = float(correct[class_idx] / actual_totals[class_idx])
    return metrics


for clip_cfg, clip_entry in clip_scene_obs_model_dict.items():
    # Print the labelled confusion matrix for this CLIP configuration.
    print(clip_cfg)
    model_df = pd.DataFrame(clip_entry['model'], index=row_labels, columns=col_labels)
    print(model_df)

    # Store every metric in this configuration's row of the summary table.
    for metric_name, metric_value in confusion_metrics(clip_entry['model']).items():
        clip_obs_results_df.at[clip_cfg, metric_name] = metric_value
    print()


RN50_basic_description
                  campus_est  courtyard_est  lab_est  lobby_est
campus_actual        38610.0            1.0     41.0        0.0
courtyard_actual     42864.0           35.0      8.0        0.0
lab_actual             773.0            0.0  41409.0       10.0
lobby_actual          4693.0            0.0      1.0    36745.0

RN50_social_in_out_desc
                  campus_est  courtyard_est  lab_est  lobby_est
campus_actual        36047.0           16.0      0.0     2589.0
courtyard_actual     38061.0         4319.0      0.0      527.0
lab_actual               0.0            0.0  41490.0      702.0
lobby_actual             0.0            0.0     40.0    41399.0

ViT-B/32_basic_description
                  campus_est  courtyard_est  lab_est  lobby_est
campus_actual        31512.0          296.0   4306.0     2538.0
courtyard_actual     25070.0         6161.0  11536.0      140.0
lab_actual               0.0            0.0  38289.0     3903.0
lobby_actual             0.0

In [79]:
# Display the per-configuration summary table (accuracy plus per-scene
# precision `p_*` and recall `r_*`) populated by the metrics loop.
clip_obs_results_df

Unnamed: 0,acc,p_campus,r_campus,p_crtyrd,r_crtyrd,p_lab,r_lab,p_lobby,r_lobby
RN50_basic_description,0.707059,0.444099,0.998913,0.972222,0.000816,0.998794,0.981442,0.999728,0.886725
RN50_social_in_out_desc,0.746141,0.486412,0.932604,0.996309,0.10066,0.999037,0.983362,0.915563,0.999035
ViT-B/32_basic_description,0.710703,0.556926,0.815275,0.954158,0.14359,0.70734,0.907494,0.862953,1.0
ViT-B/32_social_in_out_desc,0.557431,1.0,0.016429,0.530207,1.0,0.545949,1.0,1.0,0.153189
ViT-L/14_basic_description,0.728646,0.489153,0.987038,0.717157,0.05738,0.975279,0.928494,0.930772,0.979198
ViT-L/14_social_in_out_desc,0.947237,0.995681,0.912527,0.925371,0.996434,0.895268,0.993767,0.994283,0.881295
