In [124]:
import pandas as pd
import h5py
import os

In [125]:
# Compare two skim files that share the same layout (input for urbansim from soundcast).
# Every matrix found under 'results' in the first file is also read from the second.
skim1_file = r'R:\SoundCast\UrbanSimAccessibility\Skims\2025\urbansim_skims.h5'
skim2_file = r'R:\SoundCast\UrbanSimAccessibility\Skims\Integrated\2025\urbansim_skims.h5'

skim1_name = 'LUV'
skim2_name = 'new'

results_output_dir = r'R:\SoundCast\UrbanSimAccessibility\Skims\Integrated\2025'

##############


def _skim_matrix_to_long(matrix, value_name):
    """Convert one 2-D skim matrix into a long table with one row per cell.

    Parameters
    ----------
    matrix : 2-D array-like
        Values of a single skim matrix.
    value_name : str
        Name given to the value column of the result.

    Returns
    -------
    pandas.DataFrame
        Columns ``['otaz', 'dtaz', value_name]`` with 1-based zone labels.
    """
    wide = pd.DataFrame(matrix)
    # Adjust the positional 0-based labels to be 1-based
    wide.index = wide.index + 1
    wide.columns = wide.columns + 1

    # DataFrame.unstack() yields a Series keyed by (column label, row label),
    # so 'otaz' receives the matrix column label and 'dtaz' the row label.
    # NOTE(review): if the file stores origins on rows, these two labels are
    # swapped -- confirm against the skim file layout before relying on them.
    long_df = wide.unstack().reset_index()
    return long_df.rename(columns={'level_0': 'otaz', 'level_1': 'dtaz', 0: value_name})


def _load_skim_set(results_group, skim_names, source_name):
    """Read the named matrices from an open HDF5 group into long-format tables.

    Parameters
    ----------
    results_group : mapping of name -> 2-D dataset
        The open 'results' group of a skim file.
    skim_names : list of str
        Matrices to read, in the order their columns should appear.
    source_name : str
        Label written to the 'source' column of the combined table.

    Returns
    -------
    (dict, pandas.DataFrame)
        A dict of per-matrix long tables keyed by matrix name, and one
        combined table with 'otaz', 'dtaz', a column per matrix and 'source'.
    """
    per_skim = {}
    combined = pd.DataFrame()
    for name in skim_names:
        long_df = _skim_matrix_to_long(results_group[name][:], name)
        per_skim[name] = long_df
        if len(combined.columns) == 0:
            # Copy so that adding columns below does not alter per_skim[name]
            combined = long_df.copy()
        else:
            combined[name] = long_df[name]

    # Tag the rows once, after all matrices are in place
    combined['source'] = source_name
    return per_skim, combined


# Open both files read-only and close them as soon as the data is in memory.
with h5py.File(skim1_file, 'r') as skim1_h5, h5py.File(skim2_file, 'r') as skim2_h5:
    skim_names = list(skim1_h5['results'].keys())

    # Fail early, with a readable message, when the second file lacks a matrix
    missing = [name for name in skim_names if name not in skim2_h5['results']]
    if missing:
        raise KeyError('Matrices missing from %s: %s' % (skim2_file, ', '.join(missing)))

    skim1_dict, skim1_df = _load_skim_set(skim1_h5['results'], skim_names, skim1_name)
    skim2_dict, skim2_df = _load_skim_set(skim2_h5['results'], skim_names, skim2_name)

In [126]:
# Optional: stack the two skim sets and write them to file
# df = pd.concat([skim1_df, skim2_df])
# df.to_csv(os.path.join(r'R:\SoundCast\UrbanSimAccessibility\Skims\Integrated','od_skim_compare.csv'))

# The stacked results are useful, but differences also need to be mapped,
# so compute the total and the average of every skim by zone.

# Skim matrices included in the comparison
skim_cols = ['aau1cs', 'aau1ds', 'aau1tl', 'aau1tm', 'atrtwa', 'avehda',
             'awlktm', 'lsum1', 'lsum2', 'lsum3', 'lsum4']


def _zone_summary(skim_df, agg, value_col, cols=skim_cols):
    """Aggregate skim values per 'otaz' and return them in long format.

    Parameters
    ----------
    skim_df : pandas.DataFrame
        Long skim table containing 'otaz' and the columns listed in ``cols``.
    agg : str
        Aggregation applied within each 'otaz' group, e.g. 'sum' or 'mean'.
    value_col : str
        Name of the value column in the result.
    cols : list of str
        Skim columns to aggregate.

    Returns
    -------
    pandas.DataFrame
        Columns ``['skim_name', 'otaz', value_col]``, one row per
        (skim, zone) pair.
    """
    # Select the skim columns before aggregating so that 'dtaz' and the text
    # 'source' column never reach the aggregation (mean of text is an error).
    by_zone = skim_df.groupby('otaz')[list(cols)].agg(agg)

    # Unstack so that metrics are in a list: (skim name, otaz) -> value
    long_df = by_zone.unstack().reset_index()

    # A flat list keeps these as ordinary column labels; a nested list would
    # create a MultiIndex that cannot be merged on by plain column name.
    long_df.columns = ['skim_name', 'otaz', value_col]
    return long_df


# First skim set: totals and means
df_1_sum = _zone_summary(skim1_df, 'sum', 'skim_value_sum_' + skim1_name)
df_1_avg = _zone_summary(skim1_df, 'mean', 'skim_value_avg_' + skim1_name)

# Second skim set: totals and means
df_2_sum = _zone_summary(skim2_df, 'sum', 'skim_value_sum_' + skim2_name)
df_2_avg = _zone_summary(skim2_df, 'mean', 'skim_value_avg_' + skim2_name)

In [127]:
# Join the four per-zone summaries side by side on zone and skim name,
# then write the comparison table to CSV.
merge_keys = ['otaz','skim_name']

df = df_1_sum
for summary in (df_1_avg, df_2_sum, df_2_avg):
    df = df.merge(summary, on=merge_keys)

# NOTE(review): results_output_dir is not used here; the file is written to
# the 'Integrated' folder itself -- confirm that this is the intended target.
compare_csv = os.path.join(r'R:\SoundCast\UrbanSimAccessibility\Skims\Integrated','od_skim_compare.csv')
df.to_csv(compare_csv)