In [1]:
import os

import pandas as pd

In [2]:
# Location of the pickled input. Set the NEI_8760_PKL environment variable to
# point at a different copy of the file; when it is unset, the original local
# path is used, so existing runs behave exactly as before.
file_path = os.environ.get(
    'NEI_8760_PKL',
    '/Users/rose775/Library/CloudStorage/OneDrive-PNNL/Desktop/Projects/NEI/all_8760_dfs.pkl',
)

In [3]:
# Load the pickled object from disk. Cells further down call .items() on it and
# treat every value as a DataFrame keyed by a "_"-delimited name.
# NOTE(review): unpickling can execute arbitrary code - only load files that
# come from a trusted source.
dfs_dict = pd.read_pickle(file_path)

In [4]:
# Collects one summary record (a dict) per dataframe; the loop cell below fills it.
rows = []

In [5]:
# Build one summary row per dataframe: how many rows fall inside the comfort
# band and what share of all rows fall outside it.

# Column holding the conditioned-space mean air temperature. The name ends in
# ".c"; presumably degrees Celsius - confirm against the data dictionary.
temp_col = 'out.zone_mean_air_temp.conditioned_space.c'
# Comfort band, both bounds inclusive.
comfort_min, comfort_max = 20, 23

# Start from an empty list inside this cell so that re-running the cell
# rebuilds the summary instead of appending a second copy of every row
# (which would double the per-state sums computed later).
rows = []

# Iterate through each df in the dictionary
for df_name, df in dfs_dict.items():
    # The name is "_"-delimited; fields 1, 3 and 6 are read as the upgrade
    # label, the state and the building id (all kept as strings).
    name_parts = df_name.split("_")
    upgrade, state, bldg_id = name_parts[1], name_parts[3], name_parts[6]

    # One row is counted as one hour (the input file name suggests 8760-hour
    # series - confirm).
    total_hours = len(df)
    # between() is inclusive on both ends, i.e. 20 <= temp <= 23. Missing
    # temperatures compare as False and therefore count as discomfort.
    comfort_hours = int(df[temp_col].between(comfort_min, comfort_max).sum())

    if total_hours:
        discomfort_percent = ((total_hours - comfort_hours) / total_hours) * 100
    else:
        # An empty dataframe has no defined share; record NaN instead of
        # raising ZeroDivisionError and aborting the whole loop.
        discomfort_percent = float("nan")

    # Append the row to the list
    rows.append({
        "id": df_name,
        "upgrade": upgrade,
        "state": state,
        "bldg_id": bldg_id,
        "occ_comfort_hours": comfort_hours,
        "discomfort_percent": discomfort_percent
    })

# Create the new df from the list of rows
result_df = pd.DataFrame(rows)


In [6]:
# Per (upgrade, state) pair, compute the mean and the sum of both comfort metrics.
# Output columns are named "<stat>_<metric>", in the order:
# mean/sum of occ_comfort_hours, then mean/sum of discomfort_percent.
summary_spec = {
    f"{stat}_{metric}": (metric, stat)
    for metric in ('occ_comfort_hours', 'discomfort_percent')
    for stat in ('mean', 'sum')
}

by_upgrade_state = result_df.groupby(['upgrade', 'state'])
# reset_index() turns the group keys back into ordinary columns.
grouped_df = by_upgrade_state.agg(**summary_spec).reset_index()

In [7]:
# Per-state change of every upgrade relative to the baseline run (upgrade '0').
# Upgrade labels are strings because they are parsed out of the dataframe
# names, so the baseline is matched against the string '0', not the integer 0.
baseline_label = '0'
metric_cols = [
    'mean_occ_comfort_hours',
    'mean_discomfort_percent',
    'sum_occ_comfort_hours',
    'sum_discomfort_percent',
]

diff_rows = []

# One pass per state; sort=False keeps the states in order of first appearance.
for state_name, state_rows in grouped_df.groupby('state', sort=False):
    is_baseline = state_rows['upgrade'] == baseline_label
    if not is_baseline.any():
        # No baseline run for this state, so there is nothing to compare against.
        continue

    # Read the baseline column by column so integer sums stay integers.
    baseline = {col: state_rows.loc[is_baseline, col].iloc[0] for col in metric_cols}

    # Every non-baseline upgrade of this state yields one difference row
    # (current value minus baseline value for each metric).
    for current in state_rows.loc[~is_baseline].itertuples(index=False):
        diff_row = {"state": state_name, "upgrade": current.upgrade}
        for col in metric_cols:
            diff_row[f"{col}_diff"] = getattr(current, col) - baseline[col]
        diff_rows.append(diff_row)

# Create the final df from the list of difference rows. The columns are passed
# explicitly so the frame keeps its schema even when no rows were produced;
# otherwise later cells that select a "*_diff" column would raise KeyError.
diff_columns = ["state", "upgrade"] + [f"{col}_diff" for col in metric_cols]
diff_df = pd.DataFrame(diff_rows, columns=diff_columns)

In [8]:
# Preview the first rows of the per-state differences versus baseline.
diff_df.head()

Unnamed: 0,state,upgrade,mean_occ_comfort_hours_diff,mean_discomfort_percent_diff,sum_occ_comfort_hours_diff,sum_discomfort_percent_diff
0,AL,1,24.24,-0.276712,1212,-13.835616
1,AL,10,-260.8,2.977169,-13040,148.858447
2,AL,11,6.3,-0.071918,315,-3.59589
3,AL,12,-32.04,0.365753,-1602,18.287671
4,AL,13,-32.26,0.368265,-1613,18.413242


In [17]:
# Add the weighted column on a copy, leaving diff_df itself unchanged.
# NOTE(review): 252.3 is an unexplained scaling factor applied to the summed
# comfort-hour difference - confirm what it represents and where it comes from.
results = diff_df.assign(
    weighted_sum_occ_comfort_hours_diff=lambda frame: frame["sum_occ_comfort_hours_diff"] * 252.3
)

In [18]:
# Preview the first rows, now including the weighted column.
results.head()

Unnamed: 0,state,upgrade,mean_occ_comfort_hours_diff,mean_discomfort_percent_diff,sum_occ_comfort_hours_diff,sum_discomfort_percent_diff,weighted_sum_occ_comfort_hours_diff
0,AL,1,24.24,-0.276712,1212,-13.835616,305787.6
1,AL,10,-260.8,2.977169,-13040,148.858447,-3289992.0
2,AL,11,6.3,-0.071918,315,-3.59589,79474.5
3,AL,12,-32.04,0.365753,-1602,18.287671,-404184.6
4,AL,13,-32.26,0.368265,-1613,18.413242,-406959.9


In [22]:
# Order the per-state table by upgrade and copy it to the clipboard.
# Upgrade labels are strings, so a plain sort would place '10' before '2';
# sort on their numeric value instead (labels that are not numeric sort last).
# mergesort is stable, so rows of the same upgrade keep their existing order.
# NOTE: the variable name `state` is kept because the national aggregation
# cell reads it.
state = results.sort_values(
    by="upgrade",
    key=lambda labels: pd.to_numeric(labels, errors="coerce"),
    kind="mergesort",
)
state.to_clipboard()

In [25]:
# Collapse the per-state table (`state` is the sorted DataFrame built in the
# previous cell) to one row per upgrade: the two mean differences are averaged
# across states, the weighted sum is totalled. The result is copied to the
# clipboard.
per_upgrade = state.groupby("upgrade")
national = per_upgrade.agg(
    mean_occ_comfort_hours_diff=("mean_occ_comfort_hours_diff", "mean"),
    mean_discomfort_percent_diff=("mean_discomfort_percent_diff", "mean"),
    weighted_sum_occ_comfort_hours_diff=("weighted_sum_occ_comfort_hours_diff", "sum"),
)
national.to_clipboard()