## Imports

In [2]:
!pip install s3fs



In [3]:
import pandas as pd
import random
import os
import s3fs

import numpy as np
import matplotlib.pyplot as plt
import torch

In [4]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor

from tqdm import tqdm
from sklearn import preprocessing

## Model

In [5]:
# Download model weights
!gdown 1UQAjyMA4cThsv7-fDO_nt7tb3AZR2kyd

Downloading...
From: https://drive.google.com/uc?id=1UQAjyMA4cThsv7-fDO_nt7tb3AZR2kyd
To: /content/CarbonReductionCNN.pth
100% 287M/287M [00:05<00:00, 51.9MB/s]


In [6]:
class RepresentationCNN(nn.Module):
    def __init__(self):
        super(RepresentationCNN, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=2, out_channels=64, kernel_size=5)
        self.p = 2
        self.pool = nn.MaxPool1d(kernel_size=self.p)
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=5)

    def forward(self, x1):
        x = self.pool(F.relu(self.conv1(x1)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)
        return x

In [7]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = RepresentationCNN()
model.to(device)

state_dict = torch.load('CarbonReductionCNN.pth', map_location=torch.device("cpu"))

# Get rid of excess keys
for key in ["fc1.weight", "fc1.bias", "fc2.weight", "fc2.bias", "fc3.weight", "fc3.bias", "output_layer.weight","output_layer.bias"]:
  del state_dict[key]
model.load_state_dict(state_dict)
model.eval()

RepresentationCNN(
  (conv1): Conv1d(2, 64, kernel_size=(5,), stride=(1,))
  (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv1d(64, 128, kernel_size=(5,), stride=(1,))
)

Load Data

In [None]:
# loading X1 data from google drive (modify as needed for X2, Y):
!gdown 1Zyj0fvbzE8fBmmrqzQemcMh4Yhr6I3YD
!gdown 1-eb8dbgukvtKjtNJM2mCp-mXJW_AtcFJ
# load into numpy array called x1
x1 = np.load('X1.npy')
x2 = np.load('X2.npy')

Downloading...
From: https://drive.google.com/uc?id=1Zyj0fvbzE8fBmmrqzQemcMh4Yhr6I3YD
To: /content/X1.npy
100% 701M/701M [00:06<00:00, 117MB/s]
Downloading...
From: https://drive.google.com/uc?id=1-eb8dbgukvtKjtNJM2mCp-mXJW_AtcFJ
To: /content/X2.npy
100% 1.28M/1.28M [00:00<00:00, 92.1MB/s]


In [None]:
# Download id array
!gdown 1-vMm_11S3H5qJj5Ju4fBl6s0HuasaxFU
id_array = np.load("id_arr.npy")

Downloading...
From: https://drive.google.com/uc?id=1-vMm_11S3H5qJj5Ju4fBl6s0HuasaxFU
To: /content/id_arr.npy
  0% 0.00/120k [00:00<?, ?B/s]100% 120k/120k [00:00<00:00, 73.9MB/s]


In [None]:
# Download baseline metadata
base_url_1_1 = 'https://oedi-data-lake.s3.amazonaws.com/nrel-pds-building-stock/end-use-load-profiles-for-us-building-stock/2022/resstock_amy2018_release_1.1/'
state = "MA"

# Get Baseline Metadata for all buildings in MA
upgrade = 0
upgrade_str = 'baseline' if upgrade==0 else f'upgrade{upgrade:02d}'
df_metadata_baseline = pd.read_parquet(f'{base_url_1_1}metadata/{upgrade_str}.parquet', filters=[('in.state','==',state)])

In [None]:
# Which values to query
meta_val = {'in.geometry_building_type_acs':['Single-Family Detached', '2 Unit', '3 or 4 Unit', 'Single-Family Attached', '10 to 19 Unit', '5 to 9 Unit', '50 or more Unit', '20 to 49 Unit', 'Mobile Home']
            ,'in.sqft':[1220.0, 3301.0, 853.0, 1690.0, 12291.0, 617.0, 633.0, 885.0, 1202.0, 2152.0, 8194.0, 3138.0, 1138.0, 1623.0, 2176.0, 1675.0, 333.0, 2663.0, 2115.0, 13414.0, 328.0, 3241.0, 2631.0, 866.0, 2590.0, 317.0]
            ,'in.bedrooms':['4', '3', '1', '2', '5']
            ,'in.geometry_stories':['2', '1', '3', '6', '4', '10', '5', '21', '14', '11', '8', '9', '13', '15', '20', '12', '7']
            ,'in.vintage':['<1940', '1960s', '1970s', '1950s', '1990s', '1940s', '1980s', '2000s', '2010s']
            ,'in.hvac_cooling_type':['Central AC', 'None', 'Room AC', 'Heat Pump']
            ,'in.hvac_heating_type_and_fuel':['Natural Gas Fuel Furnace', 'Natural Gas Fuel Boiler', 'Electricity Baseboard', 'Fuel Oil Fuel Boiler', 'Natural Gas Fuel Wall/Floor Furnace', 'Natural Gas Shared Heating', 'Fuel Oil Fuel Furnace', 'Electricity ASHP', 'Electricity Electric Furnace', 'Electricity Shared Heating', 'None', 'Fuel Oil Fuel Wall/Floor Furnace', 'Propane Fuel Wall/Floor Furnace', 'Fuel Oil Shared Heating', 'Propane Fuel Furnace', 'Propane Fuel Boiler', 'Electricity Electric Boiler', 'Propane Shared Heating', 'Electricity Electric Wall Furnace']
            ,'in.windows':['Double, Low-E, Non-metal, Air, M-Gain', 'Double, Clear, Non-metal, Air', 'Single, Clear, Non-metal', 'Single, Clear, Non-metal, Exterior Clear Storm', 'Double, Clear, Non-metal, Air, Exterior Clear Storm', 'Double, Clear, Metal, Air', 'Single, Clear, Metal', 'Triple, Low-E, Non-metal, Air, L-Gain', 'Double, Clear, Metal, Air, Exterior Clear Storm', 'Single, Clear, Metal, Exterior Clear Storm']
            ,'in.pv_system_size':['None', '5.0 kWDC', '9.0 kWDC', '13.0 kWDC', '7.0 kWDC', '11.0 kWDC', '3.0 kWDC', '1.0 kWDC']
            ,'in.electric_vehicle':['None']}

# Query the metadata values
metadata_baseline = df_metadata_baseline[meta_val.keys()]
# Size of each metadata
meta_size = {'in.geometry_building_type_acs':9
             ,'in.sqft':26
            ,'in.bedrooms':5
            ,'in.geometry_stories':17
            ,'in.vintage':9
            ,'in.hvac_cooling_type':4
            ,'in.hvac_heating_type_and_fuel':19
            ,'in.windows':10
            ,'in.pv_system_size':8
            ,'in.electric_vehicle':1}

meta_start = {'in.geometry_building_type_acs':0
             ,'in.sqft':9
            ,'in.bedrooms':35
            ,'in.geometry_stories':40
            ,'in.vintage':57
            ,'in.hvac_cooling_type':66
            ,'in.hvac_heating_type_and_fuel':70
            ,'in.windows':89
            ,'in.pv_system_size':99
            ,'in.electric_vehicle':107}

meta_idx = {}
for key, value in meta_val.items():
  meta_idx[key] = {v:i+meta_start[key] for i,v in enumerate(value)}
arr = np.zeros(sum(meta_size.values()))

reference_houses = []
for i in range(len(id_array)):
  unencoded_m = metadata_baseline.loc[int(id_array[i])]
  encoded_m = np.zeros(sum(meta_size.values()))
  for key, value in meta_val.items():
    if unencoded_m[key] in meta_val[key]:
      arr[meta_idx[key][unencoded_m[key]]] = 1
  h = x1[i,:,:] # time series vector
  reference_houses.append({"h":h, ""})


KNN Model

In [8]:
# Model parameters
n = 8760 # Length of time series data
nm = 108 # Length of meta data vector
nh = 100 # number of reference houses

# Generate random data for initial testing
x1 = torch.randn(2, n)
reference_houses = []
for i in range(nh):
  """
  Generate the following for each pair:
  h - vector of length n that is time series history
  m - meta data vector of length nm
  """
  h = torch.randn(2, n)
  m = torch.randn(nm)
  reference_houses.append({"h":h, "m":m})

In [27]:
# Element of metadata vector to estimate
# Takes in a list of dictionaries with elements h (house curve) and m (datapoint).

# Extract features from the model for each 'h' vector in the list of dictionaries
model_features = []
ground_truth_labels = []

with torch.no_grad():
    for sample in reference_houses:
        h_vector = torch.tensor(sample['h']).unsqueeze(0) # The unsqueeze treats it as a batch of size 1
        features = model(h_vector)
        model_features.append(features.numpy())
        ground_truth_labels.append(sample['m'].numpy())

model_features = np.vstack(model_features)
ground_truth_labels = np.array(ground_truth_labels)
print(model_features.shape)

# Use KNeighborsRegressor for k-nearest neighbors regression
knn_regressor = KNeighborsRegressor(n_neighbors=5)  # We need to train this (k)
knn_regressor.fit(model_features, ground_truth_labels)

# Extract features from the model for the new input
with torch.no_grad():
    new_features = model(x1.unsqueeze(0)).numpy()

# Use k-nearest neighbors regression to predict the associated parameter
predicted_parameter = knn_regressor.predict(new_features)

print("Predicted Parameter:", predicted_parameter)

  h_vector = torch.tensor(sample['h']).unsqueeze(0) # The unsqueeze treats it as a batch of size 1


(100, 279936)
Predicted Parameter: [[-0.29318708 -0.57841766  0.2609051  -0.04045204  0.4652711  -0.9922411
  -0.02817643  0.2679621  -0.2757408   0.5411051  -0.8108632   0.23123708
  -0.44627905  0.42733422  0.54207724 -0.35100746 -0.20731036  0.00235975
   0.01730009 -0.17936869 -0.08451024  0.2750892  -0.5028862   0.04998841
   0.6690006  -0.47587937  0.25258863  0.03954196  0.42919964 -0.29237446
  -0.4599409   0.16075596  0.15920177  0.52885085  0.16495344  0.21125786
   0.05366431 -0.19257653  0.7002642   0.28609315  0.15585962  0.5858702
  -0.12799466  0.48179016 -0.08245639 -0.33425197  0.39917064 -0.12051062
   0.3360301   0.3136806  -0.09081782  0.06963557  0.28379342 -0.6988869
   0.21281381  0.0054749   0.02573469  0.8724214  -0.15051478  0.76844585
  -0.42279243  0.10073581 -0.34007764 -0.83645904 -0.05297845  0.5842994
   0.27944636 -0.4677124  -0.08838948  0.38487583  0.36976495 -0.32373342
  -0.37726635 -0.18085246 -0.55131185 -0.24948108 -0.2803073  -0.39039648
   1.31