In [None]:
import torch
import os
import sys
import copy

In [None]:
import matplotlib.pyplot as plt

In [None]:
import numpy as np
import networkx as nx

In [None]:
import wandb

In [None]:
# Use the first CUDA device when one is available; otherwise fall back to the
# CPU so the notebook can still run on a machine without a GPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# Make the parent directory importable so that `from src...` resolves when the
# notebook is run from its own folder.
module_path = os.path.abspath(os.pardir)
if not any(entry == module_path for entry in sys.path):
    sys.path.append(module_path)

In [None]:
from src.cv import build_masks, crossvalidation

In [None]:
import torch_geometric.transforms as T
# Composed PyG transform (ToUndirected followed by AddSelfLoops). In the cells
# visible here it is only referenced from a commented-out line.
transform = T.Compose([T.ToUndirected(), T.AddSelfLoops()])

In [None]:
# Shared settings for graph construction and training. Later cells add
# 'input_shape' and overwrite 'epochs' on this same dict.
config = {
    'custom_optimizer': torch.optim.Adam,  # optimizer class, instantiated downstream
    'lr': 0.01,
    'epochs': 1500,
    'linear_layers': 2,
    'hidden_channels': 8,
    'dropout': 0.3,
    'test-percent': 30,        # forwarded to graph(test_percent=...)
    'building_buffer': 60,     # forwarded to graph(building_buffer=...)
    'neighbor_radius': 150,    # forwarded to graph(neighbor_radius=...)
}

In [None]:
from src.graph_construction import graph
# Build the New York graph with the project's `graph` helper. The call returns
# two values: the graph object and a mapping of bookkeeping data (later cells
# read its 'training_mask', 'rebuild_idx' and 'footprints' entries).
nycgraph, nyc_rebuild_info = graph(
    "new-york",
    neighbor_radius=config['neighbor_radius'],
    building_buffer=config['building_buffer'],
    test_percent=config['test-percent']
)

# nycgraph = transform(nycgraph)

In [None]:
nycgraph['footprint'].y[:10]

In [None]:
# Width of the footprint feature matrix; passed as `input_shape` to the models
# built further down.
config['input_shape'] = nycgraph['footprint'].x.shape[1]

In [None]:
nyc_rebuild_info['training_mask'].shape

In [None]:
from src.graph_construction import graph
# Build the Austin graph with the same construction settings as New York.
# Later cells read 'training_mask' and 'rebuild_idx' from `austin_rebuild_info`.
austingraph, austin_rebuild_info = graph(
    "austin",
    neighbor_radius=config['neighbor_radius'],
    building_buffer=config['building_buffer'],
    test_percent=config['test-percent']
)

In [None]:
from src.graph_construction import graph
# Build the San Francisco graph with the same construction settings as New York.
# NOTE(review): the city key is spelled "san-fransisco"; it is a runtime string
# consumed by `graph`, so it is presumably the spelling the data uses - confirm.
sfgraph, sf_rebuild_info = graph(
    "san-fransisco",
    neighbor_radius=config['neighbor_radius'],
    building_buffer=config['building_buffer'],
    test_percent=config['test-percent']
)

In [None]:
sfgraph

In [None]:
nyc_rebuild_info['training_mask'].sum()

In [None]:
(~nyc_rebuild_info['training_mask']).sum()

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Mean-squared-error criterion shared by every evaluation in this notebook.
loss_func = torch.nn.MSELoss()
# Index tensor used below to pick rows out of the footprint feature matrix
# (`nycgraph['footprint'].x[nyrebuild_idx]`) and out of the CV masks.
nyrebuild_idx = torch.tensor(nyc_rebuild_info['rebuild_idx'])

In [None]:
# NumPy copy of the New York training mask; this is the input to `build_masks`.
nyc_mask = nyc_rebuild_info['training_mask'].cpu().numpy()

In [None]:
# Seed NumPy before building the cross-validation masks. Each element of the
# result is unpacked later as a (train mask, validation mask) pair.
# NOTE: `nyc_cvs` is rebuilt with n_cv=3 / n_val=500 in the linear-regression
# cell below, which replaces this value when the cells run in order.
np.random.seed(1)
nyc_cvs = build_masks(
    nyc_mask, 
    n_cv = 6,
    n_val = 300
)

In [None]:
nyc_mask.sum()

In [None]:
nyc_cvs[0][0].sum()

In [None]:
nyrebuild_idx.max()

In [None]:
# Cross-validated linear-regression baseline on the New York footprints.
# These four lists collect the mean / std of the fold errors; only one entry is
# appended in this cell (the plot that would consume more is commented out).
valmean = []
valstd = []

trainmean = []
trainstd = []

# Rebuild the CV folds (this replaces any `nyc_cvs` created earlier).
np.random.seed(1)
nyc_cvs = build_masks(
    nyc_mask, 
    n_cv = 3,
    n_val = 500
)

# Feature rows selected by `nyrebuild_idx`; the targets are used as-is.
# NOTE(review): `nyY` is indexed below with masks that were first indexed by
# `nyrebuild_idx`, so it is presumably already aligned to that index - confirm.
nyX = nycgraph['footprint'].x[nyrebuild_idx].detach().cpu().numpy()
nyY = nycgraph['footprint'].y.cpu()

cvtrain_errors = []
cv_errors = []
for crossfold in nyc_cvs:
    nytrain, nyval = crossfold

    # Restrict both fold masks to the rows selected by `nyrebuild_idx`.
    nytrain = nytrain[nyrebuild_idx]
    nyval = nyval[nyrebuild_idx]

    reg = LinearRegression().fit(nyX[nytrain], nyY[nytrain])
    overfitpred = reg.predict(nyX[nytrain])  # in-sample predictions
    fitpred = reg.predict(nyX[nyval])        # held-out fold predictions

    overfitloss = float(loss_func(torch.tensor(overfitpred), nyY[nytrain]))
    regloss = float(loss_func(torch.tensor(fitpred), nyY[nyval]))

    cvtrain_errors.append(overfitloss)
    cv_errors.append(regloss)

# After the loop `reg` and `regloss` hold the values from the LAST fold only;
# the fold-averaged numbers are the two variables below.
linear_regtrainerr = np.mean(cvtrain_errors)
linear_regerr = np.mean(cv_errors)

trainmean.append(linear_regtrainerr)
valmean.append(linear_regerr)

trainstd.append(np.std(cvtrain_errors))
valstd.append(np.std(cv_errors))

In [None]:
# Linear regression fitted on every row of nyX / nyY (no train/validation split).
# NOTE(review): `fullreg` is not read by any cell visible in this notebook.
fullreg = LinearRegression().fit(nyX, nyY)

In [None]:
# plt.plot(range(ntesting), trainmean, label="training")
# plt.fill_between(
#     range(ntesting), 
#     np.array(trainmean) - np.array(trainstd), 
#     np.array(trainmean) + np.array(trainstd),
#     alpha=0.2
# ) 

# plt.plot(range(ntesting), valmean, label="validation")
# plt.fill_between(
#     range(ntesting), 
#     np.array(valmean) - np.array(valstd), 
#     np.array(valmean) + np.array(valstd),
#     alpha=0.2
# )

# plt.ylim(0.3, 1.3)
# plt.legend()
# plt.show()

In [None]:
print(linear_regtrainerr)

In [None]:
print(linear_regerr)

In [None]:
linear_regtrainerr

In [None]:
linear_regerr

In [None]:
nycgraph['footprint'].x

In [None]:
nycgraph['footprint'].x

In [None]:
config

In [None]:
from src.model import NullModel

# Baseline ("null") model: it is called on the footprint features alone, with
# no graph edges.
# input_shape = nycgraph['footprint'].x.shape[1]
nullmodel = NullModel(
    layers = config['linear_layers'],
    input_shape = config['input_shape'],
    hidden_channels = config['hidden_channels']
).to(device)

# Snapshot of the freshly initialised weights so the model can be reset later.
default_nullmodelstate = copy.deepcopy(nullmodel.state_dict())

# Run the project's cross-validation on the folds in `nyc_cvs`. The lambda is
# the forward pass that `crossvalidation` invokes.
# NOTE(review): the two returned arrays are averaged over axis 0 and plotted
# per epoch below, so they are presumably shaped (folds, epochs) - confirm.
null_training_tensor, null_validation_tensor = crossvalidation(
    nullmodel,
    lambda: nullmodel(nycgraph['footprint'].x),
    nyc_cvs,
    nyrebuild_idx,
    nycgraph['footprint'].y,
    epochs = config['epochs'],
    custom_optimizer = config['custom_optimizer'],
    lr = config['lr'],
    config = config,
    log_model = False
)

In [None]:
# Average the cross-validation loss curves over axis 0 (the fold axis,
# presumably - confirm against `crossvalidation`).
null_mean_tl = null_training_tensor.mean(axis=0)
null_mean_vl = null_validation_tensor.mean(axis=0)

plt.plot(null_mean_tl, label="Null Training")
plt.plot(null_mean_vl, label="Null Validation")

plt.legend()

plt.yscale("log")
# A log axis cannot start at 0: matplotlib ignores a non-positive lower limit
# and emits a warning. Cap only the top and let the bottom autoscale.
plt.ylim(top=5)

plt.show()

In [None]:
config

In [None]:
nycgraph

In [None]:
from torch import nn
import torch_geometric.transforms as T
from torch_geometric.nn import GATConv, Linear, to_hetero, GATv2Conv
from torch_geometric.nn.conv.hetero_conv import HeteroConv

# Graph convolution operator used for every edge type in CustomGAT.
custom_graphconv = GATv2Conv

class CustomGAT(torch.nn.Module):
    """Heterogeneous GATv2 encoder followed by an MLP head on 'footprint' nodes.

    Each of the `layers` message-passing steps is a `HeteroConv` over the edge
    types ('pano', 'links', 'pano'), ('footprint', 'contains', 'pano') and
    ('pano', 'rev_contains', 'footprint'), summed per destination node type.

    Args:
        hidden_channels: per-head output width of every graph convolution and
            the width of the hidden MLP layers.
        out_channels: output width of `self.lin`. `forward` does not call
            `self.lin`, so this value does not affect the prediction.
        layers: number of HeteroConv layers.
        linear_layers: forwarded to the embedded `NullModel`.
        input_shape: forwarded to the embedded `NullModel`.
        heads: number of attention heads per graph convolution.
        dropout: accepted for interface compatibility but NOT applied; the MLP
            dropout rates are hard-coded (0.8, 0.5, 0.5).
            NOTE(review): wire this in only deliberately, since it changes the
            trained model.
    """

    def __init__(
            self, 
            hidden_channels:int, 
            out_channels:int, 
            layers:int,
            linear_layers:int,
            input_shape:int,
            heads:int = 1,
            dropout = 0.5
        ):
        super().__init__()
        self.layers = layers

        # `convs` is filled below; `lins` stays empty and is never read.
        self.convs = torch.nn.ModuleList()
        self.lins = torch.nn.ModuleList()

        # Registered as a sub-module but not called in `forward`.
        self.nullmodel = NullModel(
            layers = linear_layers,
            input_shape = input_shape,
            hidden_channels=hidden_channels
        )

        for _ in range(layers):
            conv = HeteroConv({
                ('pano', 'links', 'pano'): custom_graphconv(-1, hidden_channels, add_self_loops = False, heads=heads),
                ('footprint', 'contains', 'pano'): custom_graphconv((-1, -1), hidden_channels, add_self_loops = False, heads=heads),
                ('pano', 'rev_contains', 'footprint'): custom_graphconv((-1, -1), hidden_channels, add_self_loops = False, heads=heads),
            }, aggr='sum')
            self.convs.append(conv)

        # Registered but not called in `forward`.
        self.lin = Linear(hidden_channels, out_channels)
        self.mlp = nn.Sequential(
            nn.Dropout(p=0.8),
            nn.ReLU(),
            # The convolutions concatenate their heads, so the features reaching
            # the MLP are `hidden_channels * heads` wide. The previous sizing,
            # `out_channels * heads`, only matched when out_channels ==
            # hidden_channels.
            nn.Linear(hidden_channels * heads, hidden_channels),
            nn.Dropout(p=0.5),
            nn.ReLU(),
            nn.Linear(hidden_channels, hidden_channels),
            nn.Dropout(p=0.5),
            nn.ReLU(),
            nn.Linear(hidden_channels, 1)
        )

    def forward(self, x_dict, edge_index_dict):
        """Run every HeteroConv layer (ReLU after each) and return the MLP
        output for the 'footprint' nodes (last dimension of size 1)."""
        for conv in self.convs:
            x_dict = conv(x_dict, edge_index_dict)
            x_dict = {key: x.relu() for key, x in x_dict.items()}
        return self.mlp(x_dict['footprint'])

# model = to_hetero(model, data.metadata(), aggr='sum').to(device)

In [None]:
# from src.model import CustomGAT

# Instantiate the graph model: two HeteroConv layers with four attention heads.
# NOTE: `dropout` is passed here, but CustomGAT as defined in this notebook
# does not read it (its MLP dropout rates are hard-coded).
model = CustomGAT(
    hidden_channels = config['hidden_channels'], 
    out_channels=8,
    layers=2,
    heads=4,
    linear_layers = config['linear_layers'],
    input_shape = config['input_shape'],
    dropout = config['dropout']
).to(device)

# Snapshot of the initial weights, reloaded before the final NYC fit.
default_modelstate = copy.deepcopy(model.state_dict())

In [None]:
# Cross-validate the graph model on the same folds (`nyc_cvs`) and with the
# same arguments as the null model, so the two sets of curves are comparable.
graph_training_tensor, graph_validation_tensor = crossvalidation(
    model,
    lambda: model(nycgraph.x_dict, nycgraph.edge_index_dict),
    nyc_cvs,
    nyrebuild_idx,
    nycgraph['footprint'].y,
    epochs = config['epochs'],
    custom_optimizer = config['custom_optimizer'],
    lr = config['lr'],
    config = config,
    log_model = False
)

In [None]:
# Mean and spread of the graph model's loss curves over axis 0 (the fold axis,
# presumably - confirm against `crossvalidation`).
mean_tl = graph_training_tensor.mean(axis=0)
std_tl = graph_training_tensor.std(axis=0)

mean_vl = graph_validation_tensor.mean(axis=0)
std_vl = graph_validation_tensor.std(axis=0)

plt.plot(mean_tl, label="Graph Training")
plt.plot(mean_vl, label="Graph Validation")
plt.legend()

plt.yscale("log")
# A log axis cannot start at 0: matplotlib ignores a non-positive lower limit
# and emits a warning. Cap only the top and let the bottom autoscale.
plt.ylim(top=2)

plt.show()

In [None]:
# `nycfootprints` is the same object stored in `nyc_rebuild_info`, so the new
# column below is added to that stored table in place.
nycfootprints = nyc_rebuild_info['footprints']
# Natural log of the `energy` column.
nycfootprints['logenergy'] = np.log(nycfootprints.energy)

# nycfootprints.explore('logenergy')

In [None]:
# Compare the graph model, the null model and the linear baseline on one axis.
domain = np.arange(0, len(mean_tl))

# plt.plot(domain, mean_tl, label="Training")
plt.plot(domain, mean_vl, label="Validation", color="lightblue")
# plt.fill_between(domain, mean_vl + std_vl, mean_vl - std_vl, alpha=0.1)

plt.plot(domain, null_mean_tl, label="Null Training", color="indianred", linestyle='dashed')
plt.plot(domain, null_mean_vl, label="Null Validation", color="salmon", linestyle='dashed')

# Use the fold-averaged linear-regression validation error so the reference line
# is comparable with the fold-averaged curves above (`regloss` would be the
# value from the last fold only).
plt.hlines(linear_regerr, min(domain), max(domain), color='pink', label="linear regression")

plt.title(f"Loss Function with Training - lr: {config['lr']}")

plt.xlabel("Epoch")
# Both training and validation curves are drawn, so the axis is simply the loss.
plt.ylabel("Loss - MSE")
plt.legend()

plt.yscale("log")
# plt.ylim((0,2.5))

plt.show()

In [None]:
# Relative change of the best (minimum) mean validation loss of the graph model
# versus the null model. A NEGATIVE value means the graph model reached the
# lower loss, despite the "Improvement" label in the printout.
graph_improvement = (min(mean_vl) - min(null_mean_vl)) / min(null_mean_vl)
print("Improvement from Context: {:0.2f}%".format(100*graph_improvement))

In [None]:
def relative_benefit(errors, threshold):
    """Return the percentage of `errors` that are at or below `threshold`.

    `errors` must support element-wise `<=`, `.sum()` and `len()` (for example
    a NumPy array or a torch tensor). The result is a plain Python float in
    the range 0-100.
    """
    within_threshold = errors <= threshold
    passing_fraction = within_threshold.sum() / len(errors)
    return float(100 * passing_fraction)

In [None]:
# now examining how this may generalize
# Evaluate the three models on the held-out New York rows.
# NOTE: when the notebook runs top to bottom, `reg` is the regressor fitted on
# the last cross-validation fold. The neural models carry whatever weights the
# preceding `crossvalidation` calls left in them - TODO confirm what that is.
model.eval()
nullmodel.eval()
with torch.no_grad():
    print("New York City")
    # Held-out rows are the complement of the training mask.
    testidx = (~nyc_rebuild_info['training_mask']).detach().cpu().numpy()
    rebuilding_idx = np.array(nyc_rebuild_info['rebuild_idx'])

    # Each prediction is made for every footprint, reduced to the rows in
    # `rebuilding_idx`, then filtered to the held-out rows.
    linear_predictions = reg.predict(nycgraph['footprint'].x.cpu().numpy())[rebuilding_idx][testidx[rebuilding_idx]]
    null_predictions = nullmodel(nycgraph['footprint'].x).squeeze()[rebuilding_idx][testidx[rebuilding_idx]]
    estimates = model(nycgraph.x_dict, nycgraph.edge_index_dict).squeeze().detach()[rebuilding_idx][testidx[rebuilding_idx]]
    # NOTE(review): `y` is filtered with the rebuild-aligned mask directly, so it
    # is presumably already ordered like `rebuilding_idx` - confirm.
    recorded = nycgraph['footprint'].y[testidx[rebuilding_idx]]

    linear_loss = loss_func(torch.tensor(linear_predictions).to(device), recorded)
    null_loss = loss_func(null_predictions, recorded)
    graph_loss = loss_func(estimates, recorded)
    
    print("Linear Loss:\t{:0.2f}".format(linear_loss))
    print("Null Loss:\t{:0.2f}".format(null_loss))
    print("Graph Loss:\t{:0.2f}".format(graph_loss))
    # Percentage change of graph loss relative to null loss; negative means the
    # graph model has the lower loss.
    print("Improvement:\t{:0.2f}".format(100 * (graph_loss - null_loss)/null_loss))

In [None]:
# Element-wise absolute errors after exponentiating targets and predictions
# (not yet averaged, despite the "mae" names).
# NOTE(review): the exp() suggests the targets are log-scaled - confirm.
linmae = torch.abs(torch.exp(recorded) - torch.tensor(np.exp(linear_predictions)).to(device))
nullmae = torch.abs(torch.exp(recorded) - torch.exp(null_predictions))
graphmae = torch.abs(torch.exp(recorded) - torch.exp(estimates))

In [None]:
# Absolute-error cut-off, in the same (exponentiated) units as the errors above.
threshold = 1000

# Percentage of held-out rows whose absolute error is within the threshold.
print(f"Linear Benefit:\t\t{relative_benefit(linmae, threshold)}")
print(f"Null Benefit:\t\t{relative_benefit(nullmae, threshold)}")
print(f"Graph Benefit:\t\t{relative_benefit(graphmae, threshold)}")

In [None]:
torch.exp(null_predictions)

In [None]:
# San Francisco held-out mask (complement of its training mask), shown for
# inspection; the evaluation cell below recomputes it.
testidx = (~sf_rebuild_info['training_mask']).detach().cpu().numpy()
testidx

In [None]:
# San Francisco row index; the evaluation cell below recomputes it as well.
rebuilding_idx = np.array(sf_rebuild_info['rebuild_idx'])

In [None]:
# now examining how this may generalize
# Zero-shot evaluation on San Francisco: the models are applied as they are,
# with no San Francisco training in this cell.
nullmodel.eval()
model.eval()
with torch.no_grad():
    print("San Fransisco")
    testidx = (~sf_rebuild_info['training_mask']).detach().cpu().numpy()
    rebuilding_idx = np.array(sf_rebuild_info['rebuild_idx'])
    # Predict for every footprint, keep the rows in `rebuilding_idx`, then keep
    # only the held-out rows.
    null_predictions = nullmodel(sfgraph['footprint'].x).squeeze()[rebuilding_idx][testidx[rebuilding_idx]]
    
    linear_predictions = reg.predict(sfgraph['footprint'].x.cpu().numpy())[rebuilding_idx][testidx[rebuilding_idx]]
    estimates = model(sfgraph.x_dict, sfgraph.edge_index_dict).squeeze().detach()[rebuilding_idx][testidx[rebuilding_idx]]
    recorded = sfgraph['footprint'].y[testidx[rebuilding_idx]]

    linear_loss = loss_func(torch.tensor(linear_predictions).to(device), recorded)
    null_loss = loss_func(null_predictions, recorded)
    graph_loss = loss_func(estimates, recorded)
    print("Linear Loss:\t{:0.2f}".format(linear_loss))
    print("Null Loss:\t{:0.2f}".format(null_loss))
    print("Graph Loss:\t{:0.2f}".format(graph_loss))
    # Negative means the graph model has the lower loss.
    print("Improvement:\t{:0.2f}".format(100 * (graph_loss - null_loss)/null_loss))

In [None]:
# Zero-shot evaluation on Austin: the models are applied as they are, with no
# Austin training in this cell.
nullmodel.eval()
model.eval()
with torch.no_grad():
    print("Austin Texas")
    testidx = (~austin_rebuild_info['training_mask']).detach().cpu().numpy()
    rebuilding_idx = np.array(austin_rebuild_info['rebuild_idx'])
    # Predict for every footprint, keep the rows in `rebuilding_idx`, then keep
    # only the held-out rows.
    null_predictions = nullmodel(austingraph['footprint'].x).squeeze()[rebuilding_idx][testidx[rebuilding_idx]]
    
    linear_predictions = reg.predict(austingraph['footprint'].x.cpu().numpy())[rebuilding_idx][testidx[rebuilding_idx]]
    estimates = model(austingraph.x_dict, austingraph.edge_index_dict).squeeze().detach()[rebuilding_idx][testidx[rebuilding_idx]]
    recorded = austingraph['footprint'].y[testidx[rebuilding_idx]]
    
    # Recompute the linear loss for Austin. Without this line the printout below
    # would reuse the value left over from the San Francisco cell.
    linear_loss = loss_func(torch.tensor(linear_predictions).to(device), recorded)
    null_loss = loss_func(null_predictions, recorded)
    graph_loss = loss_func(estimates, recorded)
    print("Linear Loss:\t{:0.2f}".format(linear_loss))
    print("Null Loss:\t{:0.2f}".format(null_loss))
    print("Graph Loss:\t{:0.2f}".format(graph_loss))
    # Negative means the graph model has the lower loss.
    print("Improvement:\t{:0.2f}".format(100 * (graph_loss - null_loss)/null_loss))

In [None]:
# Instantiate the configured optimizer class on the model's parameters with its
# default arguments. The instance is only displayed, not stored.
optim = config['custom_optimizer']
optim(model.parameters())

In [None]:
### I used the below cells to evaluate how many epochs to use for the final model

In [None]:
# from src.trainer import trainer
# from src.cv import reset_model

# model.load_state_dict(default_modelstate)
# model.train()

# trainmask = nyc_rebuild_info['training_mask']
# testmask = (~nyc_rebuild_info['training_mask'])

# rebuild_idx = np.array(nyc_rebuild_info['rebuild_idx'])

# nystate_dict_origin, trainlosses, testlosses = trainer(
#     model.to(device),
#     lambda: model(nycgraph.x_dict, nycgraph.edge_index_dict),
#     rebuild_idx,
#     trainmask[rebuild_idx],
#     testmask[rebuild_idx],
#     recorded = nycgraph['footprint'].y,
#     loss_func = loss_func,
#     config = config
# )

# nystate_dict = copy.deepcopy(nystate_dict_origin)

In [None]:
# from src.trainer import trainer
# from src.cv import reset_model

# nullmodel.load_state_dict(default_nullmodelstate)

# trainmask = nyc_rebuild_info['training_mask']
# testmask = (~nyc_rebuild_info['training_mask'])

# rebuild_idx = np.array(nyc_rebuild_info['rebuild_idx'])

# nystate_dict_origin, trainlosses, testlosses = trainer(
#     nullmodel.to(device),
#     lambda: nullmodel(nycgraph['footprint'].x),
#     rebuild_idx,
#     trainmask[rebuild_idx],
#     testmask[rebuild_idx],
#     recorded = nycgraph['footprint'].y,
#     loss_func = loss_func,
#     config = config
# )

# nynull_state_dict = copy.deepcopy(nystate_dict_origin)

In [None]:
# plt.plot(trainlosses, label="training")
# plt.plot(testlosses, label="testing")

# plt.legend()
# plt.yscale("log")

# plt.show()

In [None]:
# this is manually set based on the above graph^
from src.trainer import trainer

# Final New York fit of the graph model, restarted from its initial weights.
# This overwrites 'epochs' in the shared `config` dict for all later cells.
config['epochs'] = 500

model.load_state_dict(default_modelstate)

trainmask = nyc_rebuild_info['training_mask']
testmask = (~nyc_rebuild_info['training_mask'])

# Replace the training mask with all-True so every row is used for fitting;
# `testmask` is left as the original held-out rows.
trainmask = np.repeat([True], len(trainmask))
rebuild_idx = np.array(nyc_rebuild_info['rebuild_idx'])

# Both masks are aligned to `rebuild_idx` before being handed to the trainer.
# NOTE(review): the meaning of `fulldata=True` is defined in src.trainer and is
# not visible here.
nystate_dict_origin, trainlosses, testlosses = trainer(
    model.to(device),
    lambda: model(nycgraph.x_dict, nycgraph.edge_index_dict),
    rebuild_idx,
    trainmask[rebuild_idx],
    testmask[rebuild_idx],
    recorded = nycgraph['footprint'].y,
    loss_func = loss_func,
    config = config,
    fulldata = True
)

# Independent copy of the returned weights; the transfer-learning loops reload it.
nystate_dict = copy.deepcopy(nystate_dict_origin)

In [None]:
# Final New York fit of the null model, restarted from its initial weights,
# using the same all-True training mask as the graph model.
nullmodel.train()
nullmodel.load_state_dict(default_nullmodelstate)

trainmask = nyc_rebuild_info['training_mask']
testmask = (~nyc_rebuild_info['training_mask'])

# Replace the training mask with all-True so every row is used for fitting.
trainmask = np.repeat([True], len(trainmask))
rebuild_idx = np.array(nyc_rebuild_info['rebuild_idx'])

# NOTE: this reuses (and overwrites) `nystate_dict_origin`, `trainlosses` and
# `testlosses` from the graph-model cell.
nystate_dict_origin, trainlosses, testlosses = trainer(
    nullmodel.to(device),
    lambda: nullmodel(nycgraph['footprint'].x),
    rebuild_idx,
    trainmask[rebuild_idx],
    testmask[rebuild_idx],
    recorded = nycgraph['footprint'].y,
    loss_func = loss_func,
    config = config,
    fulldata = True
)

# Independent copy of the returned weights; the transfer-learning loops reload it.
nystate_nulldict = copy.deepcopy(nystate_dict_origin)

In [None]:
np.vstack((nyX, sfgraph['footprint'].x[:50].cpu().detach())).shape

In [None]:
from tqdm import tqdm_notebook

# Few-shot transfer to San Francisco: for each n_true, restart from the
# NYC-trained weights, expose n_true San Francisco training rows, fine-tune, and
# record the best held-out loss of each model.
sf_linearvalloss = []
sf_valloss = []
sf_nullvalloss = []

# Overwrites 'epochs' in the shared `config` dict.
config['epochs'] = 1000

for n_true in tqdm_notebook(range(100), leave=True):
    # Every run starts from the same New York weights.
    model.load_state_dict(nystate_dict)
    nullmodel.load_state_dict(nystate_nulldict)
    
    model.train()
    nullmodel.train()

    sf_trainmask = sf_rebuild_info['training_mask']
    sf_testmask = (~sf_rebuild_info['training_mask'])

    # Positions of the rows that are allowed to be used for training.
    sf_to_false = np.where(sf_trainmask.cpu().numpy())[0]

    # Re-seeding on every iteration keeps the draw repeatable across runs.
    np.random.seed(1)
    drip_idx = np.random.choice(sf_to_false, n_true, replace=False)

    # this now just drips in a bit of the sf data
    trainmask = np.repeat([False], len(sf_trainmask))
    trainmask[drip_idx] = True

    rebuild_idx = np.array(sf_rebuild_info['rebuild_idx'])

    # Masks aligned to `rebuild_idx`. The held-out mask must be San Francisco's
    # own (`sf_testmask`), not the New York `testmask` left in the namespace by
    # an earlier cell.
    sf_train_sel = trainmask[rebuild_idx]
    sf_test_sel = sf_testmask[rebuild_idx]
    
    ## building the linear model
    # New York rows plus the dripped-in San Francisco rows.
    x_linear_addition = np.vstack((nyX, sfgraph['footprint'].x[rebuild_idx][sf_train_sel].cpu().detach()))
    y_linear_addition = np.concatenate((nyY, sfgraph['footprint'].y[sf_train_sel].cpu().detach()), axis=None)
    
    reg = LinearRegression().fit(x_linear_addition, y_linear_addition)
    preds = reg.predict(sfgraph['footprint'].x[rebuild_idx][sf_test_sel].cpu().detach())
    existing_terms = sfgraph['footprint'].y[sf_test_sel].cpu().detach()
    # Store a plain float (not a 0-d tensor), as the earlier CV cell does.
    sf_linearvalloss.append(float(loss_func(torch.tensor(preds), existing_terms)))

    # The masks are passed aligned to `rebuild_idx`, matching the other
    # `trainer` calls in this notebook.
    sf_state_dict, sf_trainlosses, sf_testlosses = trainer(
        model.to(device),
        lambda: model(sfgraph.x_dict, sfgraph.edge_index_dict),
        rebuild_idx,
        sf_train_sel,
        sf_test_sel,
        recorded = sfgraph['footprint'].y,
        loss_func = loss_func,
        config = config
    )
    
    _, _, sf_nulltestlosses = trainer(
        nullmodel.to(device),
        lambda: nullmodel(sfgraph['footprint'].x),
        rebuild_idx,
        sf_train_sel,
        sf_test_sel,
        recorded = sfgraph['footprint'].y,
        loss_func = loss_func,
        config = config
    )
    # Best held-out loss reached during fine-tuning.
    sf_valloss.append(min(sf_testlosses))
    sf_nullvalloss.append(min(sf_nulltestlosses))

In [None]:
# Held-out loss on San Francisco versus the number of San Francisco buildings
# made available for fine-tuning.
# plt.plot(sf_trainlosses, label="training")
plt.plot(sf_linearvalloss, label="Linear", color="coral")
plt.plot(sf_nullvalloss, label="Null", color="orange")
plt.plot(sf_valloss, label="Graph", color="lightblue")

plt.legend()
plt.yscale("log")

plt.title("Generalization from New York to San Fransisco")
plt.xlabel("# Buildings from San Fransisco")
# The plotted values come from torch.nn.MSELoss, so the axis is MSE, not RMSE.
plt.ylabel("Loss - MSE")

plt.show()

In [None]:
austingraph

In [None]:
from tqdm import tqdm_notebook

# Few-shot transfer to Austin: for each n_true, restart from the NYC-trained
# weights, expose n_true Austin training rows, fine-tune, and record the best
# held-out loss of each model.
austin_linearvalloss = []
austin_valloss = []
austin_nullvalloss = []

# Overwrites 'epochs' in the shared `config` dict.
config['epochs'] = 1000

for n_true in tqdm_notebook(range(10), leave=True):
    # Every run starts from the same New York weights.
    model.load_state_dict(nystate_dict)
    nullmodel.load_state_dict(nystate_nulldict)

    # Same mode handling as the San Francisco transfer loop.
    model.train()
    nullmodel.train()

    austin_trainmask = austin_rebuild_info['training_mask']
    austin_testmask = (~austin_rebuild_info['training_mask'])

    # Positions of the rows that are allowed to be used for training.
    austin_to_false = np.where(austin_trainmask.cpu().numpy())[0]

    # Re-seeding on every iteration keeps the draw repeatable across runs.
    np.random.seed(1)
    drip_idx = np.random.choice(austin_to_false, n_true, replace=False)

    # this now just drips in a bit of the Austin data
    trainmask = np.repeat([False], len(austin_trainmask))
    trainmask[drip_idx] = True

    rebuild_idx = np.array(austin_rebuild_info['rebuild_idx'])

    # Masks aligned to `rebuild_idx`. The held-out mask must be Austin's own
    # (`austin_testmask`), not the New York `testmask` left in the namespace by
    # an earlier cell.
    austin_train_sel = trainmask[rebuild_idx]
    austin_test_sel = austin_testmask[rebuild_idx]
    
    ## building the linear model
    # New York rows plus the dripped-in Austin rows.
    x_linear_addition = np.vstack((nyX, austingraph['footprint'].x[rebuild_idx][austin_train_sel].cpu().detach()))
    y_linear_addition = np.concatenate((nyY, austingraph['footprint'].y[austin_train_sel].cpu().detach()), axis=None)
    
    reg = LinearRegression().fit(x_linear_addition, y_linear_addition)
    preds = reg.predict(austingraph['footprint'].x[rebuild_idx][austin_test_sel].cpu().detach())
    existing_terms = austingraph['footprint'].y[austin_test_sel].cpu().detach()
    # Store a plain float (not a 0-d tensor), as the earlier CV cell does.
    austin_linearvalloss.append(float(loss_func(torch.tensor(preds), existing_terms)))

    # The masks are passed aligned to `rebuild_idx`, matching the other
    # `trainer` calls in this notebook.
    austin_state_dict, austin_trainlosses, austin_testlosses = trainer(
        model.to(device),
        lambda: model(austingraph.x_dict, austingraph.edge_index_dict),
        rebuild_idx,
        austin_train_sel,
        austin_test_sel,
        recorded = austingraph['footprint'].y,
        loss_func = loss_func,
        config = config
    )
    
    _, _, austin_nulltestlosses = trainer(
        nullmodel.to(device),
        lambda: nullmodel(austingraph['footprint'].x),
        rebuild_idx,
        austin_train_sel,
        austin_test_sel,
        recorded = austingraph['footprint'].y,
        loss_func = loss_func,
        config = config
    )
    # Best held-out loss reached during fine-tuning.
    austin_valloss.append(min(austin_testlosses))
    austin_nullvalloss.append(min(austin_nulltestlosses))

In [None]:
# Held-out loss on Austin versus the number of Austin buildings made available
# for fine-tuning.
# plt.plot(austin_trainlosses, label="training")
plt.plot(austin_linearvalloss, label="Linear", color="firebrick")
plt.plot(austin_nullvalloss, label="Null", color="lightsalmon")
plt.plot(austin_valloss, label="Graph", color="lightblue")

plt.legend()
plt.yscale("log")

plt.title("Generalization from New York to Austin")
plt.xlabel("# Buildings from Austin")
# The plotted values come from torch.nn.MSELoss, so the axis is MSE, not RMSE.
plt.ylabel("Loss - MSE")

plt.show()

In [None]:
# ookaaay now I want to see how the model might generalize

In [None]:
# param_size = 0
# for param in model.parameters():
#     param_size += param.nelement() * param.element_size()
# buffer_size = 0
# for buffer in model.buffers():
#     buffer_size += buffer.nelement() * buffer.element_size()

# size_all_mb = (param_size + buffer_size) / 1024**2
# print('model size: {:.3f}MB'.format(size_all_mb))