# Using the Dataloader

In [10]:
import numpy as np
import pandas as pd
import copy
import torch
import sys
import os
from pgmpy.models import BayesianModel
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.estimators import BayesianEstimator
from pgmpy.inference import VariableElimination
from torch.utils.data import Dataset, DataLoader
sys.path.append("./..")
import classes.powerdf as pdf
import matplotlib.pyplot as plt
from classes.data_loader import smarthome_dataset
import time

In [11]:
cur_path = os.path.abspath(os.getcwd())
dataset_path = os.path.join(cur_path,"cleaned_HomeC.csv")
print(dataset_path)
df = pd.read_csv(dataset_path, low_memory = False)
dataset = smarthome_dataset("cleaned_HomeC.csv" , dayahead = False)
print("dataset length", len(dataset))

/Users/meezy/Documents/Carnegie/10-708_F23/GRIDPredict/notebooks/cleaned_HomeC.csv
dataset length 503910


In [12]:
# create training and testing dataset
# fill missing values in
df.fillna(value = 0)
train_df = df.iloc[:3000]
test_df = df.iloc[4000:4400]
df = df.iloc[:]

traindata_loader = DataLoader(dataset, batch_size=10, shuffle=True,
                              num_workers=0)
testdata_loader = DataLoader(dataset, batch_size=10, shuffle=True,
                             num_workers=0)

"""
open = True
while (open):
    try:
        inputs, label = next(iter(traindata_loader))  
        print(inputs.shape)
        print(label.shape)
        #print("input {} : label {}".format(inputs,label))
        open = True
    except:
        open = False
"""

'\nopen = True\nwhile (open):\n    try:\n        inputs, label = next(iter(traindata_loader))  \n        print(inputs.shape)\n        print(label.shape)\n        #print("input {} : label {}".format(inputs,label))\n        open = True\n    except:\n        open = False\n'

The data loader returns input features corresponding to the weather features and the label corresponds to the label.

If dayahead is True (predicting next hour's solar generation based on previous 24 hourly weather features)...

\begin{align}
\text{Input Size}: & (\text{batch size } \times \text{ weather features } \times \text{ past 24 hour data points})\\
\text{Labels}: & (\text{batch size})
\end{align}

If dayahead is False (predicting hours solar generation based on current weather features)

\begin{align}
\text{Input Size}: & (\text{batch size } \times \text{ weather features })\\
\text{Labels}: & (\text{batch size})
\end{align}

In [13]:
# Using MLP
from networks.mlp_baseline import *
mlp_model = MLP_baseline(1,9)
optimizer = torch.optim.Adam(mlp_model.parameters(), lr=0.001)
loss_fn = kl_sum_loss

print("Beginning Training...")
print("data loader size", len(traindata_loader))
num_epochs = 1
for n in range(num_epochs):
    total_loss = 0
    for batch_idx, (x, y) in enumerate(traindata_loader):
        start_time = time.time()
        print("batch {}".format(batch_idx))
        x, y = next(iter(traindata_loader))
        print(x.shape)
        print(x)
        y_pred = mlp_model(x)
        end_time = time.time()
        print("exec time", end_time - start_time)
        loss = loss_fn(y_pred, y)
        total_loss = loss + total_loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (batch_idx > 500):
            break
    print("epoch loss:", loss.item())

torch.save(mlp_model, "mlp.pth")

Beginning Training...
data loader size 50391
batch 0
torch.Size([10, 9])
tensor([[1.0390e+01, 1.2000e-01, 2.7700e+02, 0.0000e+00, 2.0510e+01, 1.0140e+03,
         2.4300e+01, 9.3400e+00, 6.1000e-01],
        [4.4100e+00, 0.0000e+00, 2.4400e+02, 0.0000e+00, 1.9630e+01, 1.0028e+03,
         1.9300e+01, 5.2900e+00, 8.0000e-01],
        [4.4400e+00, 1.0000e+00, 3.3000e+01, 9.6000e-03, 3.1400e+01, 1.0136e+03,
         2.9630e+01, 5.1400e+00, 9.1000e-01],
        [7.3000e-01, 4.0000e-02, 4.8000e+01, 0.0000e+00, 4.8400e+01, 1.0226e+03,
         5.1080e+01, 8.7500e+00, 9.0000e-01],
        [2.9200e+00, 3.0000e-02, 2.9700e+02, 0.0000e+00, 3.3750e+01, 1.0224e+03,
         3.7690e+01, 1.0000e+01, 8.6000e-01],
        [7.6500e+00, 1.4000e-01, 1.9700e+02, 0.0000e+00, 5.8500e+01, 1.0166e+03,
         8.3650e+01, 1.0000e+01, 4.2000e-01],
        [5.0300e+00, 2.0000e-02, 3.3700e+02, 0.0000e+00, 3.4700e+01, 1.0192e+03,
         3.4450e+01, 9.8300e+00, 8.7000e-01],
        [3.6300e+00, 7.0000e-02, 6.000

In [14]:
# evaluation
error_terms = np.array([])
for batch_idx, (x, y) in enumerate(testdata_loader):
    print("batch {}".format(batch_idx))
    x, y = next(iter(testdata_loader))
    print(y)
    print(y_pred)
    y_pred = mlp_model(x)
    se_loss = (y - y_pred) * (y - y_pred)
    print("{} squared error loss {}".format(batch_idx, se_loss))
    for term in se_loss:
        np.append(term, se_loss.detach().numpy())
print("mean squared error:", np.mean(se_loss))

batch 0
tensor([ 0.1792,  0.1733, 14.1541,  0.0866,  0.2584,  0.1841, 20.9469,  0.1974,
         0.2575,  0.1983])
tensor([[-1.5820],
        [-0.8856],
        [-2.0485],
        [-1.0573],
        [-1.6285],
        [-1.1654],
        [-0.7965],
        [-1.1746],
        [-1.1335],
        [-0.2578]], grad_fn=<AddmmBackward0>)
0 squared error loss tensor([[ 12.6615,  12.6195, 307.4141,  12.0113,  13.2316,  12.6967, 591.7532,
          12.7913,  13.2246,  12.7980],
        [  7.9663,   7.9330, 282.1523,   7.4523,   8.4198,   7.9942, 556.4948,
           8.0693,   8.4142,   8.0746],
        [ 11.8952,  11.8545, 303.5915,  11.2653,  12.4480,  11.9294, 586.4449,
          12.0211,  12.4412,  12.0275],
        [  8.1302,   8.0966, 283.1238,   7.6109,   8.5883,   8.1584, 557.8586,
           8.2343,   8.5826,   8.2397],
        [  5.0796,   5.0531, 263.3718,   4.6709,   5.4431,   5.1020, 529.9888,
           5.1620,   5.4385,   5.1662],
        [  2.0648,   2.0479, 237.5262,   1.8074,   2

RuntimeError: Can't call numpy() on Tensor that requires grad. Use tensor.detach().numpy() instead.