In [None]:
import numpy as np
import torch
from torch import nn, optim
from torch.autograd import Variable
import torch.nn.functional as F
import pandas as pd
import time
import math
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader

In [1]:
# Load the raw table and keep only the columns the model uses.
# Position 5 is the ground-truth solar column (position 20 holds the
# disaggregated solar instead). To include GHI, insert position 9 after 27:
#     [5,10,23,24,25,26,6,27,9,0,2,22]
df = pd.read_csv('data_Ithaca.csv')
selected_cols = [5, 10, 23, 24, 25, 26, 6, 27, 0, 2, 22]
names = df.columns[selected_cols]
print(names)
data = df.values[:, selected_cols]

# Statistics of the target (column 0), taken before any scaling.
mean_s = np.mean(data[:, 0])
std_s = np.std(data[:, 0])
min_s = np.min(data[:, 0])
max_s = np.max(data[:, 0])
print('mean',mean_s, 'std',std_s, 'max',max_s, 'min',min_s)

# Min-max scale the target to [0, 1] in the original dtype, then cast the
# whole array to float32.
data[:, 0] = (data[:, 0] - min_s) / (max_s - min_s)
data = data.astype('float32')

# Min-max scale columns 1..7 one at a time (the last three columns are left
# untouched).
for col in range(1, 8):
    min_temp = np.min(data[:, col])
    max_temp = np.max(data[:, col])
    data[:, col] = (data[:, col] - min_temp) / (max_temp - min_temp)
print(data.shape)

In [None]:
# employ sliding window to stack data correspondingly
def Create_dataset(dataset, start=724, window=96, n_weather=7):
    """Cut ``dataset`` into overlapping windows and split each by column group.

    Sample ``i`` is built from rows ``dataset[i+start : i+start+window]``.
    Inside a window, column 0 is the label, columns ``1 .. n_weather`` are the
    weather features and every remaining column is a calendar feature. Each
    group is flattened row by row into a single column vector.

    Parameters
    ----------
    dataset : 2-D numpy array, one row per time step.
    start : int
        Row offset of the window relative to the sample index (default 724).
    window : int
        Number of rows per window (default 96).
    n_weather : int
        Number of weather columns following the label column (default 7;
        use 8 when an extra GHI column is included).

    Returns
    -------
    (data_X, data_Y, data_Z) : tuple of numpy arrays
        Weather features ``(n, window*n_weather, 1)``, labels
        ``(n, window, 1)`` and calendar features ``(n, window*n_calendar, 1)``
        where ``n = len(dataset) - (start + window)`` (no samples when that is
        not positive).
    """
    # The sample count deliberately stays at len - (start + window): the index
    # lists built elsewhere in this notebook (range(0, 16844)) rely on it.
    n_samples = len(dataset) - (start + window)
    first_calendar_col = 1 + n_weather

    data_X, data_Y, data_Z = [], [], []
    for i in range(n_samples):
        block = dataset[i + start:i + start + window, :]
        data_Y.append(block[:, 0].reshape(-1, 1))
        data_X.append(block[:, 1:first_calendar_col].reshape(-1, 1))
        data_Z.append(block[:, first_calendar_col:].reshape(-1, 1))

    return np.array(data_X), np.array(data_Y), np.array(data_Z)

In [None]:
# encode categorical input features
# omit weekday here as it's irrelevant to solar generation
def one_hot(label, num_classes):
    """One-hot encode the integer-valued tensor ``label`` as float32."""
    return F.one_hot(label.long(), num_classes=num_classes).type(torch.float32)


def cycl_(x, num_classes):
    """Return the float32 (sin, cos) pair of ``x`` on a cycle of ``num_classes``."""
    angle = x / num_classes * 2 * np.pi
    return torch.tensor((np.sin(angle), np.cos(angle))).type(torch.float32)


def Calender(ty):
    """Encode flattened calendar triples into per-step feature vectors.

    Each sample in ``ty`` is a flat sequence of triples, one triple per time
    step. The first value of a triple is ignored (the weekday, per the note
    above), the second is mapped to ``(sin, cos)`` on a 96-step cycle and the
    third is one-hot encoded over 10 classes, giving 12 values per step in
    the order ``[sin, cos, one-hot...]``.

    The whole batch is encoded with tensor operations instead of a Python
    loop over every sample and step, and the number of steps is taken from
    the input rather than fixed at 96.

    Parameters
    ----------
    ty : tensor (or array-like) of shape ``(n, 3*steps, 1)`` or ``(n, 3*steps)``.

    Returns
    -------
    torch.Tensor
        float32 tensor of shape ``(n, 1, steps*12)``.

    Raises
    ------
    RuntimeError
        From ``F.one_hot`` when a third-field value is outside ``[0, 10)``.
    """
    cycle_len = 96     # period used for the cyclic (sin, cos) encoding
    num_classes = 10   # width of the one-hot encoding

    ty = torch.as_tensor(ty)
    n_samples = ty.shape[0]
    if n_samples == 0:
        # Nothing to encode; reshape(0, -1, 3) would be ambiguous.
        return torch.zeros((0, 1, 0), dtype=torch.float32)

    fields = ty.reshape(n_samples, -1, 3)                  # (n, steps, 3)
    angle = fields[:, :, 1] / cycle_len * 2 * np.pi
    cyclic = torch.stack((torch.sin(angle), torch.cos(angle)), dim=2)
    cyclic = cyclic.to(torch.float32)                      # (n, steps, 2)
    category = one_hot(fields[:, :, 2], num_classes)       # (n, steps, 10)

    encoded = torch.cat((cyclic, category), dim=2)         # (n, steps, 12)
    return encoded.reshape(n_samples, 1, -1)

In [None]:
# evenly divide the whole dataset into five folds for cross validation
_FOLD_LEN = 672                 # consecutive samples in one held-out stretch
_CYCLE_LEN = 5 * _FOLD_LEN      # 3360: the five stretches repeat with this period
_N_CYCLES = 5                   # number of repetitions covered by the folds


def _fold_indices(fold):
    """Return the sample indices of zero-based ``fold``.

    A fold is the ``fold``-th stretch of ``_FOLD_LEN`` samples inside each of
    the ``_N_CYCLES`` cycles, listed in increasing order.
    """
    return [cycle * _CYCLE_LEN + fold * _FOLD_LEN + offset
            for cycle in range(_N_CYCLES)
            for offset in range(_FOLD_LEN)]


test_indice1 = _fold_indices(0)
test_indice2 = _fold_indices(1)
test_indice3 = _fold_indices(2)
test_indice4 = _fold_indices(3)
test_indice5 = _fold_indices(4)
# Every sample index; the training set is this minus the chosen test fold.
train_indice = list(range(0, 16844))

In [None]:
# generate input features data_X and output labels data_Y
data_X, data_Y, data_Z = Create_dataset(data)

# Encode the raw calendar columns, then swap the last two axes so the encoded
# values run along axis 1, the same axis the weather features use.
data_Z = np.array(Calender(torch.tensor(data_Z))).transpose(0, 2, 1)

# Final inputs: weather features followed by the encoded calendar features.
data_X = np.concatenate((data_X, data_Z), axis=1)

In [None]:
# generate training data and testing data respectively for each one of the five folds
# (this cell uses fold 1; substitute another test_indice list for the other folds)
# A set makes the membership test constant-time; checking against the list
# rescans it for every candidate index.
held_out = set(test_indice1)
train_idx = np.array([index for index in train_indice if index not in held_out], dtype=np.intp)
test_idx = np.array(test_indice1, dtype=np.intp)

# Integer-array indexing selects (and copies) the rows in the listed order.
train_X, train_Y = data_X[train_idx], data_Y[train_idx]
test_X, test_Y = data_X[test_idx], data_Y[test_idx]

In [None]:
class Train(Dataset):
    """Dataset that splits a stacked tensor into weather, calendar and label parts.

    ``data`` is sliced along axis 1: the first ``n_weather`` entries are the
    weather features, the next ``n_calendar`` entries the calendar features
    and the last ``n_label`` entries the labels. All parts are cast to float32.

    Parameters
    ----------
    data : torch.Tensor with at least three dimensions, samples on axis 0.
    n_weather : int
        Length of the weather slice (default 672; 768 when GHI is included).
    n_calendar : int
        Length of the calendar slice (default 1152).
    n_label : int
        Length of the trailing label slice (default 96).
    """

    def __init__(self, data, n_weather=672, n_calendar=1152, n_label=96):
        calendar_end = n_weather + n_calendar
        self.weather = data[:, :n_weather, :].float()
        self.calender = data[:, n_weather:calendar_end, :].float()
        self.label = data[:, -n_label:, :].float()

    def __getitem__(self, index):
        """Return the ``(weather, calendar, label)`` triple of one sample."""
        return self.weather[index], self.calender[index], self.label[index]

    def __len__(self):
        """Number of samples."""
        return len(self.weather)

In [None]:
# load training data into DataLoader
# Inputs and labels are stacked along axis 1 so that Train can slice them
# apart again; batches of 500 are drawn in shuffled order.
train_loader = DataLoader(
    Train(torch.cat([torch.tensor(train_X), torch.tensor(train_Y)], dim=1)),
    batch_size=500,
    shuffle=True,
)

In [None]:
# FCNN model
class ANN(nn.Module):
    """Fully connected network applied independently to every time step.

    Parameters
    ----------
    in_features : int
        Number of input values per time step after weather and calendar
        features are concatenated (default 19; 20 when GHI is included).
    hidden : int
        Width of the hidden layer (default 40).
    """

    # Time steps per sample. Kept as a class attribute so that instances
    # pickled before this attribute existed still resolve it.
    WINDOW = 96

    def __init__(self, in_features=19, hidden=40):
        super().__init__()

        self.ann = nn.Sequential(
            nn.Linear(in_features, hidden),
            nn.Tanh(),
        )
        self.out = nn.Sequential(
            nn.Linear(hidden, 1),
            nn.ReLU(),  # keeps the output non-negative
        )

    def forward(self, x, z):
        """Predict one value per time step.

        ``x`` (weather) and ``z`` (calendar) are each reshaped to
        ``(batch, WINDOW, -1)``, so any layout with ``batch`` on axis 0 and a
        per-sample element count divisible by ``WINDOW`` is accepted.

        Returns a tensor of shape ``(batch, WINDOW, 1)``.
        """
        batch = x.shape[0]
        x = torch.reshape(x, (batch, self.WINDOW, -1))
        z = torch.reshape(z, (batch, self.WINDOW, -1))
        # Concatenate per-step weather and calendar features.
        xz = torch.cat((x, z), 2)
        return self.out(self.ann(xz))

In [None]:
# hyper-parameters for training process
LR = 0.01                  # Adam learning rate
EPOCH = 500                # number of passes over the training set
Loss = []                  # per-batch training losses, filled by the training loop
# Start from +inf so the first epoch always produces a checkpoint, whatever
# the scale of the loss.
best_loss = float('inf')
state = None
cal_loss = nn.MSELoss()    # mean squared error between prediction and label

In [None]:
# training FCNN model
model = ANN()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
model.train()
for i in range(EPOCH):
    loss_sum, n_seen = 0.0, 0
    for j, entry in enumerate(train_loader):
        tx, ty, tz = entry
        # The model reshapes its inputs itself; the transposes only move the
        # feature axis last.
        tx = torch.transpose(tx,1,2)
        ty = torch.transpose(ty,1,2)
        final_out = model(tx,ty)
        loss = cal_loss(final_out, tz)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        batch_loss = loss.item()
        Loss.append(batch_loss)
        # Weight by batch size so a smaller final batch does not skew the mean.
        loss_sum += batch_loss * len(tz)
        n_seen += len(tz)
    if n_seen == 0:
        raise RuntimeError('train_loader produced no batches')
    # Judge the epoch by its mean loss; the loss of the last mini-batch alone
    # is too noisy a criterion for picking the best checkpoint.
    epoch_loss = loss_sum / n_seen
    print('epoch{}'.format(i+1), epoch_loss)
    if epoch_loss < best_loss:
        best_loss = epoch_loss
        # The whole module is pickled (not just its state_dict) because the
        # prediction cells restore it with a plain torch.load.
        torch.save(model, 'ithaca_solar')
        print('new fcnn saved at epoch {} with loss {}'.format(i+1, best_loss))

In [None]:
# Plot the recorded per-batch training losses and write the figure to disk.
fig, ax = plt.subplots(figsize=(20, 6))
ax.plot(Loss, 'b')
ax.set_title('Training Loss for Solar', fontsize=15)
fig.savefig('training_loss.png')

In [None]:
# generate prediction for testing set with trained fcnn model
# The checkpoint is a fully pickled module: only load files written by this
# notebook, since unpickling executes arbitrary code.
# NOTE(review): recent PyTorch releases default torch.load to
# weights_only=True, which rejects pickled modules; pass weights_only=False
# there. Confirm against the installed version.
model = torch.load('ithaca_solar')
model.eval()
# First 672 entries along axis 1 are weather features, the rest calendar.
test_weather = torch.tensor(test_X[:,:672,:], dtype=torch.float32)
test_weather = torch.transpose(test_weather,1,2)
test_date = torch.tensor(test_X[:,672:,:], dtype=torch.float32)
# Inference only: skip autograd so no graph is built or stored with the result.
with torch.no_grad():
    prediction = model(test_weather,test_date)
torch.save(prediction, 'prediction_solar.pt')

In [None]:
# generate input features with simulated GHI
# NOTE(review): column 8 is GHI only when the GHI column selection from the
# loading cell is active; with the default selection it is a calendar column.
# NOTE: this cell modifies `data` in place, so running it twice adds noise twice.
mu, sigma = 0, 10
GHI_NOISE_SEED = 0  # fixed seed so the simulated noise is reproducible
# One noise value per row of `data`.
error = np.random.RandomState(GHI_NOISE_SEED).normal(mu, sigma, data.shape[0])
# ensure zero GHI for timeslots without sunlight
indice = np.where(data[:,8]==0)
error[indice] = 0
data[:,8] += error
# ensure non-negative simulated GHI: clip negatives to 0 and leave exact
# zeros untouched
data[:,8] = np.maximum(data[:,8], 0)

In [None]:
# Rebuild the weather windows from the modified `data`, reuse the calendar
# encoding computed earlier, and keep only the rows of test fold 1.
data_X_sim, _, _ = Create_dataset(data)
data_XX = np.concatenate((data_X_sim, data_Z), axis=1)
test_X_sim = data_XX[np.asarray(test_indice1)]

In [None]:
# generate prediction for testing set with trained fcnn model,
# which incorporates ground truth GHI as input feature during training
# The checkpoint is a fully pickled module: only load files you created
# yourself, since unpickling executes arbitrary code.
# NOTE(review): the 768-entry weather slice assumes eight weather columns per
# step (GHI included); confirm the feature pipeline was run in that configuration.
model = torch.load('ithaca_solar_GHI')
model.eval()
test_weather = torch.tensor(test_X_sim[:,:768,:], dtype=torch.float32)
test_weather = torch.transpose(test_weather,1,2)
test_date = torch.tensor(test_X_sim[:,768:,:], dtype=torch.float32)
# Inference only: skip autograd so no graph is built or stored with the result.
with torch.no_grad():
    prediction_GHI = model(test_weather,test_date)
# Save the GHI-model output computed in this cell, not the earlier `prediction`.
torch.save(prediction_GHI, 'prediction_solar_GHI.pt')