# Solar and Building Forecasting
## Load prediction functions

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import keras
from methods import read_pickle
from methods import find_index_v2
from methods import RM_model
from methods import NN_model
from methods import building_methods
# Load the pre-processed phase-2 dataset. A forward slash is used so the path
# resolves on every OS and contains no backslash escape sequence.
data_cleanned_v2 = read_pickle(path_name='data/data_processed_phase2.pickle')
## GPU check
import tensorflow as tf
# Lists the GPUs TensorFlow can see (an empty list means none were found)
tf.config.list_physical_devices('GPU')

## Solar Prediction:
### Because Solar3's capacity visibly increased from May 2020 onward, we replaced the earlier part of Solar3's data with Solar2's data (the two series share the same start and end times)
- This is an experience- and domain-knowledge-based calibration, so we discuss it separately in the Evaluation notebook

In [None]:
# Cut-over time for the Solar3 calibration. The name deliberately avoids
# `datetime`, which is already imported from the standard library above.
solar3_clip_time = pd.to_datetime('2020-05-20 00:00:00')
clip_index = find_index_v2(data_cleanned_v2, 'Solar2', solar3_clip_time)

# Overwrite the leading part of Solar3 with the matching Solar2 readings.
# One positional .iloc write on the frame itself is used instead of chained
# indexing, which is not guaranteed to modify the underlying DataFrame.
solar3 = data_cleanned_v2['Solar3']
replacement = data_cleanned_v2['Solar2']['Solar2'][:clip_index]
solar3.iloc[:len(replacement), solar3.columns.get_loc('Solar3')] = replacement.to_numpy()

# Drop the first 300 days of Solar5 and the first 120 days of Solar0
# (96 samples per day).
data_cleanned_v2['Solar5'] = data_cleanned_v2['Solar5'][96*300:]
data_cleanned_v2['Solar0'] = data_cleanned_v2['Solar0'][96*120:]

### Main function for replicating solar generation predictions
- Results might differ slightly depending on the training process
- For simplicity, we did not specify attentions and similarity thresholds for the RM discovery as we did in the previous paper; doing so might further improve the accuracy but needs further study on the different solar installations

In [None]:
def RM_prediction(data_cleanned, user_type, NN_type):
    """Train a motif-based network for one user and forecast the next 30 days.

    Parameters
    ----------
    data_cleanned : dict
        Mapping from series name to its data. ``data_cleanned[user_type]`` is
        the training data; the first ``30 * 96`` rows of
        ``data_cleanned['weather']`` (and of ``data_cleanned['occupancy']``
        for non-solar users) describe the forecast window. Every day is
        treated as 96 samples (``24 * 4``).
    user_type : str
        Key of the series to predict. Names containing ``'Solar'`` use the
        solar feature set and the column named after the key as target; all
        other names use the building feature set and the ``'consumption'``
        column.
    NN_type : str
        ``'CNN'`` selects ``NN_model.CNN_series()``; any other value selects
        ``NN_model.ResNet()``.

    Returns
    -------
    tuple
        ``(model_cnn, x_nov_cnn, y_predict_cnn)``: the fitted model, the
        stacked forecast-window features passed to it, and its prediction
        flattened to length ``30 * 96``.
    """
    is_solar = 'Solar' in user_type

    ## motifs discovery range and training range (normally the same)
    train_days = int(len(data_cleanned[user_type])/96)
    RM = RM_model(data_cleanned)
    motifs_range = [-24*4*train_days, 0]
    train_range = [-24*4*train_days, 0]
    ## Here we didn't specify attentions and similarity thresholds for the RM discovery
    (motifs, motif_data, motif_pattern, temperature_motif, cloudcover_motif,
     humidity_motif, radiation_motif, times_motif) = RM.motifs_discovery(user_type, motifs_range)

    ### training data: the last `train_days` whole days of the series
    if train_range[1] == 0:
        train_data = data_cleanned[user_type][train_range[0]:].copy()
    else:
        train_data = data_cleanned[user_type][train_range[0]:train_range[1]].copy()
    target_column = user_type if is_solar else 'consumption'
    y_train = train_data[target_column].values.reshape(train_days, 96)
    cloudcover_train = train_data['total_cloud_cover (0-1)'].values.reshape(train_days, 96)
    humidity_train = train_data['relative_humidity ((0-1))'].values.reshape(train_days, 96)
    radiation_train = train_data['surface_solar_radiation (W/m^2)'].values.reshape(train_days, 96)
    temperature_train = train_data['temperature (degC)'].values.reshape(train_days, 96)
    # Weather features are expressed as the daily deviation from the motif day
    temperature_train = [i-temperature_motif.values for i in temperature_train]
    cloudcover_train = [i-cloudcover_motif.values for i in cloudcover_train]
    humidity_train = [i-humidity_motif.values for i in humidity_train]
    radiation_train = [i-radiation_motif.values for i in radiation_train]
    times_train = pd.CategoricalIndex(train_data.index.time).codes.astype(float)
    times_train = times_train.reshape(train_days, 96)
    weekday_train = pd.CategoricalIndex(train_data.index.weekday).codes.astype(float).reshape(train_days, 96)
    # The same motif pattern is repeated for every training day
    motifs_train = np.array([motif_pattern for i in range(train_days)])
    month_train = pd.CategoricalIndex(train_data.index.month).codes.astype(float).reshape(train_days, 96)
    year_cycle_train = abs(np.sin(2*np.pi*(month_train-5)/24))
    year_cycle_train = year_cycle_train.reshape(train_days, 96)
    weekend_train = np.where((weekday_train == 5) | (weekday_train == 6), True, False)
    if is_solar:
        if user_type == 'Solar0':
            ## Solar 0 has data less than a year hence the yearly cycle needs to be adjusted
            year_cycle_train = abs(np.sin(2*np.pi*(month_train-1)/24))
        # Flat (tabular) feature matrix; built but not consumed by the networks below
        x_train = np.column_stack((motifs_train, cloudcover_train, radiation_train, times_train, year_cycle_train))
        x_train_cnn = np.dstack((motifs_train, temperature_train, cloudcover_train, radiation_train, year_cycle_train))
    else:
        occupancy_motif = motif_data['occupancy (0-1)'][motifs['motif_position']:motifs['motif_position']+96]
        occupancy_train = train_data['occupancy (0-1)'].values.reshape(train_days, 96)
        occupancy_train = [i-occupancy_motif.values for i in occupancy_train]
        # Flat (tabular) feature matrix; built but not consumed by the networks below
        x_train = np.column_stack((motifs_train, temperature_train, occupancy_train, humidity_train, cloudcover_train, radiation_train, weekday_train, times_train))
        x_train_cnn = np.dstack((motifs_train, weekend_train, temperature_train, radiation_train, month_train))
    Solar_pre_model = NN_model(x_train_cnn, y_train, 96)
    model_cnn = Solar_pre_model.CNN_series() if NN_type == 'CNN' else Solar_pre_model.ResNet()

    ######### nov data: features for the 30-day forecast window
    nov_length = 30*4*24
    nov_days = 30
    weather_data = data_cleanned['weather'][:nov_length].copy()
    cloudcover_nov = weather_data['total_cloud_cover (0-1)'].values.reshape(nov_days, 96)
    humidity_nov = weather_data['relative_humidity ((0-1))'].values.reshape(nov_days, 96)
    radiation_nov = weather_data['surface_solar_radiation (W/m^2)'].values.reshape(nov_days, 96)
    temperature_nov = weather_data['temperature (degC)'].values.reshape(nov_days, 96)
    temperature_nov = [i-temperature_motif.values for i in temperature_nov]
    cloudcover_nov = [i-cloudcover_motif.values for i in cloudcover_nov]
    humidity_nov = [i-humidity_motif.values for i in humidity_nov]
    radiation_nov = [i-radiation_motif.values for i in radiation_nov]
    times_nov = pd.CategoricalIndex(weather_data.index.time).codes.astype(float)
    times_nov = times_nov.reshape(nov_days, 96)
    weekday_nov = pd.CategoricalIndex(weather_data.index.weekday).codes.astype(float).reshape(nov_days, 96)
    motifs_nov = np.array([motif_pattern for i in range(nov_days)])
    # The whole forecast window is encoded as the month code that follows the
    # last training month. It must be a full (nov_days, 96) array so that it
    # can be reshaped and stacked with the other per-sample features.
    month_nov = np.full((nov_days, 96), month_train[-1][-1]+1)
    year_cycle_nov = abs(np.sin(2*np.pi*(month_nov-5)/24))
    year_cycle_nov = year_cycle_nov.reshape(nov_days, 96)
    weekend_nov = np.where((weekday_nov == 5) | (weekday_nov == 6), True, False)
    if is_solar:
        if user_type == 'Solar0':
            # Same shorter-history adjustment as for the training features
            year_cycle_nov = abs(np.sin(2*np.pi*(month_nov-1)/24))
        # NOTE(review): x_train uses year_cycle_train in this position while
        # x_nov uses month_nov; both matrices are unused, so confirm which is
        # intended before feeding them to a model.
        x_nov = np.column_stack((motifs_nov, cloudcover_nov, radiation_nov, times_nov, month_nov))
        x_nov_cnn = np.dstack((motifs_nov, temperature_nov, cloudcover_nov, radiation_nov, year_cycle_nov))
    else:
        occupancy_nov = data_cleanned['occupancy'][:nov_length].values.reshape(nov_days, 96)
        # NOTE(review): times_nov is already (nov_days, 96), so with
        # nov_days = 30 the slice [252:] selects no rows and this line changes
        # nothing; it may have been meant for the flat array -- confirm.
        times_nov[252:] = (times_nov[252:]+4)%96
        # The forecast window reuses the occupancy motif found on the training data
        occupancy_nov = [i-occupancy_motif.values for i in occupancy_nov]
        x_nov = np.column_stack((motifs_nov, temperature_nov, occupancy_nov, humidity_nov, cloudcover_nov, radiation_nov, weekday_nov, times_nov))
        x_nov_cnn = np.dstack((motifs_nov, weekend_nov, temperature_nov, radiation_nov, month_nov))

    ### early stop settings: stop once the training loss has not improved for
    ### `test_patience` epochs
    test_epochs = 20000
    test_patience = 1000
    model_cnn.fit(x_train_cnn, y_train, epochs=test_epochs, batch_size=30, verbose=0, callbacks=[keras.callbacks.EarlyStopping(monitor='loss', patience=test_patience)])
    y_predict_cnn = model_cnn.predict(x_nov_cnn).reshape(nov_days*96)
    return model_cnn, x_nov_cnn, y_predict_cnn


## Build the solar prediction 
### Two types of NNs can be used (2-layer 1D-CNN and ResNet)
### Trained models and used input features can be saved based on needs


In [None]:
# Network architecture used for each solar installation
solar_NNs = dict(
    Solar0='ResNet',
    Solar1='CNN',
    Solar2='ResNet',
    Solar3='ResNet',
    Solar4='CNN',
    Solar5='ResNet',
)

# Train one model per installation and keep its forecast, with negative
# predicted values clamped to zero.
solar_predictions = {}
for solar_name, network_type in solar_NNs.items():
    model_solar, x_nov, y_nov = RM_prediction(data_cleanned_v2, solar_name, network_type)
    solar_predictions[solar_name] = [max(value, 0) for value in y_nov]

## Building Prediction
- We replaced the buildings' holiday data (AFL) in the real dataset with predicted values to remove the abnormal disturbance caused by the holiday
- We also tried replacing it with weekend data, but holidays have their own distinct profiles

In [None]:
# Read raw building data
def read_raw_building_data():
    """Return the phase-2 data with building frames prepared for forecasting.

    For every key containing ``'Building'`` the ``'consumption'`` column is
    renamed to the key and all timestamps at or after 2020-10-03 16:00 are
    moved forward by one hour (AEST --> AEDT); the resulting index is named
    ``'index'``. The holiday daytime samples (positions ``-879:-831``) of
    Building1, Building3 and Building6 are then replaced with the values from
    the ``'submission_Oct9'`` entry of ``data/submissions_phase1.pickle``.

    The module-level ``data_cleanned_v2`` dict is read but not modified, so
    the function can be called repeatedly without shifting timestamps twice.

    Returns
    -------
    dict
        Same keys as ``data_cleanned_v2``; building entries are new frames,
        all other entries are the original objects.
    """
    # Shallow copy: building entries are re-bound below, the shared dict is not
    raw_building_df = dict(data_cleanned_v2)

    # Consider time zone change from AEST --> AEDT
    time_zone_change = pd.to_datetime('2020-10-03 16:00')
    one_hour = pd.Timedelta(hours=1)
    for key in raw_building_df:
        if 'Building' not in key:
            continue
        # rename() returns a new frame, so the source frame stays untouched
        building = raw_building_df[key].rename(columns={"consumption": key})
        timestamps = building.index
        # +1 hour for all time instances at or after 2020-10-03 16:00
        shifted = timestamps.where(timestamps < time_zone_change, timestamps + one_hour)
        building.index = shifted.rename('index')
        raw_building_df[key] = building

    # Replace the holiday daytime real data with the predicted data
    # Holiday: Oct 23 2020 (daytime)--> Index: [-879 -831]
    # NOTE: unpickling executes code from the file; only load trusted data.
    oct_prediction = pd.read_pickle("data/submissions_phase1.pickle")
    submission = oct_prediction['submission_Oct9']
    holiday = slice(-879, -831)
    for name in ('Building1', 'Building3', 'Building6'):
        frame = raw_building_df[name]
        # Single positional write on the frame (no chained indexing)
        frame.iloc[holiday, frame.columns.get_loc(name)] = submission[name][holiday].values

    return raw_building_df

### Main function for replicating building consumption predictions

In [None]:

def building_prediction(dict_df, drop_name=None):
    """Forecast consumption for every building entry of ``dict_df``.

    Keys containing ``'Solar'`` and the keys ``'occupancy'`` and ``'weather'``
    are skipped, as is any key found in ``drop_name``.

    Parameters
    ----------
    dict_df : dict
        Mapping from series name to its data, as returned by
        ``read_raw_building_data``.
    drop_name : container of str, optional
        Building keys to leave out (tested with ``key in drop_name``).

    Returns
    -------
    list
        One prediction per processed building, in the iteration order of
        ``dict_df``.

    Raises
    ------
    ValueError
        If a key that is not skipped has no prediction strategy. This
        prevents the previous building's forecast from being reused for it.
    """
    ### STL decomposition for building 1, 3 and 6
    ### Tree-based learning directly on building 0 and 5
    ### Flat prediction of 1 on building 4
    svm_features = ('tempC', 'tempC_2', 'humidity', 'cloudcover', 'time', 'dew_tempC', 'dew_tempC_2', 'isweekend',
                    'weekday', 'weekday_2', 'weekday_3', 'weekday_4', 'weekday_5', 'time_group', 'time_group_2')
    # Base consumption handed to holiday_halving for each STL building
    stl_base_consumption = {'Building1': 12, 'Building3': 350, 'Building6': 29}

    building_predictor = building_methods(dict_df)
    y_prediction = []
    for key in dict_df:

        if 'Solar' in key or key in ('occupancy', 'weather'):
            continue

        if drop_name is not None and key in drop_name:
            continue

        print('Predicting for', key)
        # The return value is not needed here and is deliberately not bound to
        # `dict_df`, which is still being iterated.
        # NOTE(review): presumably this prepares state on the predictor -- confirm.
        building_predictor.building_decomposition(key)

        if key in stl_base_consumption:
            # A fresh list per call, in case the predictor modifies its inputs
            SVM_input = list(svm_features)
            RF_input = ['tempC', 'humidity', 'cloudcover', 'dew_tempC', 'time', 'weekday', 'isweekend', 'time_group']
            training_length = 24*4*31

            # Collect prediction data from different machine learning methods (RF, SVM, GB)
            # Each output is a dict containing results from different components (residual, seasonal, trend, original)
            y_dict_RF, y_dict_SVM, y_dict_GB = building_predictor.train_unseen_STL(key, RF_input, SVM_input, training_length)

            # Residual and trend come from GB, the seasonal component from RF
            y_main = y_dict_GB['residual'] + y_dict_GB['trend'] + y_dict_RF['seasonal']

            # Half the peak prediction during midday of 03 Nov 2020 holiday
            y_main = building_predictor.holiday_halving(y_main, stl_base_consumption[key])

        elif key == 'Building0':
            SVM_input = list(svm_features)
            RF_input = ['tempC', 'humidity', 'cloudcover', 'radiation', 'time', 'weekday', 'isweekend', 'dew_tempC', 'time_group']
            training_length = 24*4*13

            # Collect prediction data from different machine learning methods (RF, SVM, GB)
            y_main, y_SVM_temp, y_GB_temp = building_predictor.train_unseen_tree(key, RF_input, SVM_input, training_length)
            # Increase the prediction values for first few intervals
            y_main[0:37] += 20

        elif key == 'Building4':
            # A flat prediction of 1 on Building 4
            y_main = np.ones(96*30)

        elif key == 'Building5':
            SVM_input = list(svm_features)
            RF_input = ['tempC', 'humidity', 'cloudcover', 'time', 'weekday', 'isweekend', 'time_group']
            training_length = 24*4*31

            # Collect prediction data from different machine learning methods (RF, SVM, GB)
            y_main, y_SVM_temp, y_GB_temp = building_predictor.train_unseen_tree(key, RF_input, SVM_input, training_length)

        else:
            raise ValueError(f"No prediction strategy defined for {key!r}")

        y_prediction.append(y_main)

    return y_prediction

### Run the building consumption predictions

In [None]:
# Prepare the holiday-corrected building data, then forecast every building
# in it (no building is dropped).
dict_df = read_raw_building_data()
y_building_predict = building_prediction(dict_df)

### Generate Prediction Outputs

In [None]:
# Row labels: the buildings first, then the solar installations.
# NOTE(review): the building labels are hard-coded and assume that
# y_building_predict is ordered Building0, 1, 3, 4, 5, 6 -- confirm.
index_name = ['Building0', 'Building1', 'Building3', 'Building4', 'Building5', 'Building6']
index_name += list(solar_predictions)

# Rows in the same order as the labels: building forecasts, then solar forecasts
prediction_list = list(y_building_predict)
prediction_list += [solar_predictions[solar_name] for solar_name in solar_predictions]

total_predicted_df = pd.DataFrame(prediction_list, index=index_name)

In [None]:
import os
# Create the output folder if needed; exist_ok avoids the check-then-create race
os.makedirs('outputs', exist_ok=True)
# One row per series, labelled by the frame index and written without a header row
total_predicted_df.to_csv('outputs/forecasting.csv', header=None)