# Estimation of Electricity Production from Photovoltaic Panels

This notebook details the use of an LSTM-based neural network to estimate the electricity produced by photovoltaic panels from weather data.
<br>
This is covered in two parts:
<br>
- Prediction of Irradiance
- Prediction of Power Output

## Setup

In [None]:
import numpy as np
import datetime
import pandas as pd
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from keras.callbacks import EarlyStopping
import tensorflow as tf

## Dataset
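The dataset contains weather data such as temperature, pressure, and humidity, together with the irradiance (`Irr`) and DC power (`dc_pow`) columns that the two models predict.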

In [None]:
# Read the input CSV and derive calendar columns from its timestamp column.

PATH = 'PATH_TO_INPUT FILE'   # placeholder: point this at the input CSV

df = pd.read_csv(PATH)

# Parse the timestamps once and reuse the parsed series for the derived columns.
timestamps = pd.to_datetime(df['TimeStamp'])
df['TimeStamp'] = timestamps
df['year'] = timestamps.dt.year   # used below to split train / test
df['date'] = timestamps.dt.date   # used below to group samples per day and hour

## Split the Data
Data is divided into training and testing sets.

In [None]:
# Training set: every row recorded before 2015, re-indexed from 0.
train = df[df['year'] < 2015].reset_index(drop=True)

In [None]:
# Test set: rows recorded in 2015 only, re-indexed from 0.
is_test_year = df['year'] == 2015
test = df.loc[is_test_year].reset_index(drop=True)

## Data Normalization
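The data is scaled with a Min-Max scaler so that every feature lies in the range [0, 1] on the training set. The scalers are fitted on the training data only and then applied unchanged to the test data.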


In [None]:
#scaling the data

# Weather / calendar columns that share one feature scaler.
FEATURE_COLS = ['sunHour', 'uvIndex.1', 'FeelsLikeC', 'HeatIndexC', 'cloudcover', 'humidity',
                'pressure', 'tempC', 'visibility', 'day_of_year', 'hour_of_day']

# Work on copies. A plain assignment would only alias `train` / `test`, so the
# scaled values would overwrite the raw frames and re-running this cell would
# scale data that had already been scaled.
train_scaled = train.copy()
test_scaled = test.copy()

# Every scaler is fitted on the training rows only and then applied to the
# test rows, so no test-set statistics enter the transformation.
scaler = MinMaxScaler(feature_range=(0, 1))
train_scaled[FEATURE_COLS] = scaler.fit_transform(train[FEATURE_COLS])
test_scaled[FEATURE_COLS] = scaler.transform(test[FEATURE_COLS])

# Dedicated scaler for the power column, kept separate from the feature scaler.
yscaler = MinMaxScaler(feature_range=(0, 1))
train_scaled[['dc_pow']] = yscaler.fit_transform(train[['dc_pow']])
test_scaled[['dc_pow']] = yscaler.transform(test[['dc_pow']])

# Dedicated scaler for the irradiance column.
Irrscaler = MinMaxScaler(feature_range=(0, 1))
train_scaled[['Irr']] = Irrscaler.fit_transform(train[['Irr']])
test_scaled[['Irr']] = Irrscaler.transform(test[['Irr']])

In [None]:
# Build the input and label arrays for irradiance prediction.
# Note that 'Irr' is both one of the input columns and the label column.

IRR_INPUT_COLS = ['uvIndex.1', 'cloudcover', 'humidity', 'tempC', 'visibility',
                  'day_of_year', 'hour_of_day', 'Irr']
IRR_LABEL_COLS = ['Irr']

# training data
trainf = train_scaled[IRR_INPUT_COLS].copy()
traint = train_scaled[IRR_LABEL_COLS]
train_dataset, train_target = trainf.values, traint.values

# testing data
testf = test_scaled[IRR_INPUT_COLS].copy()
testt = test_scaled[IRR_LABEL_COLS]
test_dataset, test_target = testf.values, testt.values

## Windowing the Dataset
The data is windowed into input and output components.

In [None]:
def window_dataset(dataset, target, history_size, target_size):
    '''
    Cut a sequence of samples into (window, label) pairs for a sequence model.

    For every anchor position i from history_size up to (but excluding)
    len(dataset) - target_size:
      * the window is the history_size consecutive rows dataset[i-history_size .. i-1]
      * the label is target[i + target_size]
    With target_size = 0 the label is therefore the row immediately after the window.

    dataset      : array of input rows, indexed along its first axis
    target       : array of label rows, indexed like dataset
    history_size : number of past rows in each window
    target_size  : extra offset (in rows) added to the label position

    Returns a tuple (windows, labels) of numpy arrays.
    '''
    anchors = range(history_size, len(dataset) - target_size)

    # Index with a range object, exactly as a fancy index over consecutive rows.
    windows = [dataset[range(anchor - history_size, anchor, 1)] for anchor in anchors]
    labels = [target[anchor + target_size] for anchor in anchors]

    return np.array(windows), np.array(labels)


In [None]:
# One past row per window; with TARGET = 0 the label is the row right after the window.
HISTORY, TARGET = 1, 0

x_train, y_train = window_dataset(
    dataset=train_dataset, target=train_target,
    history_size=HISTORY, target_size=TARGET,
)
x_test, y_test = window_dataset(
    dataset=test_dataset, target=test_target,
    history_size=HISTORY, target_size=TARGET,
)

In [None]:
# Wrap the windowed arrays in tf.data pipelines that can be passed to Keras.

BATCH_SIZE = 256
BUFFER_SIZE = 10000

window_shape = x_train[0].shape
print ('Single window of past history : {}'.format(window_shape))

# Training pipeline: cached, shuffled, batched and repeated indefinitely.
train_data = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .cache()
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .repeat()
)

# Validation pipeline: built from the test windows, batched and repeated, not shuffled.
val_data = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .batch(BATCH_SIZE)
    .repeat()
)

## LSTM based Neural Network for Irradiance Prediction

In [None]:
#neural network for irradiance prediction
EPOCHS = 300

# Use the callback that ships with tf.keras so it comes from the same Keras
# implementation as the model it is attached to.
# Training stops when val_loss has not improved by at least min_delta for
# `patience` epochs; the best weights seen are then restored.
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', patience=60, min_delta=0.0001, verbose=1, restore_best_weights=True)

# Two stacked LSTM layers followed by a small dense head with a single output.
Irr_model = tf.keras.models.Sequential()
# input_shape is (rows per window, columns per row), taken from the windowed training data
Irr_model.add(tf.keras.layers.LSTM(32, input_shape=x_train.shape[-2:], return_sequences=True))
Irr_model.add(tf.keras.layers.LSTM(32, activation="relu"))
Irr_model.add(tf.keras.layers.Dense(16, activation="relu"))
Irr_model.add(tf.keras.layers.Dense(8, activation="relu"))
Irr_model.add(tf.keras.layers.Dense(1))

# Trained on mean absolute error with the RMSprop optimizer at its default settings.
Irr_model.compile(optimizer=tf.keras.optimizers.RMSprop(), loss='mae')

In [None]:
# Train the irradiance model. `es` may end training before EPOCHS is reached.
Irr_fit_options = dict(
    epochs=EPOCHS,
    steps_per_epoch=200,       # batches drawn from train_data per epoch
    validation_data=val_data,
    validation_steps=50,       # batches drawn from val_data per validation pass
    callbacks=[es],
)
Irr_history = Irr_model.fit(train_data, **Irr_fit_options)

In [None]:
#add the predicted irradiance values to the dataset

y = Irr_model.predict(x_test)
Irr_cal = Irr_model.predict(x_train)

# Windowing produces no prediction for the leading rows of each frame, so keep
# only the trailing rows that have one. Slicing by the number of predictions
# (instead of dropping row 0 in place) stays aligned for any HISTORY / TARGET
# setting, does not touch the frames these were derived from, and lets this
# cell be re-run without raising a KeyError on the already-removed row.
test_scaled = test_scaled.iloc[len(test_scaled) - len(y):].copy()
train_scaled = train_scaled.iloc[len(train_scaled) - len(Irr_cal):].copy()

# Flatten the prediction arrays to one value per row before storing them as a column.
test_scaled['Irr_cal'] = y.ravel()
train_scaled['Irr_cal'] = Irr_cal.ravel()

In [None]:
# Reduce the data to one row per (date, hour_of_day) pair for hourly predictions.

def _first_per_hour(frame):
    """Group `frame` by date and hour_of_day and keep groupby().first() of every other column."""
    return frame.groupby(['date', 'hour_of_day']).first().reset_index()

train_scaled = _first_per_hour(train_scaled)
test_scaled = _first_per_hour(test_scaled)

In [None]:
# Build the input and label arrays for hourly power production.
# Note that 'dc_pow' is both one of the input columns and the label column.

POWER_INPUT_COLS = ['Irr_cal', 'uvIndex.1', 'tempC', 'cloudcover', 'humidity',
                    'day_of_year', 'hour_of_day', 'dc_pow']
POWER_LABEL_COLS = ['dc_pow']

# training data
xtrain = train_scaled[POWER_INPUT_COLS].copy()
ytrain = train_scaled[POWER_LABEL_COLS].copy()

# testing data
xtest = test_scaled[POWER_INPUT_COLS].copy()
ytest = test_scaled[POWER_LABEL_COLS].copy()

# plain numpy arrays for windowing
trainx, trainy = xtrain.values, ytrain.values
testx, testy = xtest.values, ytest.values

In [None]:
# Window the hourly data for the power model.
#   HISTORY: number of past rows in each window; increase it to give the model more history.
#   TARGET : extra offset of the label row; e.g. set TARGET = 3 to predict power 3 hours later.
HISTORY, TARGET = 1, 0

x_train, y_train = window_dataset(
    dataset=trainx, target=trainy,
    history_size=HISTORY, target_size=TARGET,
)
x_test, y_test = window_dataset(
    dataset=testx, target=testy,
    history_size=HISTORY, target_size=TARGET,
)

In [None]:
# Wrap the windowed power arrays in tf.data pipelines that can be passed to Keras.

window_shape = x_train[0].shape
print ('Single window of past history : {}'.format(window_shape))

# Training pipeline: cached, shuffled, batched and repeated indefinitely.
train_data = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .cache()
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .repeat()
)

# Validation pipeline: built from the test windows, batched and repeated, not shuffled.
val_data = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .batch(BATCH_SIZE)
    .repeat()
)

## LSTM based Neural Network for Power Production

In [None]:
# LSTM based Neural Network for hourly power production

# Use the callback that ships with tf.keras so it comes from the same Keras
# implementation as the model it is attached to.
# Training stops when val_loss has not improved by at least min_delta for
# `patience` epochs; the best weights seen are then restored.
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', patience=30, min_delta=0.0001, verbose=1, restore_best_weights=True)


# Two LSTM layers, each followed by batch normalization and dropout,
# then a small dense head with a single output.
model = tf.keras.models.Sequential()
# input_shape is (rows per window, columns per row), taken from the windowed training data
model.add(tf.keras.layers.LSTM(64, input_shape=x_train.shape[-2:], return_sequences=True))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dropout(0.4))
model.add(tf.keras.layers.LSTM(32, activation="relu"))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dropout(0.4))
model.add(tf.keras.layers.Dense(16, activation="relu"))
model.add(tf.keras.layers.Dense(8, activation="tanh"))
model.add(tf.keras.layers.Dense(1))

# Trained on mean absolute error with the RMSprop optimizer at its default settings.
model.compile(optimizer=tf.keras.optimizers.RMSprop(), loss='mae')

In [None]:
# Train the power model. `es` may end training before EPOCHS is reached.
fit_options = dict(
    epochs=EPOCHS,
    steps_per_epoch=200,       # batches drawn from train_data per epoch
    validation_data=val_data,
    validation_steps=50,       # batches drawn from val_data per validation pass
    callbacks=[es],
)
history = model.fit(train_data, **fit_options)

In [None]:
# Score the power model on the test windows.
# Both metrics are computed on the Min-Max scaled dc_pow values.
y = model.predict(x_test)
mae = mean_absolute_error(y_test, y)
rmse = np.sqrt(mean_squared_error(y_test, y))

for template, value in (('Test MAE: %.3f', mae), ('Test RMSE: %.3f', rmse)):
    print(template % value)

## Saving the model

In [None]:
# Write the trained power model to a .h5 file in the working directory.
model.save(filepath='weights.h5')

In [None]:
# Reload the saved .h5 file and use it for predictions.

# load the model
saved_model = tf.keras.models.load_model(filepath='weights.h5')

# The input must be scaled and windowed the same way as x_test before predicting.
predicted_output = saved_model.predict(x_test)