In [None]:
# Data source (Kaggle): https://www.kaggle.com/datasets/portiamurray/anomaly-detection-smart-meter-data-sample
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import sklearn.preprocessing
import seaborn as sns

In [None]:
# Read the smart-meter workbook; the first spreadsheet column becomes the index.
path = './Lastgang Elektroverbruche 160101-170511.xlsx'
df = pd.read_excel(io=path, index_col=0, engine='openpyxl')

# Count of missing values per column (shown as the cell output).
df.isna().sum()

In [None]:
# Give the index and the single measurement column explicit names.
df.index.name = 'datetime'
df.columns = ['kwh']

# Derive calendar helper columns from the index.
# NOTE(review): this assumes the index was parsed as datetimes by read_excel.
calendar_features = {
    'date': df.index.date,
    'time': df.index.time,
    'year': df.index.year,
    'weekday': df.index.strftime("%A"),
}
for feature_name, feature_values in calendar_features.items():
    df[feature_name] = feature_values

df.head()

In [None]:
# Entire load curve and the daily load trends.

# Mean kWh per timestamp (pivot_table's default aggregation), plotted over
# the whole recording period.
pivot_kwh = (
    df.pivot_table(index=df.index, values='kwh')
      .plot(figsize=(15, 4), title='Entire Load Curve')
)

# Mean kWh for each time of day, averaged across all recorded days.
# The aggregation is named by string: passing the NumPy callable (np.mean)
# raises a FutureWarning on recent pandas releases, while 'mean' yields the
# same result without the warning.
pivot_kwh_daily = (
    df.pivot_table(index=df['time'], values='kwh', aggfunc='mean')
      .plot(figsize=(15, 4), title='Daily Trends')
)

In [None]:
# Load distributions & daily load curve.

# Histogram of all kWh readings.
dist_kwh = df['kwh'].plot.hist(figsize=(15, 5), bins=100, title='Load Distribution')

# Mean kWh per time of day, one line per weekday.
# The aggregation is named by string: passing the NumPy callable (np.mean)
# raises a FutureWarning on recent pandas releases, while 'mean' yields the
# same result without the warning.
dist_kwh_weekday = (
    df.pivot_table(index=df['time'],
                   columns='weekday',
                   values='kwh',
                   aggfunc='mean')
      .plot(figsize=(15, 4), title='kwh Day Load Trends')
)

In [None]:
# Normalize the energy data.
def normalize_data(df):
    """Return a copy of ``df`` whose 'kwh' column is min-max scaled.

    The 'kwh' column is run through ``sklearn.preprocessing.MinMaxScaler``
    with its default settings, fitted on that same column. All other columns
    are carried over unchanged.

    The input frame is NOT modified: scaling is applied to a copy, so the raw
    readings stay available to the caller and re-running earlier cells that
    use ``df`` still shows unscaled values.

    NOTE(review): the scaler is fitted on the whole column, i.e. before any
    train/test split happens downstream, and it is not returned, so the
    scaling cannot be inverted later. Confirm this is acceptable.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a numeric 'kwh' column.

    Returns
    -------
    pandas.DataFrame
        New frame with the same index and columns as ``df`` and a scaled
        'kwh' column.
    """
    scaler = sklearn.preprocessing.MinMaxScaler()
    scaled = df.copy()
    # The scaler expects a 2-D (n_samples, n_features) array, hence the reshape.
    scaled['kwh'] = scaler.fit_transform(scaled['kwh'].values.reshape(-1, 1))
    return scaled

# Scale the readings, then keep only the 'kwh' column by dropping the
# calendar helper columns that were added for plotting.
helper_columns = ['date', 'time', 'year', 'weekday']
df_norm = normalize_data(df).drop(columns=helper_columns)
df_norm.shape

In [None]:
# Preview the first five rows of the normalized frame.
df_norm.head(n=5)

In [None]:
# Data loading: build (window -> next value) samples and split train/test.
def load_data(stock, seq_len, train_size=40000):
    """Turn the first column of ``stock`` into sliding-window samples.

    Each sample consists of ``seq_len`` consecutive values (the input window)
    and the value that immediately follows them (the target). Samples are
    produced in chronological order; the first ``train_size`` of them form the
    training set and all remaining ones form the test set.

    Parameters
    ----------
    stock : pandas.DataFrame
        Frame whose first column (position 0) holds the series to window.
    seq_len : int
        Number of consecutive values per input window; must be >= 1.
    train_size : int, optional
        Number of leading samples assigned to the training set. Defaults to
        40000, the split point used so far. If fewer samples exist, they all
        go to the training set and the test arrays are empty.

    Returns
    -------
    list
        ``[X_train, y_train, X_test, y_test]`` where the ``X`` arrays have
        shape ``(n_samples, seq_len, 1)`` and the ``y`` arrays have shape
        ``(n_samples,)``.

    Raises
    ------
    ValueError
        If ``seq_len`` is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError("seq_len must be a positive integer")

    values = stock.iloc[:, 0].to_numpy()
    # One sample per position that has a full window in front of it.
    n_windows = max(len(values) - seq_len, 0)

    # Row i of `positions` is [i, i+1, ..., i+seq_len-1]; indexing with it
    # builds every window in one step instead of one iloc slice per row.
    positions = np.arange(n_windows)[:, None] + np.arange(seq_len)[None, :]
    windows = values[positions]
    targets = values[seq_len:seq_len + n_windows]

    X_train, X_test = windows[:train_size], windows[train_size:]
    y_train, y_test = targets[:train_size], targets[train_size:]

    # Reshape the inputs to 3-D for the RNN models. The sample count is taken
    # from the arrays themselves so short inputs do not break the reshape.
    X_train = X_train.reshape(X_train.shape[0], seq_len, 1)
    X_test = X_test.reshape(X_test.shape[0], seq_len, 1)

    return [X_train, y_train, X_test, y_test]

In [None]:
# Window length: number of past readings fed to the model per sample.
seq_len = 20

X_train, y_train, X_test, y_test = load_data(df_norm, seq_len)

# Report the shape of every split.
for _label, _array in (
    ('X_train.shape = ', X_train),
    ('y_train.shape = ', y_train),
    ('X_test.shape = ', X_test),
    ('y_test.shape = ', y_test),
):
    print(_label, _array.shape)

In [None]:
from keras.layers import Dense,Dropout,SimpleRNN,LSTM
from keras.models import Sequential
from sklearn.metrics import r2_score

# Three stacked SimpleRNN layers (40 tanh units each), each followed by 15%
# dropout, ending in a single-unit dense output. The first two recurrent
# layers return the full sequence so the next recurrent layer can consume it;
# the last one returns only its final state.
rnn_model = Sequential([
    SimpleRNN(40, activation="tanh", return_sequences=True,
              input_shape=(X_train.shape[1], 1)),
    Dropout(0.15),
    SimpleRNN(40, activation="tanh", return_sequences=True),
    Dropout(0.15),
    SimpleRNN(40, activation="tanh", return_sequences=False),
    Dropout(0.15),
    Dense(1),
])

rnn_model.summary()

In [None]:
# Train with the Adam optimizer on mean squared error: 10 epochs, batches of 80.
rnn_model.compile(loss="MSE", optimizer="adam")
rnn_model.fit(x=X_train, y=y_train, batch_size=80, epochs=10)

In [None]:
# Predict on the held-out windows and score the result with R^2.
rnn_predictions = rnn_model.predict(X_test)

rnn_score = r2_score(y_true=y_test, y_pred=rnn_predictions)
print("R2 Score of RNN model = ", rnn_score)

In [None]:
def plot_predictions(test, predicted, title):
    """Draw actual and predicted series on one wide figure and show it.

    Parameters
    ----------
    test : array-like
        Actual values, drawn in blue.
    predicted : array-like
        Predicted values, drawn in semi-transparent orange on top.
    title : str
        Figure title.
    """
    _, ax = plt.subplots(figsize=(16, 4))
    ax.plot(test, color='blue', label='Actual power consumption data')
    ax.plot(predicted, alpha=0.7, color='orange', label='Predicted power consumption data')
    ax.set_title(title)
    ax.set_xlabel('Time')
    ax.set_ylabel('Normalized power consumption scale')
    # To zoom in on the first samples, add e.g. ax.set_xlim(0, 200) here.
    ax.legend()
    plt.show()
    
# Compare the model's predictions with the held-out targets.
plot_predictions(
    test=y_test,
    predicted=rnn_predictions,
    title="Load Predictions Validation",
)
