In [6]:
import datetime

import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
import plotly.graph_objects as go

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error
import warnings
warnings.filterwarnings('ignore')

# Importing custom libraries
# Project-local plotting / reshaping helpers live in the `utilities` package.
import sys
# NOTE(review): this appends the absolute path '/utilities/' (under the
# filesystem root), but the import below looks up the package *named*
# `utilities`, which requires the package's parent directory to be on
# sys.path. Presumably the import succeeds because the notebook's working
# directory contains `utilities/` -- confirm whether this append is needed.
sys.path.append('/utilities/')
from utilities.data_manipulation import pivot_dataframe, convert_to_supervised, rename_dataframe_supervised, plot_results, plot_comparison

# Random seed for reproducibility
# Only TensorFlow's global seed is set in this cell; NumPy and Python's
# `random` module are not seeded here.
# TODO(review): seed those as well if any later step draws from them.
tf.random.set_seed(42)


### Loading Data

In [7]:
# Columns that are plotted, scaled and modelled throughout the notebook
ids = ['meantemp', 'humidity']

# Read the daily climate observations (path is relative to the working directory)
formatted_df = pd.read_csv(filepath_or_buffer='Data/DailyDelhiClimate.csv')

# Preview the first ten rows
formatted_df.head(n=10)

Unnamed: 0,Date,meantemp,humidity
0,1/1/2013,10.0,84.5
1,1/2/2013,7.4,92.0
2,1/3/2013,7.166667,87.0
3,1/4/2013,8.666667,71.333333
4,1/5/2013,6.0,86.833333
5,1/6/2013,7.0,82.8
6,1/7/2013,7.0,78.6
7,1/8/2013,8.857143,63.714286
8,1/9/2013,14.0,51.25
9,1/10/2013,11.0,62.0


### Graphing Initial Dataset

In [8]:
# Start from an empty figure and add one line trace per tracked column
fig = go.Figure()

for factor_level in ids:
    # Raw (unscaled) series for this column, plotted against the Date column
    trace = go.Scatter(
        x=formatted_df['Date'],
        y=formatted_df[factor_level],
        mode='lines',
        name=factor_level,
    )
    fig.add_trace(trace)

# update_layout returns the figure, so it is rendered as the cell output
fig.update_layout(title="Temp and Humidity over Time, Delhi India")

### Normalizing (Min-Max Scaling)

In [9]:
# Creating the scaler: maps each fitted column onto the [0, 1] interval
scaler = MinMaxScaler(feature_range=(0, 1))

# Normalizing target columns, one column at a time.
# NOTE(review): the same scaler object is re-fitted on every pass, so once the
# loop finishes it only holds the min/max of the last column in `ids`.
# NOTE(review): the fit uses the full frame, i.e. it happens before the
# train/test split further down -- confirm this is acceptable for evaluation.
for col in ids:
    single_column = formatted_df[[col]]
    formatted_df[col] = scaler.fit_transform(single_column)

# Preview the scaled values
formatted_df.head(10)

Unnamed: 0,Date,meantemp,humidity
0,1/1/2013,0.122271,0.820957
1,1/2/2013,0.042795,0.907591
2,1/3/2013,0.035662,0.849835
3,1/4/2013,0.081514,0.668867
4,1/5/2013,0.0,0.84791
5,1/6/2013,0.030568,0.80132
6,1/7/2013,0.030568,0.752805
7,1/8/2013,0.087336,0.580858
8,1/9/2013,0.244541,0.436881
9,1/10/2013,0.152838,0.561056


### Test/Train Split

In [10]:
# Share of the rows (taken from the start of the frame) used for training
total_observations = len(formatted_df)
train_ratio = 0.7

# Time-based split: rows are never shuffled, so the test set is strictly the
# tail of the series that follows the training rows.
split_index = int(total_observations * train_ratio)
train_df = formatted_df.iloc[:split_index]
test_df = formatted_df.iloc[split_index:]

# Report the size of each partition
print(len(train_df), len(test_df))


1023 439


Unnamed: 0,Date,meantemp,humidity
1023,10/21/2015,0.626638,0.5625
1024,10/22/2015,0.573144,0.491749
1025,10/23/2015,0.550218,0.441213
1026,10/24/2015,0.576965,0.459983
1027,10/25/2015,0.622817,0.389233
1028,10/26/2015,0.599891,0.446988
1029,10/27/2015,0.573144,0.471535
1030,10/28/2015,0.454694,0.506188
1031,10/29/2015,0.492904,0.578383
1032,10/30/2015,0.500546,0.6217


### Anomaly Detection with LSTM Autoencoders

First we need to define both an encoder and a decoder:

In [15]:
def create_model(timesteps=20, latent_units=32, decoder_units=32):
    """Build and compile the LSTM autoencoder used for anomaly detection.

    The encoder compresses a univariate window of ``timesteps`` observations
    into a vector of ``latent_units`` values; the decoder maps that vector
    back to ``timesteps`` reconstructed values. The defaults reproduce the
    original hard-coded architecture (window of 20, 32 units in both LSTMs).

    Args:
        timesteps: Length of each input window, which is also the width of
            the reconstructed output.
        latent_units: Number of units in the encoder LSTM. The decoder's
            declared input length is tied to this value so the two halves
            cannot drift apart.
        decoder_units: Number of units in the decoder LSTM.

    Returns:
        A compiled Keras ``Sequential`` model (MSE loss, Adam optimizer)
        chaining the encoder and the decoder.

    Raises:
        ValueError: If any size argument is smaller than 1.
    """
    for arg_name, arg_value in (
        ('timesteps', timesteps),
        ('latent_units', latent_units),
        ('decoder_units', decoder_units),
    ):
        if arg_value < 1:
            raise ValueError(f"{arg_name} must be >= 1, got {arg_value!r}")

    # Encoder: one window of `timesteps` scalar observations in, latent vector out
    encoder = Sequential()
    encoder.add(LSTM(latent_units, activation='relu', input_shape=(timesteps, 1)))

    # Decoder: reads the latent vector as a sequence of `latent_units` scalars.
    # NOTE(review): the encoder LSTM does not return sequences, so it emits a
    # 2-D (batch, latent_units) tensor while this layer declares a 3-D input;
    # this presumably relies on Keras reconciling the trailing singleton axis
    # when the sub-models are chained -- confirm on the installed Keras version.
    decoder = Sequential()
    decoder.add(LSTM(decoder_units, activation='relu', input_shape=(latent_units, 1)))
    decoder.add(Dense(timesteps))

    # Autoencoder: encoder followed by decoder, trained on reconstruction error
    autoencoder = Sequential([encoder, decoder])
    autoencoder.compile(loss='mse', optimizer='adam')

    return autoencoder

# Calling create model function (defaults keep the original 20-step / 32-unit setup)
model = create_model()