In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file found.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import tensorflow as tf
import pandas as pd
pd.options.mode.chained_assignment = None
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
import plotly.express as px
import plotly.graph_objects as go
%matplotlib inline
# Plot styling: seaborn theme plus a wide default matplotlib canvas.
sns.set(style='whitegrid', palette='muted')
rcParams['figure.figsize'] = (14, 8)

# Seed NumPy and TensorFlow so repeated runs start from the same random state.
for seed_fn in (np.random.seed, tf.random.set_seed):
    seed_fn(1)

print("TensorFlow Version: ", tf.__version__)

In [None]:
# Read the S&P 500 daily series; the 'date' column is parsed into datetimes.
df = pd.read_csv(
    "../input/sp500-daily-19862018/spx.csv",
    parse_dates=['date'],
)
df.head()

In [None]:
# (rows, columns) of the loaded frame.
df.shape


In [None]:
# Interactive line chart of the closing price over the full date range.
close_trace = go.Scatter(x=df['date'], y=df['close'], mode='lines', name='close')
fig = go.Figure(data=[close_trace])
fig.update_layout(showlegend=True)
fig.show()

# Data Preprocessing


In [None]:
# Chronological 80/20 split: the first 80% of rows train, the remainder test.
train_size = int(0.8 * len(df))
test_size = len(df) - train_size
train = df.iloc[:train_size]
test = df.iloc[train_size:]
print(train.shape, test.shape)

In [None]:
from sklearn.preprocessing import StandardScaler


# Fit the scaler on the training closes only, then apply that same transform
# to both splits so the test data is scaled with training statistics.
sc = StandardScaler().fit(train[['close']])

for split in (train, test):
    split['close'] = sc.transform(split[['close']])

# Create Train And Test Sequences

In [None]:
def create_sequences(X,y,time_steps=1):
    """Slice a series into overlapping windows paired with the value that follows.

    Parameters
    ----------
    X : object supporting positional ``.iloc`` slicing (e.g. a pandas DataFrame)
        Source rows; every window is ``time_steps`` consecutive rows of ``X``.
    y : object supporting positional ``.iloc`` lookup (e.g. a pandas Series)
        Source of targets; the target of a window is the entry right after it.
    time_steps : int, default 1
        Number of rows per window.

    Returns
    -------
    tuple of np.ndarray
        ``(windows, targets)``, each holding ``len(X) - time_steps`` entries
        (both are empty arrays when ``X`` has no more than ``time_steps`` rows).
    """
    window_starts = range(len(X) - time_steps)
    windows = [X.iloc[start:start + time_steps].values for start in window_starts]
    targets = [y.iloc[start + time_steps] for start in window_starts]
    return np.array(windows), np.array(targets)
    
    

In [None]:
# Window length: every sample is 30 consecutive scaled closing prices.
time_steps=30
X_train,y_train = create_sequences(train[['close']],train.close,time_steps)
# Test targets must come from the test split; the original read them from
# train.close, pairing test windows with unrelated training-period values.
X_test,y_test = create_sequences(test[['close']],test.close,time_steps)

In [None]:
# Shape of the windowed training array built by create_sequences.
print(X_train.shape)

# LSTM Autoencoder

In [None]:
# Axis 1 is the window length and axis 2 the per-step feature count; both
# are used to size the network's input and output layers.
timesteps, num_features = X_train.shape[1:3]


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense,Dropout,RepeatVector,TimeDistributed
# LSTM autoencoder: encode each window to a single vector, then decode that
# vector back into a sequence with the same (timesteps, num_features) shape.
model = Sequential([
    # Encoder: only the final LSTM state (128 units) is passed on.
    LSTM(128,input_shape=(timesteps,num_features)),
    Dropout(0.2),
    # Repeat the encoding once per time step so the decoder can unroll it.
    RepeatVector(timesteps),
    # Decoder: emits an output at every time step.
    LSTM(128,return_sequences=True),
    Dropout(0.2),
    # Map every decoder step back to num_features values.
    TimeDistributed(Dense(num_features))
])
# No 'accuracy' metric: the targets are continuous values, so classification
# accuracy is meaningless here. MAE (the loss) is the quantity of interest.
model.compile(loss='mae',optimizer='adam')
model.summary()

# Train The Autoencoder

In [None]:
# Stop training once validation loss has gone 3 epochs without improving.
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss',patience=3,mode='min')
# An autoencoder learns to reproduce its input, so X_train is both the input
# and the target. The original passed y_train (one value per window), whose
# shape does not match the (timesteps, num_features) output and therefore
# broadcast against the whole batch inside the loss.
# shuffle=False keeps the windows in chronological order, so the last 10%
# used for validation is the most recent part of the training period.
# NOTE(review): the anomaly threshold chosen later was tuned by eye on the
# loss distribution; re-check it after retraining with the corrected target.
history = model.fit(
    X_train, X_train, epochs=100,
    batch_size=32, validation_split=0.1, callbacks=[es], shuffle=False
)

# Plot Metrics and Evaluate The Model

In [None]:
# Training vs validation loss per epoch, to check for over- or under-fitting.
for history_key, curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    plt.plot(history.history[history_key], label=curve_label)
plt.legend()

In [None]:
# Reconstruct the training windows, then average the absolute error over the
# time axis to get one reconstruction-error value per window.
X_train_pred = model.predict(X_train)
train_abs_error = np.abs(X_train_pred - X_train)
train_mae_loss = pd.DataFrame(train_abs_error.mean(axis=1), columns=['Error'])

In [None]:
# Evaluate reconstruction quality on the test windows. The target is X_test
# itself, matching how the anomaly scores below are computed; the original
# compared against y_test, which has a different shape from the model output.
model.evaluate(X_test,X_test)

In [None]:
# Distribution of training reconstruction error, used to pick the anomaly
# threshold by eye. sns.distplot is deprecated and slated for removal, so use
# histplot; stat='density' keeps the normalised y-axis distplot produced.
sns.histplot(train_mae_loss['Error'],bins=50,kde=True,stat='density')

In [None]:
# Reconstruction-error cutoff: test windows whose loss exceeds this value are
# flagged as anomalies further down.
# NOTE(review): presumably hand-picked from the training-error distribution
# plotted above — confirm and re-tune whenever the model is retrained.
threshold =0.65

In [None]:
# Reconstruct every test window with the trained autoencoder.
X_test_pred = model.predict(X_test)

In [None]:
# One reconstruction-error value per test window: mean absolute error taken
# over the time axis (axis 1).
test_abs_error = np.abs(X_test_pred - X_test)
test_mae_loss = test_abs_error.mean(axis=1)

In [None]:
# Score table for the test period. The first `time_steps` rows are dropped
# because create_sequences yields no window ending on them.
scored_rows = test[time_steps:]
test_score_df = pd.DataFrame(scored_rows).assign(
    loss=test_mae_loss,
    threshold=threshold,
)
# Flag rows whose reconstruction error exceeds the cutoff.
test_score_df['anamoly'] = test_score_df['loss'] > test_score_df['threshold']
test_score_df['close'] = scored_rows.close

In [None]:
# First rows of the score table (loss, threshold and anomaly flag per date).
test_score_df.head()

In [None]:
# Last rows of the score table.
test_score_df.tail()

In [None]:
# Test reconstruction error over time with the constant threshold overlaid;
# anomalies are the stretches where the loss line rises above the threshold.
fig = go.Figure()
score_dates = test[time_steps:].date
for score_column, trace_name in (('loss', 'Test Loss'), ('threshold', 'Threshold')):
    fig.add_trace(go.Scatter(x=score_dates, y=test_score_df[score_column], mode='lines', name=trace_name))
fig.update_layout(showlegend=True)
fig.show()



In [None]:
# Keep only the rows flagged as anomalous (the flag column is already boolean).
anomaly_mask = test_score_df['anamoly']
anamolies = test_score_df.loc[anomaly_mask]
anamolies.head()

In [None]:
# Closing price over the test period, in original units, with the flagged
# anomalies marked on top.
scored_rows = test[time_steps:]

# The scaler was fitted on a 2-D single-column frame, and current scikit-learn
# rejects 1-D input to inverse_transform, so pass a 2-D frame and flatten the
# result for plotting.
close_price = sc.inverse_transform(scored_rows[['close']]).ravel()

# inverse_transform also rejects zero-row input; when nothing was flagged,
# fall back to an empty array so the legend entry is still drawn.
if len(anamolies):
    anomaly_price = sc.inverse_transform(anamolies[['close']]).ravel()
else:
    anomaly_price = np.array([])

fig = go.Figure()
fig.add_trace(go.Scatter(x=scored_rows.date,y=close_price,mode='lines',name='Close Price'))
fig.add_trace(go.Scatter(x=anamolies.date,y=anomaly_price,mode='markers',name='Ano'))
fig.update_layout(showlegend=True)
fig.show()
