In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Read the FB stock CSV into a DataFrame and preview the first rows.
df=pd.read_csv('../input/fbstock/FB.csv')
df.head()

In [None]:
# Preview the last rows of the frame.
df.tail()

In [None]:
# (rows, columns) of the raw frame.
df.shape

In [None]:
# Count missing values per column.
df.isnull().sum()

In [None]:
# Keep only the 'Close' column as the series to model.
# reset_index() moves the old index into a column, which is dropped again by selecting 'Close'.
df_1=df.reset_index()['Close']
df_1

In [None]:
# Shape of the selected Close series (a 1-tuple, since it is a Series).
df_1.shape

In [None]:
# Quick look at the Close series plotted against its row index.
plt.plot(df_1)

In [None]:
# View the same series as a NumPy array (display only; nothing is assigned).
np.array(df_1)

In [None]:
# LSTMs are sensitive to the scale of the input data, so apply min-max scaling (normalization).

from sklearn.preprocessing import MinMaxScaler

In [None]:
# Rescale the closing prices into [0, 1].
# MinMaxScaler works on 2-D (n_samples, n_features) input, so the series is first
# converted to a NumPy array and reshaped into a single column.
# NOTE(review): the scaler is fitted on the whole series before the train/test
# split further down, so the test period's min/max influence the scaling.
scaler=MinMaxScaler(feature_range=(0,1))
df_1=np.array(df_1).reshape(-1,1)
df_1=scaler.fit_transform(df_1)
print(df_1.shape)
df_1

## Preprocess the data

#### Preprocess the data: most of the time we split data with cross-validation or random-seed methods (these work best for linear problems). In a time series, however, every value is related to the values before it.
(e.g. take 4 days of stock prices: the day-2 price is related to day 1, the day-3 price has some connection with day 2 and day 1, and so on. So when splitting the data, take a continuous run of dates for training and the remaining, later dates for testing.)

In [None]:
# Chronological split: the first 65% of the rows are for training, the rest for testing.

train_size=int(0.65*len(df_1))
test_size=len(df_1)-train_size
print(train_size)
print(test_size)

In [None]:
# Training data: the first train_size rows of the scaled series (row count, then the values).

print(len(df_1[:train_size,:]))     
print('\n',df_1[:train_size,:])

In [None]:
# Testing data: the rows from train_size onward (row count only).

print(len(df_1[train_size:,:]))     
# The values themselves are not printed; uncomment the next line to inspect them.
#print(df_1[train_size:,:])

In [None]:
# Name the two halves of the chronological split.
# Both keep the 2-D single-column layout of df_1.

train_df=df_1[:train_size,:]
test_df=df_1[train_size:,:]

eg: If the stock cost is [45,78,98,65,32,15,100,15,57,45]

    Every price depends to some degree on the previous values. Here 98 depends on 45 and 78, and so on.
    
    1. our train data is [45,78,98,65,32,15]   &   test data is [100,15,57,45]
    
    2. Next we build X_train, y_train, X_test and y_test: the train data above is split into X_train and y_train, and the test data into X_test and y_test.
    
    3. A timestep (sliding-window) method is used here: with timestep=2, the first 2 values of the train data form the first row of X_train and the 3rd value is its y_train (the value to be predicted). The second row takes the 2nd and 3rd values of the train data as input and the 4th value as output, and so on.
               
               X_train      y_train
                [45,78]       [98]
                [78,98]       [65]
                [98,65]       [32]   like this it goes on.... same for test data also
                
  THE FUNCTION BELOW CREATES A DATASET USING THE ABOVE METHOD

In [None]:
def create_dataset(dataset,timestep):
    """Turn a single-column series into sliding-window samples.

    Parameters
    ----------
    dataset : 2-D array, indexed as dataset[row, 0]
        The series to window; only column 0 is read.
    timestep : int
        Number of consecutive values in each input window.

    Returns
    -------
    (X, y) : tuple of np.ndarray
        X[k] holds dataset[k : k+timestep, 0] and y[k] is the value that
        follows it, dataset[k+timestep, 0].

    Notes
    -----
    The number of windows is len(dataset) - timestep - 1, which is one fewer
    than the series allows (the final window/target pair is left out). Other
    cells in this notebook compute plotting offsets from that count, so it is
    kept as is. When the series is too short to form any window, both
    returned arrays are empty.
    """
    n_windows = len(dataset) - timestep - 1
    starts = range(n_windows)
    windows = [dataset[start:start + timestep, 0] for start in starts]
    targets = [dataset[start + timestep, 0] for start in starts]
    return np.array(windows),np.array(targets)

In [None]:
# Build 100-step input windows and next-value targets for each split.
X_train, y_train=create_dataset(train_df,timestep=100)
X_test, y_test=create_dataset(test_df,timestep=100)

In [None]:
# Show the shapes of the training inputs and targets.
# Separate statements avoid building a (None, None) tuple out of the two
# print() return values, which the notebook would echo as the cell output.
print(X_train.shape)
print(y_train.shape)

In [None]:
# Show the shapes of the test inputs and targets.
# Separate statements avoid building a (None, None) tuple out of the two
# print() return values, which the notebook would echo as the cell output.
print(X_test.shape)
print(y_test.shape)

### Creating a stacked LSTM model

In [None]:
# LSTM layers take 3-D input shaped (samples, timesteps, features).
# There is a single feature per timestep, so add a trailing axis of size 1.

X_train=X_train.reshape(X_train.shape[:2] + (1,))
X_test=X_test.reshape(X_test.shape[:2] + (1,))

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM

In [None]:
# Stacked LSTM: three 50-unit LSTM layers followed by a single-unit Dense output.
# The first two LSTMs return the full sequence so that the next LSTM receives
# one vector per timestep; the third returns only its last output.
# input_shape=(100,1) means 100 timesteps with 1 feature each, matching the
# 100-step windows built with create_dataset.
model=Sequential([
    LSTM(50,return_sequences=True,input_shape=(100,1)),
    LSTM(50,return_sequences=True),
    LSTM(50),
    Dense(1),
])
model.compile(loss='mean_squared_error',optimizer='adam')

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Train for 30 epochs, reporting one line per epoch (verbose=2).
# NOTE(review): the test windows are passed as validation data, so the test
# RMSE computed afterwards is not measured on data unseen during training runs.
model.fit(
    X_train,
    y_train,
    validation_data=(X_test,y_test),
    epochs=30,
    verbose=2,
)

In [None]:
# Model outputs for both splits (still in the scaler's [0, 1] range at this point).
train_pred,test_pred=model.predict(X_train),model.predict(X_test)

In [None]:
# Map the scaled predictions back to the original price units.
# NOTE: the variables are overwritten, so running this cell twice would apply
# the inverse transform twice.

train_pred,test_pred=scaler.inverse_transform(train_pred),scaler.inverse_transform(test_pred)

In [None]:
# Display the training predictions after the inverse transform back to the original scale.
train_pred

In [None]:
# RMSE performance metrics

import math
from sklearn.metrics import mean_squared_error

# train_pred has already been mapped back to price units with
# scaler.inverse_transform, while y_train is still in the scaler's [0, 1]
# range. The targets are brought onto the same scale before comparing;
# otherwise the result is dominated by the scale difference, not the model error.
# y_train itself is left unchanged so the cell can be re-run safely.
math.sqrt(mean_squared_error(scaler.inverse_transform(y_train.reshape(-1,1)),train_pred))

In [None]:
# Test RMSE in price units. test_pred was inverse-transformed earlier, so the
# scaled targets are inverse-transformed here as well to compare like with like.
math.sqrt(mean_squared_error(scaler.inverse_transform(y_test.reshape(-1,1)),test_pred))

In [None]:

### Plotting
# Overlay the train and test predictions on the actual prices. Each prediction
# is placed at the row it forecasts; every other row stays NaN so matplotlib
# leaves it blank.
look_back=100

# A training window starting at row i predicts row i+look_back.
trainPredictPlot = np.full_like(df_1, np.nan)
trainPredictPlot[look_back:look_back+len(train_pred), :] = train_pred

# Test predictions start one look_back window into the test split. The +1 at
# the start and the -1 at the end account for create_dataset producing
# len(data)-look_back-1 windows per split.
testPredictPlot = np.full_like(df_1, np.nan)
testPredictPlot[len(train_pred)+(look_back*2)+1:len(df_1)-1, :] = test_pred

# Actual prices first, then the two prediction overlays.
plt.plot(scaler.inverse_transform(df_1))
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
plt.show()

## Predict stock for next 100 days

In [None]:
# Number of rows in the test split.
len(test_df)

In [None]:
# Seed window for the forecast: the last 100 scaled values of the test split
# (100 is the window length the model was built with).
# Slicing from the end replaces the hard-coded offset 463, which only yields
# 100 values for one specific dataset length.
x_input=test_df[-100:].reshape(1,-1)
x_input.shape

In [None]:
# Display the seed window (a single row after the reshape in the previous cell).
x_input

In [None]:
# Flatten the single-row seed window into a plain Python list of floats.
temp_input=x_input[0].tolist()

In [None]:
# Display the seed window as a flat Python list.
temp_input

In [None]:

# Recursive forecast for the next 100 days.
# The model is fed the most recent n_steps scaled values. Each prediction is
# appended to the window and the oldest value dropped, so later forecasts are
# built on earlier forecasts rather than on real prices.
from numpy import array

n_steps=100     # window length; must match the model's input_shape=(100,1)
n_future=100    # number of days to forecast

# Work on a copy of the seed window: temp_input is left untouched and nothing
# is read from x_input as left by earlier cells, so re-running this cell
# always restarts from the same starting point and gives the same forecast.
window=list(temp_input[-n_steps:])
lst_output=[]
for i in range(n_future):
    x_input=np.array(window).reshape((1, n_steps, 1))
    yhat=model.predict(x_input, verbose=0)
    # Slide the window: add the new prediction, drop the oldest value.
    window.extend(yhat[0].tolist())
    window=window[1:]
    # Keep the [[value], ...] layout so the result can be passed straight to
    # scaler.inverse_transform as a single-column 2-D input.
    lst_output.extend(yhat.tolist())

print(lst_output)

In [None]:
# x-axis positions: days 1-100 for the most recent actual values,
# days 101-200 for the forecast that follows them.
day_new=np.arange(1,101)
day_pred=day_new+100

In [None]:
# Total number of rows in the scaled series.
len(df_1)

In [None]:
# Most recent actual values followed by the forecast, both in price units.
# Slicing from the end (one row per entry in day_new) replaces the hard-coded
# row offset 1508, which only lines up for one specific dataset length.
plt.plot(day_new,scaler.inverse_transform(df_1[-len(day_new):]))
plt.plot(day_pred,scaler.inverse_transform(lst_output))

In [None]:
# Append the forecast to the scaled history and plot everything from row 1200 on.
# Values here are still in the scaler's [0, 1] range (no inverse transform).
df3=[*df_1.tolist(), *lst_output]
plt.plot(df3[1200:])