In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found
for path in (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
):
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Importing necessary libraries

In [None]:
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

## Analyze data

In [None]:
# Load the GameStop price history from the Kaggle input directory and preview
# the first five rows
df = pd.read_csv(
    filepath_or_buffer='/kaggle/input/gamestop-historical-stock-prices/GME_stock.csv'
)
df.head(n=5)

In [None]:
# Print a concise summary of the frame (index, columns, non-null counts, dtypes, memory use)
df.info()

In [None]:
# Converting the date column into datetime index
# The guard makes this cell idempotent: once 'date' has been moved into the
# index it is no longer a column, so an unguarded re-run would raise KeyError.
# A new frame is bound to `df` instead of mutating in place.
if 'date' in df.columns:
    df = df.assign(date=pd.to_datetime(df['date'])).set_index('date')
df.head()

## Visualization

In [None]:
# Line chart of every column in df against the date index.
# dtick="M12" places one x-axis tick every 12 months; the plot itself covers
# all rows of df.
fig = px.line(
    data_frame=df,
    x=df.index,
    y=df.columns,
    title='Plot of values for a 12 month period',
)
fig.update_xaxes(dtick="M12", tickformat="%b\n%Y")
fig.show()

As seen from the above plot, there is a sudden surge in the opening price, closing price, high price, low price, volume and adjusted closing price.

In [None]:
# Same line chart, with the visible x-range restricted to
# 2020-12-01 .. 2021-01-28 to inspect the most recent trend
fig = px.line(
    data_frame=df,
    x=df.index,
    y=df.columns,
    title='Plot of values for December 20 and January 21',
    range_x=['2020-12-01', '2021-01-28'],
)
fig.show()

There is a sudden surge in the opening price on 27 January 2021, but the volume had already surged earlier, on 13 January 2021.

In [None]:
# Line chart of every column with an interactive range slider under the x-axis
fig = px.line(
    data_frame=df,
    x=df.index,
    y=df.columns,
    title='Plot of data with range slider',
)
fig.update_xaxes(rangeslider_visible=True)
fig.show()

In [None]:
# Add a per-row traded-value column (opening price times volume) and chart it
# with a range slider plus quick-select buttons for 1 month / 6 months / 1 year / all
df['total_amount_traded'] = df['open_price'] * df['volume']

fig = px.line(
    data_frame=df,
    x=df.index,
    y=df['total_amount_traded'],
    title='Plot of total amount traded',
)
fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector={
        'buttons': [
            {'count': 1, 'label': "1m", 'step': "month", 'stepmode': "backward"},
            {'count': 6, 'label': "6m", 'step': "month", 'stepmode': "backward"},
            {'count': 1, 'label': "1y", 'step': "year", 'stepmode': "backward"},
            {'step': "all"},
        ],
    },
)
fig.show()

In [None]:
# Plotting the moving average for the opening price
# A rolling window follows row order, so compute it on the chronologically
# sorted series: each value is then the mean of the 256 rows up to and
# including that date. If the frame were stored newest-first, an unsorted
# rolling mean would average *future* prices instead.
# NOTE(review): df's row order is not established anywhere above; the LSTM
# section sorts it explicitly, which suggests it may not be ascending — confirm.
# Assigning the Series back aligns on the date index (assumes unique dates),
# so the row order of df itself is left untouched.
df['MA256'] = df['open_price'].sort_index().rolling(256).mean()
df[['open_price','MA256']].plot(figsize=(16,8))

In [None]:
# Plotting the exponential moving average for the opening price
# The exponential weights follow row order, so compute on the chronologically
# sorted series: recent *past* rows then get the largest weight. On a
# newest-first frame the unsorted result would be dominated by future prices.
# NOTE(review): df's row order is not established anywhere above; the LSTM
# section sorts it explicitly, which suggests it may not be ascending — confirm.
# Assigning the Series back aligns on the date index (assumes unique dates),
# so the row order of df itself is left untouched.
df['EWMA12'] = df['open_price'].sort_index().ewm(span=12).mean()
df[['open_price','EWMA12']].plot(figsize=(16,8))

In [None]:
# Candlestick chart built from the open / high / low / close columns,
# one candle per row of df
candles = go.Candlestick(
    x=df.index,
    open=df['open_price'],
    high=df['high_price'],
    low=df['low_price'],
    close=df['close_price'],
)
fig = go.Figure(data=[candles])

fig.show()

## Prediction - Using LSTM model 

In [None]:
# Importing required libraries for LSTM
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM

In [None]:
#creating dataframe
# Build a (Date, Close) frame in chronological order with a fresh 0..n-1 index.
# It is constructed in one vectorized step rather than filled cell by cell:
#   * element-wise chained assignment (frame[col][i] = value) raises
#     SettingWithCopyWarning and has no effect under pandas Copy-on-Write;
#   * integer lookups on a date-indexed Series (series[i]) are deprecated;
#   * the columns get proper datetime / float dtypes instead of object.
data = df.sort_index(ascending=True, axis=0)
new_data = pd.DataFrame({
    'Date': data.index,
    'Close': data['close_price'].to_numpy(),
})
new_data.head()

In [None]:
#setting index
# Parse 'Date' and make it the index. The guard keeps the cell idempotent:
# once 'Date' has been moved into the index it is no longer a column, so an
# unguarded re-run would raise KeyError.
if 'Date' in new_data.columns:
    new_data = new_data.assign(Date=pd.to_datetime(new_data['Date'])).set_index('Date')
new_data.head()

In [None]:
# Creating train and test sets
dataset = new_data.values

train = dataset[0:3773,:]
valid = dataset[1000:,:]

In [None]:
# Feature scaling
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(dataset)

In [None]:
# Converting dataset into x_train and y_train for 60 timesteps
x_train, y_train = [], []
for i in range(60,len(train)):
    x_train.append(scaled_data[i-60:i,0])
    y_train.append(scaled_data[i,0])
x_train, y_train = np.array(x_train), np.array(y_train)

x_train = np.reshape(x_train, (x_train.shape[0],x_train.shape[1],1))

In [None]:
# Build the LSTM model
regressor = Sequential()

regressor.add(LSTM(units = 50, return_sequences = True, input_shape = (x_train.shape[1], 1)))
regressor.add(Dropout(0.2))

regressor.add(LSTM(units = 50, return_sequences = True))
regressor.add(Dropout(0.2))

regressor.add(LSTM(units = 50))
regressor.add(Dropout(0.2))

regressor.add(Dense(units = 1))

regressor.compile(optimizer = 'adam', loss = 'mean_squared_error')

regressor.fit(x_train, y_train, epochs = 100, batch_size = 32)

In [None]:
# Predicting values, using past 60 from the train data
inputs = new_data[len(new_data) - len(valid) - 60:].values
inputs = inputs.reshape(-1,1)
inputs  = scaler.transform(inputs)

In [None]:
X_test = []
for i in range(60,inputs.shape[0]):
    X_test.append(inputs[i-60:i,0])
X_test = np.array(X_test)

In [None]:
X_test = np.reshape(X_test, (X_test.shape[0],X_test.shape[1],1))
closing_price_pred = regressor.predict(X_test)
closing_price_pred = scaler.inverse_transform(closing_price_pred)

In [None]:
rms=np.sqrt(np.mean(np.power((valid-closing_price_pred),2)))
rms

In [None]:
# Plotting
train = new_data[:3773]
valid = new_data[1000:]
valid['Predictions'] = closing_price_pred
plt.plot(train['Close'])
plt.plot(valid[['Close','Predictions']])