# Import and Read the Raw Data
## Import Libraries

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import math

from sklearn.preprocessing import MinMaxScaler

import warnings
warnings.filterwarnings('ignore')

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        full_path = os.path.join(root, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Import and Read the Data

In [None]:
# Keep notebook output short: show at most 5 rows, but never truncate cell text.
pd.set_option('display.max_rows', 5)
# `None` means "no limit". The former value -1 has been deprecated since
# pandas 1.0 and is rejected by newer releases.
pd.set_option('display.max_colwidth', None)

# Load the raw daily quotes. `parse_dates=True` only affects the index, and no
# index column is requested here, so the date column is converted explicitly.
antam = pd.read_csv('../input/antam-stock-market-by-kitto/ANTM.JK.csv')
antam['Date'] = pd.to_datetime(antam['Date'])
# Use the dates as the index (for date-based slicing) while keeping the
# 'Date' column, which later cells still read.
antam.index = antam['Date']

antam

## Data Info and Description

In [None]:
# Show every row of the summary tables below.
pd.set_option('display.max_rows', None)

# Per-column missing-value and cardinality summary.
# NOTE: `isnull` is an alias of `isna`, so the first two columns are identical;
# both are kept so the table layout stays the same.
antam_desc = pd.DataFrame()
antam_desc['isna'] = antam.isna().sum()
antam_desc['isnull'] = antam.isnull().sum()
antam_desc['nunique'] = antam.nunique()

# `DataFrame.info()` prints its report itself and returns None, so it must not
# be wrapped in print() (that would emit a stray "None").
antam.info(verbose=True)
print('\n','-'*80,'\n','-'*80,'\n', antam_desc)

## Fill NaN Values

In [None]:
# Fill the gaps in each price/volume column with pandas' default interpolation,
# one column at a time.
for column in ('Open', 'Close', 'High', 'Low', 'Adj Close', 'Volume'):
    antam[column] = antam[column].interpolate()

# Remaining missing values per column.
antam.isna().sum()

In [None]:
# Summary statistics of the data frame after the interpolation step.
antam.describe()

# Data Visualization

In [None]:
# Line chart of the closing price over the whole date range.
dates, close_prices = antam['Date'], antam['Close']

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(dates, close_prices)
ax.set_title('Close Prices of ANTM.JK', fontsize=20)

# Predict

In [None]:
from tensorflow.keras.layers import Bidirectional, Dropout, Activation, Dense, LSTM
from tensorflow.python.keras.layers import CuDNNLSTM
from tensorflow.keras.models import Sequential
import tensorflow as tf
from tensorflow import keras

print("Done")

## Split Data into Train and Test Sets

Because of the pandemic, we only use the data from the start of the pandemic until the end of the dataset. The start is taken as 30 January 2020, the day the WHO declared the COVID-19 outbreak a Public Health Emergency of International Concern. The roughly 11 months from that date to the end of 2020 form the training set; everything from January 2021 to the end of the data forms the test set.

In [None]:
# Visualise the train/test split of the closing price.
# A date-string slice on a DatetimeIndex includes the whole period named by its
# end label, so the training slice must stop at the last day of 2020; ending it
# at '2021' would draw the entire test period as part of the training curve.
antam["Close"].loc['2020-01-30':'2020-12-31'].plot(figsize=(10,5),legend=True)
antam["Close"].loc['2021':].plot(figsize=(10,5),legend=True)
plt.legend(['Training set','Test set'])
plt.show()

In [None]:
# Closing prices as column vectors: the 2020 part (training) and the whole
# pandemic period (training + test).
train_set = antam.Close[(antam.index > '2020-01-30') & (antam.index < '2021-01-01')].values.reshape(-1, 1)
all_set = antam.Close[antam.index > '2020-01-30'].values.reshape(-1, 1)

# Fit the scaler on the training data only, then apply the same mapping to the
# full series. Fitting on `all_set` would leak the test period's min/max into
# preprocessing. Test values outside the training range simply map outside
# [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_train = scaler.fit_transform(train_set)
scaled_all = scaler.transform(all_set)

print("Train shape = {}".format(scaled_train.shape))
print("All shape = {}".format(scaled_all.shape))

window_size = 10    # Window size = number of previous values to predict the next value

def generateSequence(sequence, backward):
    """Turn a series into sliding-window samples for next-step prediction.

    Sample ``i`` consists of the ``backward`` consecutive values starting at
    position ``i``; its target is the value immediately after that window.

    Args:
        sequence: Array-like of shape ``(n,)`` or ``(n, 1)``.
        backward: Window length, i.e. number of previous values per sample.

    Returns:
        Tuple ``(x, y)`` where ``x`` has shape ``(n - backward, backward, 1)``
        and ``y`` holds the matching targets (``sequence[backward:]``). When
        the sequence is not longer than the window, both arrays are empty but
        still carry the expected trailing dimensions.
    """
    sequence = np.asarray(sequence)
    n_samples = max(sequence.shape[0] - backward, 0)

    if n_samples == 0:
        # Too short for even one window: return empty arrays of the right
        # shape instead of failing on the reshape below.
        empty_x = np.empty((0, backward, 1), dtype=sequence.dtype)
        empty_y = np.empty((0,) + sequence.shape[1:], dtype=sequence.dtype)
        return empty_x, empty_y

    x_train = np.stack([sequence[i:i + backward] for i in range(n_samples)])
    # The targets are simply the series shifted by the window length.
    y_train = sequence[backward:].copy()
    # Add the trailing feature axis (a no-op for (n, 1) input).
    x_train = x_train.reshape((n_samples, backward, 1))
    return x_train, y_train
    
# Build the (window -> next value) training pairs from the scaled training data
# and report their shapes.
x_train, y_train = generateSequence(sequence=scaled_train, backward=window_size)
for label, array in (("x", x_train), ("y", y_train)):
    print("{} shape = {}".format(label, array.shape))

In [None]:
# Two stacked LSTM layers followed by a single-unit dense output.
model_lstm = Sequential([
    LSTM(units=50, return_sequences=True, activation='relu', input_shape=(x_train.shape[1], 1)),
    LSTM(units=50),
    Dense(1),
])

model_lstm.compile(optimizer='adam', loss='mean_squared_error')

# Train for 100 epochs, holding out 10% of the samples for validation.
epoch_history = model_lstm.fit(
    x_train,
    y_train,
    epochs=100,
    batch_size=36,
    validation_split=0.1,
    verbose=2,
)

In [None]:
# Loss (MSE) of the fitted model on the training samples, plus its square root.
train_score = model_lstm.evaluate(x_train, y_train)
train_rmse = math.sqrt(train_score)
print('Train Score: %.6f MSE (%.6f RMSE)' % (train_score, train_rmse))

In [None]:
# Training vs. validation loss per epoch.
plt.figure(figsize=(15, 5))
plt.plot(epoch_history.history['loss'])
plt.plot(epoch_history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
# 'val_loss' comes from the validation_split hold-out of the training data, not
# from the 2021 test set, so it is labelled 'validation'.
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

In [None]:
# Train/test split of the closing price, for reference.
# A date-string slice on a DatetimeIndex includes the whole period named by its
# end label, so the training slice stops at the last day of 2020; ending it at
# '2021' would draw the test period as part of the training curve.
antam["Close"].loc['2020-01-30':'2020-12-31'].plot(figsize=(10,5),legend=True)
antam["Close"].loc['2021':].plot(figsize=(10,5),legend=True)
plt.legend(['Training set','Test set'])
plt.show()

# Predict on the training windows and map both targets and predictions back to
# the original price scale.
y_train_predicted = model_lstm.predict(x_train)
y_inverse = scaler.inverse_transform(y_train)
y_train_predicted_inverse = scaler.inverse_transform(y_train_predicted)

plt.figure(figsize=(10, 5))
plt.plot(y_inverse.ravel(), label="Price", color='black')
plt.plot(y_train_predicted_inverse.ravel(), label="Predicted Price", color='blue')
plt.legend(loc=2)
plt.show()

# Predict All Data During the Pandemic

In [None]:
# NOTE: this rebinds x_train / y_train to windows over the WHOLE pandemic
# period (training + test), so re-running earlier cells afterwards would use
# these instead of the training-only windows.
x_train, y_train = generateSequence(scaled_all, window_size)

y_predicted = model_lstm.predict(x_train)
y_inverse = scaler.inverse_transform(y_train)
y_predicted_inverse = scaler.inverse_transform(y_predicted)

# The first target sits `window_size` steps into the series, so the first
# prediction whose target falls in the test period is at
# len(training data) - window_size. Deriving the boundaries from the data
# replaces the hard-coded 211/234, which only fit one data length and window
# size (pd.Series raises on any length mismatch).
# NOTE(review): assumes 211 was len(scaled_train) - window_size - confirm.
n_predictions = y_predicted_inverse.shape[0]
split_idx = min(max(scaled_train.shape[0] - window_size, 0), n_predictions)

plt.figure(figsize=(10, 5))
plt.plot(y_inverse.ravel(), label="Close Price", color='black')
plt.plot(pd.Series(y_predicted_inverse[:split_idx].ravel(), index=range(0, split_idx)), label="Train Predicted Close Price", color='blue')
plt.plot(pd.Series(y_predicted_inverse[split_idx:].ravel(), index=range(split_idx, n_predictions)), label="Test Predicted Close Price", color='red')
plt.legend(loc=2)
plt.title("All data - Prediction at 1 day based on the previous {} days".format(window_size))

# Additional

It is still unclear whether the missing dates need to be handled.

In [None]:
# Gap between each row's date and the previous row's date (NaT for the first
# row), followed by a quick summary: total span, row count, number of distinct
# gap sizes, and how often each gap size occurs.
antam['delta'] = antam['Date'].diff()

gaps = antam['delta']
gaps.sum(), antam['Date'].count(), gaps.nunique(), gaps.value_counts()

Still in progress; further analysis to follow.