In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*" and newer
# releases no longer ship the old names, so prefer the new name and fall back to the
# legacy one on older installs.
plt.style.use(
    "seaborn-v0_8-whitegrid"
    if "seaborn-v0_8-whitegrid" in plt.style.available
    else "seaborn-whitegrid"
)

import seaborn as sns
from collections import Counter


# Import the layers from the public `keras.layers` namespace. The `keras.layers.core` and
# `keras.layers.recurrent` submodules are internal paths that newer Keras releases no
# longer expose, while `keras.layers` provides the same names across versions.
from keras.layers import Dense, Activation, Dropout, LSTM
from keras.models import Sequential
import time
from sklearn.preprocessing import MinMaxScaler
from numpy import newaxis

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input directory and print the full path of every file in it.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read TCS.csv from the Kaggle input directory into a DataFrame.
# Later cells rely on its 'High', 'Low', 'Close' and 'Volume' columns.
tcs_df = pd.read_csv('/kaggle/input/tcs-stock-data/TCS.csv')

In [None]:
# List the column labels of the loaded frame.
tcs_df.columns

In [None]:
# Preview the first rows of the loaded frame.
tcs_df.head()

In [None]:
# Print a concise summary of the frame (columns, dtypes, non-null counts).
tcs_df.info()

In [None]:
# Descriptive statistics using DataFrame.describe's default settings.
tcs_df.describe()

In [None]:
# (number of rows, number of columns) of the loaded frame.
tcs_df.shape

In [None]:
# Pairwise correlation between the numeric columns. The numeric dtypes are selected
# explicitly because pandas >= 2.0 raises if a non-numeric column is present instead of
# silently dropping it.
tcs_df.select_dtypes(include='number').corr()

In [None]:
# Annotated heatmap of the correlations between numeric columns. Numeric dtypes are
# selected explicitly because pandas >= 2.0 raises in corr() if a non-numeric column
# is present.
plt.figure(figsize=(10, 10))
plt.title('Correlation between numeric columns')
sns.heatmap(
    tcs_df.select_dtypes(include='number').corr(),
    cmap="YlGnBu",
    annot=True,
)
plt.show()

In [None]:
# Distribution of the 'High' column, drawn in orange with seaborn's default theme.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases - consider
# histplot/displot once the target seaborn version is confirmed.
sns.set()
high_ax = sns.distplot(tcs_df['High'], color='orange')
high_ax.set_title('High Stockprice Distribution')
high_ax

In [None]:
# Distribution of the 'Low' column with seaborn's default theme and default color.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases - consider
# histplot/displot once the target seaborn version is confirmed.
sns.set()
low_ax = sns.distplot(tcs_df['Low'])
low_ax.set_title('Low Stockprice Distribution')
low_ax

In [None]:
# Volume as a float32 column vector of shape (n_rows, 1). reshape(-1, 1) lets NumPy infer
# the row count, so the cell no longer assumes the CSV has exactly 504 rows.
tcs_stock_volume = tcs_df.Volume.values.astype('float32').reshape(-1, 1)
tcs_stock_volume.shape

In [None]:
# Line plot of traded volume over the rows of the frame.
# NOTE(review): the x values are the frame's index as loaded by read_csv, not parsed
# dates - confirm whether the 'Date' axis label is what is intended.
plt.figure(figsize=(16, 8))
plt.title('Volume History')
plt.plot(tcs_df['Volume'])
plt.xlabel('Date', fontsize=18)
# Volume is a quantity traded, not a price, so the y axis is labelled accordingly.
plt.ylabel('Volume', fontsize=18)
plt.show()

In [None]:
# Line plot of the 'Close' column on a single 16x8 inch figure.
fig, ax = plt.subplots(figsize=(16, 8))
ax.plot(tcs_df['Close'])
ax.set_title('Close Price History')
ax.set_xlabel('Date', fontsize=18)
ax.set_ylabel('Close Price USD ($)', fontsize=18)
plt.show()

In [None]:
# Grid of per-column histograms on a 12x12 inch figure; the trailing ';' suppresses the
# textual repr of the returned Axes array.
tcs_df.hist(figsize=(12, 12));

In [None]:
# Preview the first rows of the frame again.
tcs_df.head()

In [None]:
# Work on an independent copy: a plain assignment only aliases the same object, so the
# derived columns added to `tcs_df_new` would silently appear in `tcs_df` as well.
tcs_df_new = tcs_df.copy()

In [None]:
# Rolling-mean windows (in rows) to compute over the closing price.
ma_day = [10, 20, 50]

# Add one "MA for N days" column per window; each is the rolling mean of 'Close'.
close_prices = tcs_df_new['Close']
for window in ma_day:
    tcs_df_new[f"MA for {window} days"] = close_prices.rolling(window).mean()

In [None]:
# Preview the frame with the moving-average columns. Their first rows are NaN because a
# rolling mean is undefined until its window (10/20/50 rows) has filled.
tcs_df_new.head()

In [None]:
# Closing price with its 10/20/50-row moving averages on one full-size axes.
# A single Axes is created because only one panel is drawn; a 2x2 grid would leave
# three empty subplots and shrink the chart to a quarter of the figure.
fig, ax = plt.subplots(figsize=(15, 8))

tcs_df_new[['Close', 'MA for 10 days', 'MA for 20 days', 'MA for 50 days']].plot(ax=ax)
ax.set_title('TCS')

fig.tight_layout()

In [None]:
# Daily return = fractional change of 'Close' from the previous row (first row is NaN).
tcs_df['Daily Return'] = tcs_df['Close'].pct_change()

# Plot the daily return on one full-size axes. A single Axes is created because only one
# panel is drawn; a 2x2 grid would leave three empty subplots.
fig, ax = plt.subplots(figsize=(15, 8))

tcs_df['Daily Return'].plot(ax=ax, legend=True, linestyle='--', marker='o')
ax.set_title('TCS')

fig.tight_layout()

In [None]:
# Sanity check: plotting the TCS 'Close' column against itself should show a perfectly
# linear relationship.
# x, y and data are passed by keyword because seaborn >= 0.12 no longer accepts them
# positionally; the keyword form works on older releases too.
sns.jointplot(x='Close', y='Close', data=tcs_df, kind='scatter', color='seagreen')

In [None]:
# Pairwise relationship grid over the columns currently in tcs_df, with a regression fit
# drawn in each off-diagonal panel (kind='reg').
sns.pairplot(tcs_df, kind='reg')

In [None]:
# Keep only the 'Close' column, both as a one-column frame and as a 2-D NumPy array.
data = tcs_df.filter(items=['Close'])
dataset = data.to_numpy()

# Number of leading rows used for training: 95% of the series, rounded up.
n_rows = len(dataset)
training_data_len = int(np.ceil(0.95 * n_rows))

training_data_len

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Scale closing prices to the [0, 1] range.
# The scaler is fitted on the training rows only, so the min/max of the held-out rows
# cannot leak into training. Every row is then transformed with those training
# statistics, which means held-out values may fall slightly outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(dataset[:training_data_len])
scaled_data = scaler.transform(dataset)

# Show only the first few scaled rows instead of dumping the whole array.
scaled_data[:5]

In [None]:
# Build the supervised training set from the scaled training rows.
# Each sample is the previous `window_size` scaled closes; its target is the close that
# immediately follows that window.
window_size = 60
train_data = scaled_data[0:int(training_data_len), :]

# Fail with a clear message instead of an opaque IndexError in the reshape below.
if len(train_data) <= window_size:
    raise ValueError(
        f"Need more than {window_size} training rows to build a window, "
        f"got {len(train_data)}"
    )

window_ends = range(window_size, len(train_data))
x_train = np.array([train_data[end - window_size:end, 0] for end in window_ends])
y_train = np.array([train_data[end, 0] for end in window_ends])

# The LSTM expects input shaped (samples, timesteps, features); there is one feature.
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
x_train.shape

In [None]:
from keras.models import Sequential
from keras.layers import Dense, LSTM

# Two stacked LSTM layers followed by two dense layers, ending in a single output unit.
# The first LSTM returns the full sequence so that the second LSTM can consume it.
model = Sequential([
    LSTM(128, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    LSTM(64, return_sequences=False),
    Dense(25),
    Dense(1),
])

# Mean squared error loss optimised with Adam.
model.compile(loss='mean_squared_error', optimizer='adam')

# One pass over the training windows, one sample per gradient step.
model.fit(x_train, y_train, epochs=1, batch_size=1)

In [None]:
# Scaled rows used for evaluation. The slice starts 60 rows before the split point so
# that the first held-out row has a full 60-step history available.
test_data = scaled_data[training_data_len - 60:, :]

# Unscaled closing prices of the held-out rows (the ground truth).
y_test = dataset[training_data_len:, :]

# One 60-step input window per held-out row, shaped (samples, timesteps, 1).
x_test = np.array([test_data[end - 60:end, 0] for end in range(60, len(test_data))])
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

# Predict in scaled space, then map the predictions back to price units.
predictions = scaler.inverse_transform(model.predict(x_test))

# Root mean squared error between predicted and actual closing prices.
squared_errors = (predictions - y_test) ** 2
rmse = np.sqrt(squared_errors.mean())
rmse

In [None]:
# Split the closing-price frame at the train/validation boundary for plotting.
train = data[:training_data_len]
# .copy() makes `valid` an independent frame, so adding a column below does not write
# into a slice of `data` and does not trigger pandas' SettingWithCopyWarning.
valid = data[training_data_len:].copy()
valid['Predictions'] = predictions

# Training closes, held-out closes and model predictions on one chart.
plt.figure(figsize=(16, 8))
plt.title('Model')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price USD ($)', fontsize=18)
plt.plot(train['Close'])
plt.plot(valid[['Close', 'Predictions']])
plt.legend(['Train', 'Val', 'Predictions'], loc='lower right')
plt.show()

In [None]:
# Display the held-out rows: actual 'Close' next to the model's 'Predictions'.
valid