In [202]:
# Loading libraries and dataset
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt


# Loading Dataset

In [203]:
# Read the Tesla price-history CSV into a DataFrame and show it as the cell output.
csv_path = "../input/tesla-stocks-where-is-my-cyber-truck/tesla_stonks_up_and_down.csv"
data = pd.read_csv(csv_path)
data


In [204]:
import seaborn as sns

# Column dtypes and non-null counts.
data.info()

# Heatmap of the null mask: any missing cell shows up in a contrasting colour.
null_mask = data.isnull()
sns.heatmap(null_mask, cmap='viridis', cbar=False, yticklabels=False)

# Number of missing values per column (shown as the cell output).
null_mask.sum()


In [205]:
# Duplicate rows: absolute count and share of the whole dataset.
duplicate_count = data.duplicated().sum()
duplicate_share = duplicate_count/len(data)*100
print(f'Duplicates in the dataset: {duplicate_count}')
print(f'Percentage of duplicates: {duplicate_share}%')  # 0.0% means there are no duplicate rows


In [206]:
# Cardinality: number of distinct non-null values in each column.
data.nunique(axis=0, dropna=True)


# Data Engineering

In [207]:
# Missing values per column.
data.isnull().sum(axis=0)

# Statistics

In [208]:
# Render floats with two decimal places in every DataFrame displayed from here on.
# The full option name is spelled out: abbreviated keys are resolved by pattern
# matching and stop working if they ever match more than one option.
pd.set_option("display.float_format", "{:.2f}".format)

# Summary statistics for the numeric columns of the full dataset.
data.describe()

# Analysis for the year 2021
* Open vs Close
* High vs Low
* Date vs Volume


In [209]:
# Keep only the rows dated within 2021 (both bounds inclusive).
# NOTE(review): the bounds are string literals, so this presumably relies on
# Date holding ISO-formatted (YYYY-MM-DD) text - confirm the column dtype.
date_col = data["Date"]
in_2021 = (date_col >= "2021-01-01") & (date_col <= "2021-12-31")
data_filtered = data[in_2021]


In [210]:
# First and last date present in the filtered frame.
filtered_dates = data_filtered["Date"]
print("Start Date : ", filtered_dates.min())
print("End Date   : ", filtered_dates.max())


In [211]:
# Summary statistics (count, mean, std, min, quartiles, max) for the filtered rows.
data_filtered.describe(percentiles=[0.25, 0.5, 0.75])

# Inference:
* Mean Opening Price :   $779.84

* Mean Closing Price :   $799.99

* Mean High value    :   $795.73

* Mean Low value     :   $762.71

* Mean Volume Traded :   27391628.97


In [212]:
# Euclidean (L2) norm of the element-wise Close - Adj Close difference;
# it is 0 only when the two columns are identical on every row.
close_gap = data_filtered["Close"] - data_filtered["Adj Close"]
print("Distribution Variance for Close vs Adj Close : {:.2f}".format(np.linalg.norm(close_gap)))


* Inference : A norm of 0 for the Close − Adj Close difference shows that the adjusted closing price was equal to the closing price on every (or almost every) trading day.

In [213]:
# Open vs Close for the filtered period, plotted against the trading-day number.
open_close_gap = data_filtered["Open"] - data_filtered["Close"]
print("\n\nDistribution Variance for Open vs Close : {:.2f}".format(np.linalg.norm(open_close_gap)))

plt.rcParams["axes.linewidth"]="0"  # zero-width axes border
plt.figure(figsize=(25,8))

# x axis is the 0-based position of each row, not the calendar date.
trading_days = list(range(data_filtered.Date.size))
for column in ("Open", "Close"):
    plt.plot(trading_days, data_filtered[column], label=column)

plt.title("Open vs Close for 2021")
plt.xlabel("Number of Days")
plt.ylabel("Value")
plt.tick_params(bottom=False,left=False)
plt.xticks(np.arange(0,data_filtered.Open.size+1,5))  # one tick every 5 rows
plt.grid(True,linewidth=0.5,alpha=0.5)
plt.legend()
plt.tight_layout()


In [214]:
# Dates on which the opening / closing price hit its maximum and minimum.
# When several rows tie, the first matching row is reported.
open_prices = data_filtered["Open"]
close_prices = data_filtered["Close"]
extreme_rows = [
    ("Highest Opening Price Date : ", open_prices == open_prices.max()),
    ("Highest Closing Price Date : ", close_prices == close_prices.max()),
    ("Lowest Opening Price Date  : ", open_prices == open_prices.min()),
    ("Lowest Closing Price Date  : ", close_prices == close_prices.min()),
]
for label, row_mask in extreme_rows:
    print(label, data_filtered.loc[row_mask, "Date"].values[0])

* Inference : The highest opening and closing prices were observed on the 45th and 46th trading days respectively. The lowest opening price was observed on the 95th trading day and the lowest closing price on the 105th trading day.

In [215]:
# High vs Low for the filtered period, plotted against the trading-day number.
# The printed figure is the Euclidean (L2) norm of the daily High - Low spread.
print("\n\nDistribution Variance for High vs Low   : {:.2f}".format(np.linalg.norm(data_filtered.High - data_filtered.Low)))
plt.rcParams["axes.linewidth"]="0"  # zero-width axes border
plt.figure(figsize=(20,8))
# x axis is the 0-based position of each row, not the calendar date.
plt.plot(list(range(data_filtered.Date.size)),data_filtered.High,label="High")
plt.plot(list(range(data_filtered.Date.size)),data_filtered.Low,label="Low")
plt.tick_params(bottom=False,left=False)
# Title fixed: data_filtered holds the 2021 rows, not 2019.
plt.title("High vs Low for 2021")
plt.xlabel("Number of Days")
plt.ylabel("Value")
plt.xticks(np.arange(0,data_filtered.Open.size+1,5))  # one tick every 5 rows
plt.grid(True,linewidth=0.5,alpha=0.5)
plt.legend()
plt.show()


In [216]:
# Dates of the largest and smallest daily High, and of the largest and smallest
# daily Low. When several rows tie, the first matching row is reported.
high_col = data_filtered["High"]
low_col = data_filtered["Low"]

peak_high_rows = data_filtered[high_col == high_col.max()]
peak_low_rows = data_filtered[low_col == low_col.max()]
print("Peak High Price Date    : ", peak_high_rows["Date"].values[0])
print("Peak Low Price Date     : ", peak_low_rows["Date"].values[0])

floor_high_rows = data_filtered[high_col == high_col.min()]
floor_low_rows = data_filtered[low_col == low_col.min()]
print("Lowest High Price Date  : ", floor_high_rows["Date"].values[0])
print("Lowest Low Price Date   : ", floor_low_rows["Date"].values[0])


* Inference : The peaks of the High and Low values were both observed on the 45th trading day, and the lowest High and Low values were observed on the 90th and 95th trading days respectively.


In [217]:
# Traded volume for the filtered period, plotted against the trading-day number.
print("\n\n")
plt.rcParams["axes.linewidth"]="0"  # zero-width axes border
plt.figure(figsize=(30,8))

# x axis is the 0-based position of each row, not the calendar date.
trading_days = list(range(data_filtered.Date.size))
plt.plot(trading_days, data_filtered["Volume"], label="Volume")

plt.title("Sale Volume for 2021")
plt.xlabel("Number of Days")
plt.ylabel("Value")
plt.tick_params(bottom=False,left=False)
plt.xticks(np.arange(0,data_filtered.Open.size+1,5))  # one tick every 5 rows
plt.grid(True,linewidth=0.5,alpha=0.5)
plt.legend()
plt.show()

In [218]:
# Dates with the highest and lowest traded volume (first matching row on ties).
volume = data_filtered["Volume"]
busiest_day = data_filtered.loc[volume == volume.max(), "Date"].values[0]
quietest_day = data_filtered.loc[volume == volume.min(), "Date"].values[0]
print("Peak Volume Date   : ", busiest_day)
print("Lowest Volume Date : ", quietest_day)


* Inference : The peak trading volume was observed on the 5th trading day, after which volume stayed low until the next two peaks on the 35th and 45th trading days respectively.



# Analysis for the year 2022
* Open vs Close
* High vs Low
* Date vs Volume


In [219]:
# Make sure that you have all these libraries available to run the code successfully
# pandas_datareader's `data` module is imported under an alias: a bare `data`
# would rebind the `data` DataFrame loaded earlier in the notebook to a module.
from pandas_datareader import data as pdr_data
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import datetime as dt
import urllib.request, json
import os
import numpy as np
import tensorflow as tf # This code has been tested with TensorFlow 1.6
import math
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM

# Separate copy of the same CSV, used by the indicator and modelling cells below.
df=pd.read_csv('../input/tesla-stocks-where-is-my-cyber-truck/tesla_stonks_up_and_down.csv')


In [220]:
pip install plotly


# 20 Days Indicators

In [221]:
# Daily change (Close - Open) and a bar colour derived from its sign:
# green when the close is at or above the open, red when it is below.
# Rows whose change is NaN satisfy neither test and get no colour.
price_change = df['Close'] - df['Open']
df['diff'] = price_change
df.loc[price_change >= 0, 'color'] = 'green'
df.loc[price_change < 0, 'color'] = 'red'


In [222]:
# `make_subplots` and `go` are not imported by any earlier cell, so this cell
# raised NameError on a fresh kernel. They are imported here, after the plotly
# install cell.
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Candlestick chart with a 20-row moving average of Close on the primary y axis
# and volume bars on a hidden secondary y axis.
fig3 = make_subplots(specs=[[{"secondary_y": True}]])
fig3.add_trace(go.Candlestick(x=df.index,
                              open=df['Open'],
                              high=df['High'],
                              low=df['Low'],
                              close=df['Close'],
                              name='Price'))
fig3.add_trace(go.Scatter(x=df.index,y=df['Close'].rolling(window=20).mean(),marker_color='blue',name='20 Day MA'))
# Volume bars take the per-row colour from df['color'].
fig3.add_trace(go.Bar(x=df.index, y=df['Volume'], name='Volume', marker={'color':df['color']}),secondary_y=True)
# NOTE(review): the fixed 0-700M range presumably keeps the volume bars in the
# lower part of the plot - confirm against the data's actual volume range.
fig3.update_yaxes(range=[0,700000000],secondary_y=True)
fig3.update_yaxes(visible=False, secondary_y=True)
fig3.update_layout(xaxis_rangeslider_visible=False)  #hide range slider
fig3.update_layout(title={'text':'Tesla', 'x':0.5})
fig3.show()

In [223]:
# Low and Close for the whole dataset, drawn on one set of axes.
price_ax = df[['Low','Close']].plot(figsize=(20,8))
price_ax.set_title('Tesla stock action')
price_ax.set_xlabel('Date')
price_ax.set_ylabel('stock action ')


# Use the Close Stock Price Column to Train Your Model.

In [224]:
# The model is trained on the Close column only.
# Note: this rebinds `data` from the full price table to a one-column frame.
data = df.filter(items=['Close'])

# Plain 2-D numpy array of the closing prices.
dataset = data.to_numpy()

# The first 80% of the rows (rounded up) form the training set.
training_data_len = math.ceil(0.8 * len(dataset))
dataset


In [225]:
# Scale the prices with a min-max scaler targeting the range 0..1.
# The scaler is fitted on the training rows only, so the minimum and maximum of
# the held-out rows do not leak into training; the fitted scaler is then applied
# to the whole series. Held-out values can therefore fall outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(dataset[:training_data_len])
scaled_data = scaler.transform(dataset)

In [226]:
# Build the training samples: each input is the 60 scaled values that precede
# a row, and the target is the scaled value of that row.
train_data = scaled_data[:training_data_len, :]
window_ends = range(60, len(train_data))
x_train = [train_data[end - 60:end, 0] for end in window_ends]
y_train = [train_data[end, 0] for end in window_ends]


In [227]:
# Turn the lists of windows / targets into numpy arrays.
x_train = np.array(x_train)
y_train = np.array(y_train)

# Add a trailing axis of length 1: (samples, time steps) -> (samples, time steps, 1).
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)


# Building the Model by Importing the Crucial Libraries and Adding Different Layers to LSTM.

In [228]:
# Stacked LSTM regressor: two 50-unit LSTM layers followed by two dense layers,
# ending in a single output value per sample.
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    LSTM(units=50, return_sequences=False),
    Dense(units=25),
    Dense(units=1),
])

In [229]:
# Compile, then actually fit the model. The original cell only compiled it, so
# the predictions made further down came from untrained, randomly initialised
# weights.
model.compile(optimizer='adam', loss='mean_squared_error')

# batch_size and epochs are starting points; tune them for this dataset.
model.fit(x_train, y_train, batch_size=32, epochs=10)

In [230]:
# Build the test inputs. The slice starts 60 rows before the split point so the
# first test window has a full 60-value history.
test_data = scaled_data[training_data_len - 60:, :]

# Unscaled targets: every row of `dataset` from the split point to the end.
y_test = dataset[training_data_len:, :]

x_test = [test_data[end - 60:end, 0] for end in range(60, len(test_data))]

In [231]:
# Stack the test windows and add the trailing axis of length 1.
x_test = np.array(x_test)
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

# Predict in scaled space, then map the result back through the scaler.
predictions = scaler.inverse_transform(model.predict(x_test))

# Extracting the Actual Stock Prices of Jun-2022.

In [232]:
# Root mean squared error between the predictions and the unscaled targets.
residuals = predictions - y_test
rmse = np.sqrt(np.mean(np.square(residuals)))
rmse

# Plotting the Actual and Predicted Prices for Tesla

In [233]:
# Plot the training series, the held-out actual values and the model predictions.
train = data[:training_data_len]

# Named `valid` rather than `display` so IPython's built-in display() is not
# shadowed. The explicit copy makes the column assignment below write to an
# independent frame instead of a slice of `data` (avoids SettingWithCopyWarning).
valid = data[training_data_len:].copy()
valid['Predictions'] = predictions

plt.figure(figsize=(16,8))
plt.title('Model')
plt.xlabel('Date', fontsize=18)
# Axis label corrected from INR to USD: the notebook quotes prices in dollars.
plt.ylabel('Close Price USD', fontsize=18)
plt.plot(train['Close'])
plt.plot(valid['Close'])
plt.plot(valid['Predictions'])
plt.legend(['Train', 'Val', 'Predictions'], loc='upper right')
plt.show()
