In [419]:
import pandas as pd

import time
import math
import datetime as dt
import glob
import os

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px

import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.callbacks import EarlyStopping

In [420]:
#def sparkline(data, figsize=(4, 0.25), **kwargs):
#    data = list(data)
#
#    fig, ax = plt.subplots(1, 1, figsize=figsize, **kwargs)
#    ax.plot(data)
#    ax.fill_between(range(len(data)), data, len(data)*[min(data)], alpha=0.1)
#    ax.set_axis_off()
#    img = BytesIO()
#    plt.savefig(img)
#    plt.close()
#    return '<img src="data:image/png;base64, {}" />'.format(base64.b64encode(img.getvalue()).decode())

In [421]:
def getStockInfo(stock):
    """Download daily price history for one ticker and save it as a CSV.

    Requests daily rows from the Yahoo Finance download URL for the fixed
    window 2017-01-01 23:59 to 2023-01-24 23:59 (converted to epoch seconds
    with ``time.mktime``, i.e. interpreted in local time), adds a ``ticker``
    column holding the symbol, and writes the result to
    ``companies/<ticker>.csv`` relative to the current working directory.
    The ``companies`` directory is created if it does not exist.

    Parameters
    ----------
    stock : str
        Ticker symbol, e.g. ``"AAPL"``.

    Returns
    -------
    None
    """
    ticker = stock
    # The module is imported as ``import datetime as dt``; the bare name
    # ``datetime`` is not bound, so the class must be reached through ``dt``.
    period1 = int(time.mktime(dt.datetime(2017, 1, 1, 23, 59).timetuple()))
    period2 = int(time.mktime(dt.datetime(2023, 1, 24, 23, 59).timetuple()))
    interval = '1d'
    url = f'https://query1.finance.yahoo.com/v7/finance/download/{ticker}?period1={period1}&period2={period2}&interval={interval}&events=history&includeAdjustedClose=true'
    df = pd.read_csv(url)
    df = df.assign(ticker=ticker)
    os.makedirs('companies', exist_ok=True)
    # The default integer index is written as well; it comes back as an
    # "Unnamed: 0" column when the file is read again.
    df.to_csv(f'companies/{ticker}.csv')

In [422]:
# Download and cache the price history of every ticker analysed below.
stocks = ["AAPL", "DIS", "TSLA"]
for symbol in stocks:
    getStockInfo(symbol)

In [423]:
# Read the per-ticker CSVs from the same relative ``companies`` directory
# that getStockInfo writes to, rather than a machine-specific absolute path.
path = 'companies'
csv_files = glob.glob(os.path.join(path, "*.csv"))

# Fail with a clear message instead of pd.concat's generic error on no input.
if not csv_files:
    raise FileNotFoundError(f"No CSV files found in '{path}'")

df_list = (pd.read_csv(file) for file in csv_files)

# Stack all tickers into one frame with a fresh 0..n-1 index.
big_df = pd.concat(df_list, ignore_index=True)

In [424]:
# Cleaning data: drop every column whose name starts with "Unnamed"
# (the saved index column comes back under such a name when the CSVs are read).
big_df = big_df.loc[:, ~big_df.columns.str.contains('^Unnamed')]

# Give the price columns explicit snake_case names. 'ticker' already has its
# final name, so it needs no entry in the mapping.
df2 = big_df.rename(columns={
    'Date': 'stock_date',
    'Open': 'open_price',
    'High': 'high_price',
    'Low': 'low_price',
    'Close': 'close_price',
    'Adj Close': 'adj_close',
    'Volume': 'volume',
})

# Boolean mask of missing values, shown as the cell output.
df2.isnull()

Unnamed: 0,stock_date,open_price,high_price,low_price,close_price,adj_close,volume,ticker
0,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...
4570,False,False,False,False,False,False,False,False
4571,False,False,False,False,False,False,False,False
4572,False,False,False,False,False,False,False,False
4573,False,False,False,False,False,False,False,False


In [425]:
# Count the missing values in each column.
df2.isnull().sum()

stock_date     0
open_price     0
high_price     0
low_price      0
close_price    0
adj_close      0
volume         0
ticker         0
dtype: int64

In [426]:
# Replace missing closing prices with 0, then print how many are still missing.
df2['close_price'] = df2['close_price'].fillna(0)
print(df2['close_price'].isnull().sum())

0


In [427]:
# Discard every row that has at least one missing value, then recount the
# missing values per column.
df2 = df2.dropna(axis=0, how='any')
df2.isnull().sum()

stock_date     0
open_price     0
high_price     0
low_price      0
close_price    0
adj_close      0
volume         0
ticker         0
dtype: int64

In [428]:
# Print the cleaned frame as plain text.
print(df2)

      stock_date  open_price  high_price   low_price  close_price   adj_close  \
0     2017-01-03  105.300003  106.900002  105.209999   106.080002  101.584351   
1     2017-01-04  106.639999  107.500000  106.250000   107.440002  102.886719   
2     2017-01-05  107.169998  107.620003  106.790001   107.379997  102.829262   
3     2017-01-06  108.019997  109.349998  107.750000   108.980003  104.361458   
4     2017-01-09  108.580002  108.830002  108.110001   108.360001  103.767723   
...          ...         ...         ...         ...          ...         ...   
4570  2023-01-18  136.820007  138.610001  135.029999   135.210007  135.210007   
4571  2023-01-19  134.080002  136.250000  133.770004   135.270004  135.270004   
4572  2023-01-20  135.279999  138.020004  134.220001   137.869995  137.869995   
4573  2023-01-23  138.119995  143.320007  137.899994   141.110001  141.110001   
4574  2023-01-24  140.309998  143.160004  140.300003   142.529999  142.529999   

        volume ticker  
0  

In [429]:
# Show the dtype of each column of the cleaned frame.
result = df2.dtypes
print("Output:", result, sep="\n")

Output:
stock_date      object
open_price     float64
high_price     float64
low_price      float64
close_price    float64
adj_close      float64
volume           int64
ticker          object
dtype: object


Apple Stock Analysis

In [430]:
# Keep only the AAPL rows and summarise their numeric columns.
df_apple = df2.loc[df2['ticker'].eq('AAPL')]
df_apple.describe()

Unnamed: 0,open_price,high_price,low_price,close_price,adj_close,volume
count,1525.0,1525.0,1525.0,1525.0,1525.0,1525.0
mean,88.423569,89.467262,87.422934,88.486936,87.08757,115093900.0
std,47.881987,48.531168,47.230062,47.89999,48.346236,53508160.0
min,28.950001,29.0825,28.690001,29.004999,27.144339,35195900.0
25%,43.970001,44.4375,43.7075,44.105,42.222614,80194000.0
50%,67.175003,68.425003,66.464996,67.692497,66.316833,100797600.0
75%,135.759995,137.330002,133.770004,135.389999,135.018158,133789600.0
max,182.630005,182.940002,179.119995,182.009995,180.959732,447940000.0


In [431]:
# Preview the first rows of the Apple subset.
df_apple.head()

Unnamed: 0,stock_date,open_price,high_price,low_price,close_price,adj_close,volume,ticker
3050,2017-01-03,28.950001,29.0825,28.690001,29.0375,27.174753,115127600,AAPL
3051,2017-01-04,28.9625,29.127501,28.9375,29.004999,27.144339,84472400,AAPL
3052,2017-01-05,28.98,29.215,28.952499,29.1525,27.282377,88774400,AAPL
3053,2017-01-06,29.195,29.540001,29.1175,29.477501,27.586525,127007600,AAPL
3054,2017-01-09,29.487499,29.8575,29.485001,29.747499,27.839207,134247600,AAPL


In [432]:
# Line chart of Apple's closing price against the trading date.
apple_close_trace = go.Scatter(
    x=df_apple['stock_date'],
    y=df_apple['close_price'],
)
fig = go.Figure(data=[apple_close_trace])
fig.update_layout(title="Apple's Share average within the last 5 years")
fig.show()

In [433]:
# Plot Apple's daily high and low prices as two lines over the trading dates.
app_share_price_fig = px.line(
    df_apple,
    x=df_apple['stock_date'],
    y=[df_apple['high_price'], df_apple['low_price']],
    title="Apple's Share high and low prices within the last 5 years",
)
app_share_price_fig.show()

#fig2 = go.Figure([go.Scatter(x=df_apple['stock_date'], y = [df_apple['high_price'],df_apple['low_price']])])
#fig2.update_layout(title='''Apple's Share high and low prices within the last 3 years''')
#fig2.show()

In [434]:
# Candlestick chart built from Apple's open/high/low/close columns.
apple_candles = go.Candlestick(
    x=df_apple['stock_date'],
    open=df_apple['open_price'],
    high=df_apple['high_price'],
    low=df_apple['low_price'],
    close=df_apple['close_price'],
)
app_candle_fig = go.Figure(data=[apple_candles])
app_candle_fig.show()

In [435]:
# Mean closing price over all Apple rows.
df_apple['close_price'].mean()

88.48693603737699

Disney Stock Analysis

In [436]:
# Keep only the DIS rows and summarise their numeric columns.
df_dis = df2.loc[df2['ticker'].eq('DIS')]
df_dis.describe()

Unnamed: 0,open_price,high_price,low_price,close_price,adj_close,volume
count,1525.0,1525.0,1525.0,1525.0,1525.0,1525.0
mean,126.87143,128.079128,125.478282,126.781843,125.512709,10567980.0
std,27.383397,27.608893,27.099282,27.322735,28.118341,7021585.0
min,84.489998,86.690002,79.07,84.169998,84.169998,2217600.0
25%,106.360001,107.43,105.190002,106.099998,103.403831,6716800.0
50%,115.0,116.220001,113.870003,114.959999,113.529999,8558900.0
75%,141.990005,143.270004,140.740005,142.199997,141.850006,11681200.0
max,200.190002,203.020004,195.399994,201.910004,201.910004,87410700.0


In [437]:
# Preview the first rows of the Disney subset.
df_dis.head()

Unnamed: 0,stock_date,open_price,high_price,low_price,close_price,adj_close,volume,ticker
0,2017-01-03,105.300003,106.900002,105.209999,106.080002,101.584351,8884300,DIS
1,2017-01-04,106.639999,107.5,106.25,107.440002,102.886719,8322000,DIS
2,2017-01-05,107.169998,107.620003,106.790001,107.379997,102.829262,6303000,DIS
3,2017-01-06,108.019997,109.349998,107.75,108.980003,104.361458,9551300,DIS
4,2017-01-09,108.580002,108.830002,108.110001,108.360001,103.767723,6195200,DIS


In [438]:
# Line chart of Disney's closing price against the trading date.
dis_share_price_fog = px.line(
    df_dis,
    x=df_dis['stock_date'],
    y=df_dis['close_price'],
    title="Disney's Share average the last 5 years",
)
dis_share_price_fog.show()

In [439]:
# Plot Disney's daily high and low prices as two lines over the trading dates.
fig3 = px.line(
    df_dis,
    x=df_dis['stock_date'],
    y=[df_dis['high_price'], df_dis['low_price']],
    title="Disney's Share high and low prices within the last 5 years",
)
fig3.show()

In [440]:
# Candlestick chart built from Disney's open/high/low/close columns.
disney_candles = go.Candlestick(
    x=df_dis['stock_date'],
    open=df_dis['open_price'],
    high=df_dis['high_price'],
    low=df_dis['low_price'],
    close=df_dis['close_price'],
)
dis_candle_fig = go.Figure(data=[disney_candles])
dis_candle_fig.show()

In [441]:
# Mean closing price over all Disney rows.
df_dis['close_price'].mean()

126.78184253311466

Tesla Stock Analysis

In [442]:
# Keep only the TSLA rows and summarise their numeric columns.
df_tsla = df2.loc[df2['ticker'].eq('TSLA')]
df_tsla.describe()

Unnamed: 0,open_price,high_price,low_price,close_price,adj_close,volume
count,1525.0,1525.0,1525.0,1525.0,1525.0,1525.0
mean,113.559963,116.155608,110.688337,113.467724,113.467724,126764700.0
std,114.858461,117.489779,111.863346,114.681997,114.681997,85138910.0
min,12.073333,12.445333,11.799333,11.931333,11.931333,29401800.0
25%,20.570667,20.931999,20.200001,20.584667,20.584667,73401000.0
50%,30.76,31.799999,29.478001,30.102667,30.102667,99397500.0
75%,223.583328,228.850006,217.613327,222.960007,222.960007,147438900.0
max,411.470001,414.496674,405.666656,409.970001,409.970001,914082000.0


In [443]:
# Preview the first rows of the Tesla subset.
df_tsla.head()

Unnamed: 0,stock_date,open_price,high_price,low_price,close_price,adj_close,volume,ticker
1525,2017-01-03,14.324,14.688667,14.064,14.466,14.466,88849500,TSLA
1526,2017-01-04,14.316667,15.2,14.287333,15.132667,15.132667,168202500,TSLA
1527,2017-01-05,15.094667,15.165333,14.796667,15.116667,15.116667,88675500,TSLA
1528,2017-01-06,15.128667,15.354,15.03,15.267333,15.267333,82918500,TSLA
1529,2017-01-09,15.264667,15.461333,15.2,15.418667,15.418667,59692500,TSLA


In [444]:
# Line chart of Tesla's closing price against the trading date.
tsla_sp_fig = px.line(
    df_tsla,
    x=df_tsla['stock_date'],
    y=df_tsla['close_price'],
    title="Tesla's Share average within the last 5 years",
)
tsla_sp_fig.show()

In [445]:
# Plot Tesla's daily high and low prices as two lines over the trading dates.
tsla_hl_fig = px.line(
    df_tsla,
    x=df_tsla['stock_date'],
    y=[df_tsla['high_price'], df_tsla['low_price']],
    title="Tesla's Share high and low prices within the last 5 years",
)
tsla_hl_fig.show()

In [446]:
# Candlestick chart built from Tesla's open/high/low/close columns.
tesla_candles = go.Candlestick(
    x=df_tsla['stock_date'],
    open=df_tsla['open_price'],
    high=df_tsla['high_price'],
    low=df_tsla['low_price'],
    close=df_tsla['close_price'],
)
tsla_candle_fig = go.Figure(data=[tesla_candles])
tsla_candle_fig.show()

In [447]:
# Mean closing price over all Tesla rows.
df_tsla['close_price'].mean()

113.46772369967215

In [448]:
# Print the index range, column dtypes and non-null counts of the Tesla subset.
df_tsla.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1525 entries, 1525 to 3049
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   stock_date   1525 non-null   object 
 1   open_price   1525 non-null   float64
 2   high_price   1525 non-null   float64
 3   low_price    1525 non-null   float64
 4   close_price  1525 non-null   float64
 5   adj_close    1525 non-null   float64
 6   volume       1525 non-null   int64  
 7   ticker       1525 non-null   object 
dtypes: float64(5), int64(1), object(2)
memory usage: 107.2+ KB


Predicting TSLA Share Price

In [449]:
# Split Tesla's closing prices in row order: the first 80% of rows train the
# model, the remaining 20% test it.
split_percent = 0.80

# Column vector of closing prices, one row per trading day.
close_data = df_tsla['close_price'].values.reshape((-1, 1))
split = int(split_percent * len(close_data))

close_train, close_test = close_data[:split], close_data[split:]

# Matching date slices, used later as the x-axis of the prediction plot.
date_train, date_test = df_tsla['stock_date'][:split], df_tsla['stock_date'][split:]

print("Train Size :", len(close_train), "Test Size :", len(close_test))

Train Size : 1220 Test Size : 305


In [450]:
# Number of consecutive closing prices fed to the model per sample.
look_back = 15

# Each generator uses the same array as both input data and targets.
train_generator = TimeseriesGenerator(
    data=close_train,
    targets=close_train,
    length=look_back,
    batch_size=20,
)
test_generator = TimeseriesGenerator(
    data=close_test,
    targets=close_test,
    length=look_back,
    batch_size=1,
)

In [451]:
# One LSTM layer (10 units, ReLU) over windows of `look_back` single-feature
# steps, followed by a single-output dense layer.
model = Sequential([
    LSTM(10, activation='relu', input_shape=(look_back, 1)),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse')

# Number of passes over the training generator.
num_epochs = 25
model.fit(train_generator, epochs=num_epochs, verbose=1)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x7f7ad8398ac0>

In [452]:
# Predict over the test windows and plot training data, predictions and the
# actual test prices on one chart.
prediction = model.predict(test_generator)

# Flatten everything to 1-D for plotting.
close_train = close_train.reshape((-1))
close_test = close_test.reshape((-1))
prediction = prediction.reshape((-1))

# Each test sample consumes `look_back` prices and targets the one that
# follows, so the first prediction belongs to test row `look_back` and there
# are `look_back` fewer predictions than test rows. Skip those leading dates
# so every prediction is drawn on the day it actually forecasts.
date_prediction = date_test.iloc[look_back:]

trace1 = go.Scatter(
    x = date_train,
    y = close_train,
    mode = 'lines',
    name = 'Data'
)
trace2 = go.Scatter(
    x = date_prediction,
    y = prediction,
    mode = 'lines',
    name = 'Prediction'
)
trace3 = go.Scatter(
    x = date_test,
    y = close_test,
    mode='lines',
    name = 'Actual'
)
layout = go.Layout(
    title = "TSLA Stock Prediction",
    xaxis = {'title' : "Date"},
    yaxis = {'title' : "Close"}
)
fig = go.Figure(data=[trace1, trace2, trace3], layout=layout)
fig.show()