## Introduction
In this notebook, our focus will be on constructing a model with the ability to predict the movement of stock prices for Tesla's stock.

In [6]:
# importing necessary libraries
import pandas as pd
import numpy as np
import yfinance as yf
import plotly.express as px
import matplotlib.pyplot as plt

### Getting Our Price Data

In [2]:
# get tesla stock price historical data
ticker="tsla"
tesla_prices = yf.Ticker(ticker)

In [3]:
tesla_prices = tesla_prices.history(period='max')

In [4]:
# view our stock price data
tesla_prices

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-06-29 00:00:00-04:00,1.266667,1.666667,1.169333,1.592667,281494500,0.0,0.0
2010-06-30 00:00:00-04:00,1.719333,2.028000,1.553333,1.588667,257806500,0.0,0.0
2010-07-01 00:00:00-04:00,1.666667,1.728000,1.351333,1.464000,123282000,0.0,0.0
2010-07-02 00:00:00-04:00,1.533333,1.540000,1.247333,1.280000,77097000,0.0,0.0
2010-07-06 00:00:00-04:00,1.333333,1.333333,1.055333,1.074000,103003500,0.0,0.0
...,...,...,...,...,...,...,...
2023-08-17 00:00:00-04:00,226.059998,226.740005,218.830002,219.220001,120718400,0.0,0.0
2023-08-18 00:00:00-04:00,214.119995,217.580002,212.360001,215.490005,135813700,0.0,0.0
2023-08-21 00:00:00-04:00,221.550003,232.130005,220.580002,231.279999,135702700,0.0,0.0
2023-08-22 00:00:00-04:00,240.250000,240.820007,229.550003,233.190002,130442800,0.0,0.0


In [5]:
# view the index of our dataframe
tesla_prices.index

DatetimeIndex(['2010-06-29 00:00:00-04:00', '2010-06-30 00:00:00-04:00',
               '2010-07-01 00:00:00-04:00', '2010-07-02 00:00:00-04:00',
               '2010-07-06 00:00:00-04:00', '2010-07-07 00:00:00-04:00',
               '2010-07-08 00:00:00-04:00', '2010-07-09 00:00:00-04:00',
               '2010-07-12 00:00:00-04:00', '2010-07-13 00:00:00-04:00',
               ...
               '2023-08-10 00:00:00-04:00', '2023-08-11 00:00:00-04:00',
               '2023-08-14 00:00:00-04:00', '2023-08-15 00:00:00-04:00',
               '2023-08-16 00:00:00-04:00', '2023-08-17 00:00:00-04:00',
               '2023-08-18 00:00:00-04:00', '2023-08-21 00:00:00-04:00',
               '2023-08-22 00:00:00-04:00', '2023-08-23 00:00:00-04:00'],
              dtype='datetime64[ns, America/New_York]', name='Date', length=3311, freq=None)

### Exploratory Data Analysis

In [10]:
# view info on our dataset
tesla_prices.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3311 entries, 2010-06-29 00:00:00-04:00 to 2023-08-23 00:00:00-04:00
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Open          3311 non-null   float64
 1   High          3311 non-null   float64
 2   Low           3311 non-null   float64
 3   Close         3311 non-null   float64
 4   Volume        3311 non-null   int64  
 5   Dividends     3311 non-null   float64
 6   Stock Splits  3311 non-null   float64
dtypes: float64(6), int64(1)
memory usage: 206.9 KB


In [11]:
# view description of our dataset
tesla_prices.describe()

Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits
count,3311.0,3311.0,3311.0,3311.0,3311.0,3311.0,3311.0
mean,65.871775,67.341062,64.279954,65.847451,96186450.0,0.0,0.002416
std,98.827461,101.06562,96.341441,98.734594,80973010.0,0.0,0.101322
min,1.076,1.108667,0.998667,1.053333,1777500.0,0.0,0.0
25%,9.976667,10.221,9.745,9.920333,44940750.0,0.0,0.0
50%,16.675333,16.926001,16.440001,16.691999,79414500.0,0.0,0.0
75%,54.723,55.476,53.256332,54.164,122726200.0,0.0,0.0
max,411.470001,414.496674,405.666656,409.970001,914082000.0,0.0,5.0


In [9]:
# plot historic closing prices
fig = px.line(tesla_prices, x=tesla_prices.index, y=tesla_prices['Close'], title='Tesla Closing Price Trend')
fig.show()

In [22]:
# View the distribution of our daily trading volumes
px.histogram(data_frame=tesla_prices, x='Volume',nbins=20, title='Distribution of Trading Volume')

In [23]:
# create columns for moving avearages
tesla_prices['30-day MA'] = tesla_prices['Close'].rolling(window=30).mean()
tesla_prices['60-day MA'] = tesla_prices['Close'].rolling(window=60).mean()
tesla_prices['90-day MA'] = tesla_prices['Close'].rolling(window=90).mean()

In [25]:
tesla_prices.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Tomorrow,Target,30-day MA,60-day MA,90-day MA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-08-17 00:00:00-04:00,226.059998,226.740005,218.830002,219.220001,120718400,215.490005,0,260.283333,249.444,223.361444
2023-08-18 00:00:00-04:00,214.119995,217.580002,212.360001,215.490005,135813700,231.279999,1,258.318667,249.939333,223.680333
2023-08-21 00:00:00-04:00,221.550003,232.130005,220.580002,231.279999,135702700,233.190002,1,257.041,250.745667,224.244111
2023-08-22 00:00:00-04:00,240.25,240.820007,229.550003,233.190002,130442800,236.699997,1,255.821,251.557667,224.769555
2023-08-23 00:00:00-04:00,229.339996,237.139999,229.289993,236.699997,69154809,,0,254.644667,252.283167,225.344


In [33]:
# view our moving average values on a line chart
fig = px.line(data_frame=tesla_prices, x=tesla_prices.index, y=['30-day MA', '60-day MA', '90-day MA'], 
              title='Moving Average Trends')
fig.show()

The histogram illustrates that the trading volume distribution of Tesla stock deviates from a normal distribution, displaying a noticeable leftward skew. Most trading days witness trading volumes in the range of 50 to 100 million units of Tesla stock.

In [12]:
tesla_prices[tesla_prices['Stock Splits'] != 0]

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-08-31 00:00:00-04:00,148.203339,166.713333,146.703339,166.106674,355123200,0.0,5.0
2022-08-25 00:00:00-04:00,302.359985,302.959991,291.600006,296.070007,53230000,0.0,3.0


In [13]:
# dropping the dividends and stock splits columns
del tesla_prices['Stock Splits'] 
del tesla_prices['Dividends']

In [14]:
tesla_prices.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-06-29 00:00:00-04:00,1.266667,1.666667,1.169333,1.592667,281494500
2010-06-30 00:00:00-04:00,1.719333,2.028,1.553333,1.588667,257806500
2010-07-01 00:00:00-04:00,1.666667,1.728,1.351333,1.464,123282000
2010-07-02 00:00:00-04:00,1.533333,1.54,1.247333,1.28,77097000
2010-07-06 00:00:00-04:00,1.333333,1.333333,1.055333,1.074,103003500


In [15]:
# add a column to show the next day's closing price
tesla_prices['Tomorrow'] = tesla_prices['Close'].shift(-1)

In [16]:
tesla_prices.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Tomorrow
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-06-29 00:00:00-04:00,1.266667,1.666667,1.169333,1.592667,281494500,1.588667
2010-06-30 00:00:00-04:00,1.719333,2.028,1.553333,1.588667,257806500,1.464
2010-07-01 00:00:00-04:00,1.666667,1.728,1.351333,1.464,123282000,1.28
2010-07-02 00:00:00-04:00,1.533333,1.54,1.247333,1.28,77097000,1.074
2010-07-06 00:00:00-04:00,1.333333,1.333333,1.055333,1.074,103003500,1.053333


In [17]:
# create a target column to check if the price went up or dow from the previous day
tesla_prices['Target'] = (tesla_prices['Tomorrow'] > tesla_prices['Close']).astype('int')

In [19]:
tesla_prices

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Tomorrow,Target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-06-29 00:00:00-04:00,1.266667,1.666667,1.169333,1.592667,281494500,1.588667,0
2010-06-30 00:00:00-04:00,1.719333,2.028000,1.553333,1.588667,257806500,1.464000,0
2010-07-01 00:00:00-04:00,1.666667,1.728000,1.351333,1.464000,123282000,1.280000,0
2010-07-02 00:00:00-04:00,1.533333,1.540000,1.247333,1.280000,77097000,1.074000,0
2010-07-06 00:00:00-04:00,1.333333,1.333333,1.055333,1.074000,103003500,1.053333,0
...,...,...,...,...,...,...,...
2023-08-17 00:00:00-04:00,226.059998,226.740005,218.830002,219.220001,120718400,215.490005,0
2023-08-18 00:00:00-04:00,214.119995,217.580002,212.360001,215.490005,135813700,231.279999,1
2023-08-21 00:00:00-04:00,221.550003,232.130005,220.580002,231.279999,135702700,233.190002,1
2023-08-22 00:00:00-04:00,240.250000,240.820007,229.550003,233.190002,130442800,236.699997,1


We have added a target column that is 1 when the closing price went up from the previous day and 0 when it went down.