In [14]:
import pandas as pd 
import sqlalchemy as sql
import alpaca_trade_api as tradeapi 
import os
from dotenv import load_dotenv 
from pandas_datareader import data  #remember to pip install pandas-datareader
import pandas_datareader as pdr 
import math 
import numpy as np 

%matplotlib inline 

In [2]:
#downloading data from yahoo (USO, TSLA and SPY)

symbol='USO','TSLA', 'SPY', 'CL=F'
start='2010-06-29'
end='2021-10-15'

df=data.get_data_yahoo(symbol, start, end)['Close']
df.head()

Symbols,USO,TSLA,SPY,CL=F
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-06-29,273.359985,4.778,104.209999,75.940002
2010-06-30,271.679993,4.766,103.220001,75.629997
2010-07-01,262.959991,4.392,102.760002,72.949997
2010-07-02,260.799988,3.84,102.199997,72.139999
2010-07-06,260.880005,3.222,102.870003,71.980003


In [3]:
#creating percent change columns in df

df['USO_pct_change']= df['USO'].pct_change()
df['TSLA_pct_change']= df['TSLA'].pct_change()
df['SPY_pct_change']= df['SPY'].pct_change()
df['CL=F_pct_change']= df['CL=F'].pct_change()


df.describe()


Symbols,USO,TSLA,SPY,CL=F,USO_pct_change,TSLA_pct_change,SPY_pct_change,CL=F_pct_change
count,2846.0,2846.0,2846.0,2823.0,2845.0,2845.0,2845.0,2845.0
mean,164.771809,106.11251,226.313131,68.814733,-0.000287,0.002437,0.000568,-0.000936
std,102.387174,189.260934,82.173761,22.907566,0.0227,0.035274,0.010644,0.067501
min,17.040001,3.16,102.199997,-37.630001,-0.25315,-0.210628,-0.109424,-3.059661
25%,85.120003,10.464,158.595005,49.715,-0.010686,-0.014816,-0.003469,-0.010779
50%,112.760002,45.909,209.770004,63.73,0.00069,0.001255,0.000649,0.000616
75%,276.460007,65.295502,279.069992,91.975002,0.011073,0.019081,0.005452,0.011738
max,361.200012,883.090027,453.190002,113.93,0.166667,0.243951,0.090603,0.376623


In [4]:
# Working with SQL in order to not ping the Yahoo too much  
#Creating SQL connection/engine

database_connection_string = 'sqlite:///'
engine = sql.create_engine(database_connection_string)
engine

Engine(sqlite:///)

In [5]:
#shoving closing_prices_df from Yahoo into a SQL Table 

df.to_sql('closing_prices', engine, index= True, if_exists='replace' )


In [6]:
#Code to grab the data from the SQL table into a pandas df to analyze further 

closing_prices_df = pd.read_sql_table ('closing_prices', con=engine)


In [7]:
closing_prices_df.describe()

Unnamed: 0,USO,TSLA,SPY,CL=F,USO_pct_change,TSLA_pct_change,SPY_pct_change,CL=F_pct_change
count,2846.0,2846.0,2846.0,2823.0,2845.0,2845.0,2845.0,2845.0
mean,164.771809,106.11251,226.313131,68.814733,-0.000287,0.002437,0.000568,-0.000936
std,102.387174,189.260934,82.173761,22.907566,0.0227,0.035274,0.010644,0.067501
min,17.040001,3.16,102.199997,-37.630001,-0.25315,-0.210628,-0.109424,-3.059661
25%,85.120003,10.464,158.595005,49.715,-0.010686,-0.014816,-0.003469,-0.010779
50%,112.760002,45.909,209.770004,63.73,0.00069,0.001255,0.000649,0.000616
75%,276.460007,65.295502,279.069992,91.975002,0.011073,0.019081,0.005452,0.011738
max,361.200012,883.090027,453.190002,113.93,0.166667,0.243951,0.090603,0.376623


In [8]:
closing_prices_df.head()

Unnamed: 0,Date,USO,TSLA,SPY,CL=F,USO_pct_change,TSLA_pct_change,SPY_pct_change,CL=F_pct_change
0,2010-06-29,273.359985,4.778,104.209999,75.940002,,,,
1,2010-06-30,271.679993,4.766,103.220001,75.629997,-0.006146,-0.002512,-0.0095,-0.004082
2,2010-07-01,262.959991,4.392,102.760002,72.949997,-0.032097,-0.078472,-0.004456,-0.035436
3,2010-07-02,260.799988,3.84,102.199997,72.139999,-0.008214,-0.125683,-0.00545,-0.011103
4,2010-07-06,260.880005,3.222,102.870003,71.980003,0.000307,-0.160938,0.006556,-0.002218


In [10]:
#calcualting correlations between assets 

df_correlation=closing_prices_df[['USO_pct_change', 'TSLA_pct_change', 'SPY_pct_change']]
df_correlation.corr (method='pearson')

Unnamed: 0,USO_pct_change,TSLA_pct_change,SPY_pct_change
USO_pct_change,1.0,0.196624,0.398108
TSLA_pct_change,0.196624,1.0,0.393186
SPY_pct_change,0.398108,0.393186,1.0


In [12]:
df_correlation_first_700 = df_correlation[:700]
df_correlation_1100_1400 = df_correlation[1100:1400]
df_correlation_2400_2500 = df_correlation[2400:2500]
df_correlation_2500 = df_correlation[2500:]


In [13]:

print('Correlation for the first 700 days')
display(df_correlation_first_700.corr(method='pearson'))
print ('Correlation for days 1100 - 1400')
display(df_correlation_1100_1400.corr(method='pearson'))
print('Correlation for days 2400-2500')
display(df_correlation_2400_2500.corr(method='pearson'))
print('Correlation after 2500')
display(df_correlation_2500.corr(method='pearson'))
print('Whole data set')
df_correlation.corr (method='pearson')

Correlation for the first 700 days


Unnamed: 0,USO_pct_change,TSLA_pct_change,SPY_pct_change
USO_pct_change,1.0,0.265759,0.572316
TSLA_pct_change,0.265759,1.0,0.388257
SPY_pct_change,0.572316,0.388257,1.0


Correlation for days 1100 - 1400


Unnamed: 0,USO_pct_change,TSLA_pct_change,SPY_pct_change
USO_pct_change,1.0,0.19939,0.324372
TSLA_pct_change,0.19939,1.0,0.490492
SPY_pct_change,0.324372,0.490492,1.0


Correlation for days 2400-2500


Unnamed: 0,USO_pct_change,TSLA_pct_change,SPY_pct_change
USO_pct_change,1.0,0.318407,0.404437
TSLA_pct_change,0.318407,1.0,0.521912
SPY_pct_change,0.404437,0.521912,1.0


Correlation after 2500


Unnamed: 0,USO_pct_change,TSLA_pct_change,SPY_pct_change
USO_pct_change,1.0,0.171341,0.402591
TSLA_pct_change,0.171341,1.0,0.44624
SPY_pct_change,0.402591,0.44624,1.0


Whole data set


Unnamed: 0,USO_pct_change,TSLA_pct_change,SPY_pct_change
USO_pct_change,1.0,0.196624,0.398108
TSLA_pct_change,0.196624,1.0,0.393186
SPY_pct_change,0.398108,0.393186,1.0
