In [None]:
!pip install yfinance

Collecting yfinance
  Downloading yfinance-0.1.70-py2.py3-none-any.whl (26 kB)
Collecting requests>=2.26
  Downloading requests-2.27.1-py2.py3-none-any.whl (63 kB)
[K     |████████████████████████████████| 63 kB 810 kB/s 
Collecting lxml>=4.5.1
  Downloading lxml-4.8.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (6.4 MB)
[K     |████████████████████████████████| 6.4 MB 11.0 MB/s 
Installing collected packages: requests, lxml, yfinance
  Attempting uninstall: requests
    Found existing installation: requests 2.23.0
    Uninstalling requests-2.23.0:
      Successfully uninstalled requests-2.23.0
  Attempting uninstall: lxml
    Found existing installation: lxml 4.2.6
    Uninstalling lxml-4.2.6:
      Successfully uninstalled lxml-4.2.6
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires requests

In [None]:
import yfinance as yf
import pandas as pd
from scipy.stats import pearsonr
from scipy.stats import spearmanr
import numpy as np
import math

In [None]:
def get_historical_klines_with_efficiancy_rate(symbol, start, interval):
  """Download price history for ``symbol`` and add an open-to-close percent return.

  Args:
    symbol: Ticker symbol passed to ``yf.Ticker`` (e.g. "BTC-USD").
    start: Start date forwarded to ``Ticker.history`` as ``start``.
    interval: Bar interval forwarded to ``Ticker.history`` as ``interval``.

  Returns:
    The history DataFrame without the 'Dividends' and 'Stock Splits' columns
    and with an added 'efficiency_rate' column equal to
    (Close - Open) / Open * 100 for each row.
  """
  history = yf.Ticker(symbol).history(start=start, interval=interval)
  # errors='ignore' keeps this working when the corporate-action columns are
  # absent from the returned frame instead of raising KeyError; the non-inplace
  # drop also avoids mutating the frame object handed back by the library.
  history = history.drop(columns=['Dividends', 'Stock Splits'], errors='ignore')
  history['efficiency_rate'] = (history['Close'] - history['Open']) / history['Open'] * 100
  return history

def get_correlation_efficiency_rates(df1, df2, df1_feature='efficiency_rate', df2_feature='efficiency_rate'):
  """Pearson correlation between one column of ``df1`` and one column of ``df2``.

  The two columns are inner-joined on the frames' indexes, so only index
  values present in both frames contribute to the result.

  Args:
    df1: DataFrame containing the column ``df1_feature``.
    df2: DataFrame containing the column ``df2_feature``.
    df1_feature: Column of ``df1`` to correlate.
    df2_feature: Column of ``df2`` to correlate.

  Returns:
    The Pearson correlation coefficient of the paired values.

  Raises:
    ValueError: If fewer than two complete (non-NaN) pairs remain after the
      join, since a correlation is undefined in that case.
  """
  paired = pd.merge(df1[[df1_feature]], df2[[df2_feature]], left_index=True, right_index=True, suffixes=('_df1', '_df2'))
  # A missing value on either side would otherwise poison the whole
  # coefficient, so only complete pairs are kept.
  paired = paired.dropna()
  if len(paired) < 2:
    raise ValueError(
        'Need at least 2 overlapping non-NaN observations to correlate '
        '{!r} with {!r}; got {}.'.format(df1_feature, df2_feature, len(paired)))
  # Select by position: the merged column names depend on whether the two
  # feature names collide (in which case the suffixes are applied).
  corr, _ = pearsonr(paired.iloc[:, 0].to_numpy(), paired.iloc[:, 1].to_numpy())
  return corr

In [None]:
# Daily bars from the start of 2017 for crude-oil futures, gold futures and
# Bitcoin, each with the open-to-close 'efficiency_rate' column added.
start = "2017-01-01"
interval = "1d"
oil_df, gold_df, btc_df = (
    get_historical_klines_with_efficiancy_rate(symbol, start, interval)
    for symbol in ("CL=F", "GC=F", "BTC-USD")
)

In [None]:
# Same-row correlation of BTC's efficiency rate with oil's and with gold's.
btc_oil_corr, btc_gold_corr = (
    get_correlation_efficiency_rates(btc_df, other_df)
    for other_df in (oil_df, gold_df)
)
(btc_oil_corr, btc_gold_corr)

(0.045664202001800824, 0.057604430059923634)

# Correlation of oil and gold with lagged and leading BTC efficiency rates (positive and negative shifts)

In [None]:
# Shifted copies of BTC's efficiency rate, keyed by column name -> row offset.
# Positive offsets (lag_1..lag_3) place earlier rows' values on the current row;
# negative offsets (lag_4..lag_6) place later rows' values on it, i.e. those
# columns are leads despite the "lag" name.
shift_by_column = {'lag_1': 1, 'lag_2': 2, 'lag_3': 3, 'lag_4': -1, 'lag_5': -2, 'lag_6': -3}
for column, offset in shift_by_column.items():
  btc_df[column] = btc_df['efficiency_rate'].shift(offset)
# Shifting leaves NaN in the first/last rows; remove every row containing a NaN.
# This mutates btc_df, so re-running the cell trims additional rows each time.
btc_df.dropna(inplace=True)

In [None]:
# Correlate each shifted BTC column (lag_1 .. lag_6) with the unshifted oil and
# gold efficiency rates. The per-lag values are kept only in the two lists so
# that btc_oil_corr / btc_gold_corr from the same-day comparison are not
# overwritten by the last lag's result.
lag_features = ['lag_{}'.format(i) for i in range(1, 7)]
btc_oil_corrs = [
    get_correlation_efficiency_rates(btc_df, oil_df, df1_feature=feature)
    for feature in lag_features
]
btc_gold_corrs = [
    get_correlation_efficiency_rates(btc_df, gold_df, df1_feature=feature)
    for feature in lag_features
]

for lag_number, (oil_corr, gold_corr) in enumerate(zip(btc_oil_corrs, btc_gold_corrs), start=1):
  print('BTC with Oil correlation with lag_{} : {}'.format(lag_number, oil_corr))
  print('BTC with gold correlation with lag_{} : {}\n'.format(lag_number, gold_corr))

BTC with Oil correlation with lag_1 : 0.026419112425176305
BTC with gold correlation with lag_1 : 0.014796710742684028

BTC with Oil correlation with lag_2 : -0.0011654625559363672
BTC with gold correlation with lag_2 : 0.015220475266542823

BTC with Oil correlation with lag_3 : -0.00476407771707883
BTC with gold correlation with lag_3 : -0.01656494626002636

BTC with Oil correlation with lag_4 : -0.012487795060399943
BTC with gold correlation with lag_4 : -0.04198673043908589

BTC with Oil correlation with lag_5 : -0.03533048294143138
BTC with gold correlation with lag_5 : 0.021935783801464548

BTC with Oil correlation with lag_6 : -0.020463242200007527
BTC with gold correlation with lag_6 : 0.0018606145403721928

