In [1]:
import pandas as pd
import yfinance as yf
import requests
from datetime import datetime


def get_bitcoin_prices(start_date, end_date):
    bitcoin = yf.download('BTC-USD', start=start_date, end=end_date)
    print(bitcoin.head())
    bitcoin['bitcoin_price'] = bitcoin['Close'] # old Adj Close
    return bitcoin[['bitcoin_price']]

def get_sp500_prices(start_date, end_date):
    sp500 = yf.download('^GSPC', start=start_date, end=end_date)
    sp500['sp500_price'] = sp500['Close']
    return sp500[['sp500_price']]

def get_gold_prices(start_date, end_date):
    gold = yf.download('GC=F', start=start_date, end=end_date)
    gold['gold_price'] = gold['Close']
    return gold[['gold_price']]

def get_usd_index(start_date, end_date):
    usd_index = yf.download('DX-Y.NYB', start=start_date, end=end_date)
    usd_index['usd_index'] = usd_index['Close']
    return usd_index[['usd_index']]

def get_volatility_index(start_date, end_date):
    vol_index = yf.download('^VIX', start=start_date, end=end_date)
    vol_index['volatility_index'] = vol_index['Close']
    return vol_index[['volatility_index']]

def get_interest_rates(start_date, end_date):
    irx = yf.download('^IRX', start=start_date, end=end_date)
    irx['interest_rate'] = irx['Close']
    return irx[['interest_rate']]

def get_oil_prices(start_date, end_date):
    oil = yf.download('CL=F', start=start_date, end=end_date)
    oil['oil_price'] = oil['Close']
    return oil[['oil_price']]


# Specify the date range for the dataset
start_date = '2014-09-17'
end_date = datetime.today().strftime('%Y-%m-%d')

def get_dataset(start_date, end_date, interval):
    btc_prices = get_bitcoin_prices(start_date, end_date)
    sp500_prices = get_sp500_prices(start_date, end_date)
    gold_prices = get_gold_prices(start_date, end_date)
    usd_index_prices = get_usd_index(start_date, end_date)
    oil_prices = get_oil_prices(start_date, end_date)
    vol_index = get_volatility_index(start_date, end_date)
    interest_rates = get_interest_rates(start_date, end_date)
    dataset = btc_prices.join([sp500_prices, gold_prices, usd_index_prices, oil_prices, vol_index, interest_rates], how='outer')
    
    print(dataset.isna().sum())
    dataset.ffill(inplace=True)
    dataset.dropna(inplace=True)
    dataset.index = pd.to_datetime(dataset.index, utc=True)
    dataset.index = dataset.index.date
    return dataset

# dataset.to_csv('training_datasets/bitcoin_prediction_dataset.csv')
dataset = get_dataset(start_date, end_date, '1d')
dataset.head(100)

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed


Price            Close        High         Low        Open    Volume
Ticker         BTC-USD     BTC-USD     BTC-USD     BTC-USD   BTC-USD
Date                                                                
2014-09-17  457.334015  468.174011  452.421997  465.864014  21056800
2014-09-18  424.440002  456.859985  413.104004  456.859985  34483200
2014-09-19  394.795990  427.834991  384.532013  424.102997  37919700
2014-09-20  408.903992  423.295990  389.882996  394.673004  36863600
2014-09-21  398.821014  412.425995  393.181000  408.084991  26580100


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Price             Ticker
bitcoin_price                  0
sp500_price                 1221
gold_price                  1223
usd_index                   1220
oil_price                   1222
volatility_index            1221
interest_rate               1222
dtype: int64





Price,bitcoin_price,sp500_price,gold_price,usd_index,oil_price,volatility_index,interest_rate
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2014-09-17,457.334015,2001.569946,1234.400024,84.699997,94.419998,12.65,0.013
2014-09-18,424.440002,2011.359985,1225.699951,84.320000,93.070000,12.03,0.010
2014-09-19,394.795990,2010.400024,1215.300049,84.800003,92.410004,12.11,0.007
2014-09-20,408.903992,2010.400024,1215.300049,84.800003,92.410004,12.11,0.007
2014-09-21,398.821014,2010.400024,1215.300049,84.800003,92.410004,12.11,0.007
...,...,...,...,...,...,...,...
2014-12-21,320.842987,2070.649902,1195.900024,89.599998,56.520000,16.49,0.025
2014-12-22,331.885986,2078.540039,1179.699951,89.769997,55.259998,15.25,0.015
2014-12-23,334.571991,2082.169922,1177.900024,90.059998,57.119999,14.80,0.020
2014-12-24,322.533997,2081.879883,1173.500000,89.970001,55.840000,14.37,0.020


In [2]:
dataset.reset_index(drop=True, inplace=True)
dataset.head(100)

Price,bitcoin_price,sp500_price,gold_price,usd_index,oil_price,volatility_index,interest_rate
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,457.334015,2001.569946,1234.400024,84.699997,94.419998,12.65,0.013
1,424.440002,2011.359985,1225.699951,84.320000,93.070000,12.03,0.010
2,394.795990,2010.400024,1215.300049,84.800003,92.410004,12.11,0.007
3,408.903992,2010.400024,1215.300049,84.800003,92.410004,12.11,0.007
4,398.821014,2010.400024,1215.300049,84.800003,92.410004,12.11,0.007
...,...,...,...,...,...,...,...
95,320.842987,2070.649902,1195.900024,89.599998,56.520000,16.49,0.025
96,331.885986,2078.540039,1179.699951,89.769997,55.259998,15.25,0.015
97,334.571991,2082.169922,1177.900024,90.059998,57.119999,14.80,0.020
98,322.533997,2081.879883,1173.500000,89.970001,55.840000,14.37,0.020


In [3]:
# export to csv
dataset.to_csv('training_datasets/bitcoin_prediction_dataset.csv')

In [4]:
df = pd.read_csv('training_datasets/bitcoin_prediction_dataset.csv')

In [7]:
df.head()

In [5]:
start_date = '2024-10-10'
end_date = datetime.today().strftime('%Y-%m-%d')
df = pd.DataFrame()
bitcoin = yf.download('BTC-USD', start=start_date, end=end_date)
df['bitcoin_price'] = bitcoin['Adj Close']
sp500 = yf.download('^GSPC', start=start_date, end=end_date)
df['sp500_price'] = sp500['Adj Close']
gold = yf.download('GC=F', start=start_date, end=end_date)
df['gold_price'] = gold['Adj Close']
usd_index = yf.download('DX-Y.NYB', start=start_date, end=end_date)
df['usd_index'] = usd_index['Adj Close']
vol_index = yf.download('^VIX', start=start_date, end=end_date)
df['volatility_index'] = vol_index['Adj Close']
irx = yf.download('^IRX', start=start_date, end=end_date)
df['interest_rate'] = irx['Adj Close']
oil = yf.download('CL=F', start=start_date, end=end_date)
df['oil_price'] = oil['Adj Close']

# dataset = bitcoin[['bitcoin_price']].join([sp500[['sp500_price']], gold_prices, usd_index_prices, oil_prices, vol_index, interest_rates], how='outer')
df.ffill(inplace=True)
df.dropna(inplace=True)
# df.index = pd.to_datetime(dataset.index, utc=True)
#df.index = dataset.index.date
df.head()

[*********************100%***********************]  1 of 1 completed


KeyError: 'Adj Close'