In [None]:
import pandas as pd
import yfinance as yf
import requests
from datetime import datetime


def get_bitcoin_prices(start_date, end_date):
    """Download ``BTC-USD`` quotes and keep only the closing price.

    Args:
        start_date: Forwarded to ``yf.download`` as ``start``.
        end_date: Forwarded to ``yf.download`` as ``end``.

    Returns:
        The downloaded frame restricted to a single ``bitcoin_price`` column
        holding its ``Close`` data.
    """
    ticker = 'BTC-USD'
    column = 'bitcoin_price'
    quotes = yf.download(ticker, start=start_date, end=end_date)
    # Show the first rows of the raw download for a quick visual check.
    print(quotes.head())
    # 'Close' is the source column here; this previously read 'Adj Close'.
    quotes[column] = quotes['Close']
    return quotes[[column]]

def get_sp500_prices(start_date, end_date):
    """Download ``^GSPC`` (S&P 500) quotes and keep only the closing price.

    Args:
        start_date: Forwarded to ``yf.download`` as ``start``.
        end_date: Forwarded to ``yf.download`` as ``end``.

    Returns:
        The downloaded frame restricted to a single ``sp500_price`` column
        holding its ``Close`` data.
    """
    ticker = '^GSPC'
    column = 'sp500_price'
    quotes = yf.download(ticker, start=start_date, end=end_date)
    quotes[column] = quotes['Close']
    return quotes[[column]]

def get_gold_prices(start_date, end_date):
    """Download ``GC=F`` (gold) quotes and keep only the closing price.

    Args:
        start_date: Forwarded to ``yf.download`` as ``start``.
        end_date: Forwarded to ``yf.download`` as ``end``.

    Returns:
        The downloaded frame restricted to a single ``gold_price`` column
        holding its ``Close`` data.
    """
    ticker = 'GC=F'
    column = 'gold_price'
    quotes = yf.download(ticker, start=start_date, end=end_date)
    quotes[column] = quotes['Close']
    return quotes[[column]]

def get_usd_index(start_date, end_date):
    """Download ``DX-Y.NYB`` (USD index) quotes and keep only the close.

    Args:
        start_date: Forwarded to ``yf.download`` as ``start``.
        end_date: Forwarded to ``yf.download`` as ``end``.

    Returns:
        The downloaded frame restricted to a single ``usd_index`` column
        holding its ``Close`` data.
    """
    ticker = 'DX-Y.NYB'
    column = 'usd_index'
    quotes = yf.download(ticker, start=start_date, end=end_date)
    quotes[column] = quotes['Close']
    return quotes[[column]]

def get_volatility_index(start_date, end_date):
    """Download ``^VIX`` (volatility index) quotes and keep only the close.

    Args:
        start_date: Forwarded to ``yf.download`` as ``start``.
        end_date: Forwarded to ``yf.download`` as ``end``.

    Returns:
        The downloaded frame restricted to a single ``volatility_index``
        column holding its ``Close`` data.
    """
    ticker = '^VIX'
    column = 'volatility_index'
    quotes = yf.download(ticker, start=start_date, end=end_date)
    quotes[column] = quotes['Close']
    return quotes[[column]]

def get_interest_rates(start_date, end_date):
    """Download ``^IRX`` quotes, used here as the interest-rate series.

    Args:
        start_date: Forwarded to ``yf.download`` as ``start``.
        end_date: Forwarded to ``yf.download`` as ``end``.

    Returns:
        The downloaded frame restricted to a single ``interest_rate`` column
        holding its ``Close`` data.
    """
    ticker = '^IRX'
    column = 'interest_rate'
    quotes = yf.download(ticker, start=start_date, end=end_date)
    quotes[column] = quotes['Close']
    return quotes[[column]]

def get_oil_prices(start_date, end_date):
    """Download ``CL=F`` (oil) quotes and keep only the closing price.

    Args:
        start_date: Forwarded to ``yf.download`` as ``start``.
        end_date: Forwarded to ``yf.download`` as ``end``.

    Returns:
        The downloaded frame restricted to a single ``oil_price`` column
        holding its ``Close`` data.
    """
    ticker = 'CL=F'
    column = 'oil_price'
    quotes = yf.download(ticker, start=start_date, end=end_date)
    quotes[column] = quotes['Close']
    return quotes[[column]]


# Date range covered by the dataset: a fixed first day up to the current
# local date, both as 'YYYY-MM-DD' strings.
start_date = '2014-09-17'
end_date = f"{datetime.today():%Y-%m-%d}"

def get_dataset(start_date, end_date, interval):
    """Build one table of closing values for Bitcoin and the macro series.

    Each series is downloaded through its ``get_*`` helper, the results are
    outer-joined on their index, gaps are forward-filled and rows that still
    contain missing values are dropped.

    Args:
        start_date: Start of the range, forwarded to every helper.
        end_date: End of the range, forwarded to every helper.
        interval: Currently unused, because the helpers take no interval
            argument. Kept so that existing calls keep working.

    Returns:
        DataFrame indexed by ``datetime.date`` objects with the columns
        ``bitcoin_price``, ``sp500_price``, ``gold_price``, ``usd_index``,
        ``oil_price``, ``volatility_index`` and ``interest_rate``.
    """
    btc_prices = get_bitcoin_prices(start_date, end_date)
    other_series = [
        get_sp500_prices(start_date, end_date),
        get_gold_prices(start_date, end_date),
        get_usd_index(start_date, end_date),
        get_oil_prices(start_date, end_date),
        get_volatility_index(start_date, end_date),
        get_interest_rates(start_date, end_date),
    ]
    dataset = btc_prices.join(other_series, how='outer')

    # yfinance can return two-level (field, ticker) columns, in which case the
    # helper frames carry tuple labels such as ('bitcoin_price', ''). Reduce
    # those to their first element so the result (and any CSV written from
    # it) has plain column names. Frames that are already flat are untouched.
    if any(isinstance(label, tuple) for label in dataset.columns):
        dataset.columns = [
            label[0] if isinstance(label, tuple) else label
            for label in dataset.columns
        ]

    # Report how many values each series is missing before gaps are filled.
    print(dataset.isna().sum())
    # Carry the last known value forward; rows before a series' first
    # observation cannot be filled and are dropped.
    dataset = dataset.ffill().dropna()
    # Replace the timestamp index with plain calendar dates.
    dataset.index = pd.to_datetime(dataset.index, utc=True).date
    return dataset

# Build the dataset for the configured date range and preview it.
dataset = get_dataset(
    start_date=start_date,
    end_date=end_date,
    interval='1d',
)
dataset.head(100)

In [None]:
# Discard the current index in place (it is not kept as a column) and fall
# back to the default integer index, then preview the result.
dataset.reset_index(inplace=True, drop=True)
dataset.head(100)

In [None]:
# export to csv
import os

# DataFrame.to_csv does not create missing directories, so make sure the
# output folder exists before writing.
os.makedirs('training_datasets', exist_ok=True)
# The index is written as the first (unnamed) column of the file.
dataset.to_csv('training_datasets/bitcoin_prediction_dataset.csv')

In [None]:
# Reload the exported dataset from disk.
# NOTE(review): the file is written together with its index, so that index
# comes back here as an extra 'Unnamed: 0' column. Pass index_col=0 if that
# is not intended; confirm against the code that consumes `df`.
df = pd.read_csv(
    filepath_or_buffer='training_datasets/bitcoin_prediction_dataset.csv',
)

In [None]:
# Rebuild a recent slice of the dataset straight from Yahoo Finance.
start_date = '2024-10-10'
end_date = datetime.today().strftime('%Y-%m-%d')


def _close_prices(frame):
    """Return the ``Close`` data of a ``yf.download`` result as a Series.

    ``'Close'`` is used, as in the ``get_*`` helpers, because ``'Adj Close'``
    is not present when yfinance auto-adjusts prices. When the download has
    two-level (field, ticker) columns, ``frame['Close']`` is a one-column
    DataFrame; its only column is returned in that case.
    """
    close = frame['Close']
    if isinstance(close, pd.DataFrame):
        close = close.iloc[:, 0]
    return close


# The first assignment gives `df` the Bitcoin index; every later series is
# aligned to that index when it is assigned.
df = pd.DataFrame()
bitcoin = yf.download('BTC-USD', start=start_date, end=end_date)
df['bitcoin_price'] = _close_prices(bitcoin)
sp500 = yf.download('^GSPC', start=start_date, end=end_date)
df['sp500_price'] = _close_prices(sp500)
gold = yf.download('GC=F', start=start_date, end=end_date)
df['gold_price'] = _close_prices(gold)
usd_index = yf.download('DX-Y.NYB', start=start_date, end=end_date)
df['usd_index'] = _close_prices(usd_index)
vol_index = yf.download('^VIX', start=start_date, end=end_date)
df['volatility_index'] = _close_prices(vol_index)
irx = yf.download('^IRX', start=start_date, end=end_date)
df['interest_rate'] = _close_prices(irx)
oil = yf.download('CL=F', start=start_date, end=end_date)
df['oil_price'] = _close_prices(oil)

# Fill gaps with the last known value, then drop rows that are still
# incomplete. The index is left as returned by yfinance.
df.ffill(inplace=True)
df.dropna(inplace=True)
df.head()