In [1]:
import requests
import pandas as pd
import time
from datetime import datetime, timedelta
from plotly import graph_objects as go
import numpy as np
import talib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

In [4]:
def get_coinbase_data(url, startDate):
    json = []
    dtObj = datetime.strptime(startDate, '%Y-%m-%d %H:%M:%S')
    dtObjEnd = dtObj + timedelta(days=200)
    bypass = True

    while len(json) > 0 or bypass:
        bypass = False       
        dtTimeStart = "{:.0f}".format(dtObj.timestamp())
        dtTimeEnd = "{:.0f}".format(dtObjEnd.timestamp())
        print(f'Start: {dtTimeStart}')
        print(f'End: {dtTimeEnd}')
        params = {
            'start': dtTimeStart,
            'end': dtTimeEnd,
            'granularity': 86400
        }
        print(params)
        
        # Sending GET request to Coinbase API
        response = requests.get(url, params=params)
        time.sleep(3)
        if response.status_code == 200:
            respJson = response.json()
            if len(respJson) > 0:
                for resp in respJson:
                    json.append(resp)
                dtObj = dtObjEnd + timedelta(days=1)
                dtObjEnd = dtObj + timedelta(days=200)
            else:
                break
        else:
            print(f"Error! {response.json()}")
            break

    return json

In [5]:
# CoinGecko API endpoint for historical Bitcoin price
url = 'https://api.pro.coinbase.com/products/BTC-USD/candles'
dtStart = '2017-01-01 00:00:00'
data = get_coinbase_data(url, dtStart)

Start: 1483228800
End: 1500505200
{'start': '1483228800', 'end': '1500505200', 'granularity': 86400}


Start: 1500591600
End: 1517875200
{'start': '1500591600', 'end': '1517875200', 'granularity': 86400}
Start: 1517961600
End: 1535238000
{'start': '1517961600', 'end': '1535238000', 'granularity': 86400}
Start: 1535324400
End: 1552608000
{'start': '1535324400', 'end': '1552608000', 'granularity': 86400}
Start: 1552694400
End: 1569970800
{'start': '1552694400', 'end': '1569970800', 'granularity': 86400}
Start: 1570057200
End: 1587337200
{'start': '1570057200', 'end': '1587337200', 'granularity': 86400}
Start: 1587423600
End: 1604707200
{'start': '1587423600', 'end': '1604707200', 'granularity': 86400}
Start: 1604793600
End: 1622070000
{'start': '1604793600', 'end': '1622070000', 'granularity': 86400}
Start: 1622156400
End: 1639440000
{'start': '1622156400', 'end': '1639440000', 'granularity': 86400}
Start: 1639526400
End: 1656802800
{'start': '1639526400', 'end': '1656802800', 'granularity': 86400}
Start: 1656889200
End: 1674172800
{'start': '1656889200', 'end': '1674172800', 'granularity

In [6]:
data

[[1500422400, 2216, 2396, 2307.48, 2258.99, 18567.47611252],
 [1500336000, 2134.37, 2393.43, 2235.14, 2308.15, 23795.59414253],
 [1500249600, 1909.58, 2238.07, 1911.79, 2235.19, 21436.7645371],
 [1500163200, 1758.2, 2043.94, 1964.33, 1911.78, 21740.89274035],
 [1500076800, 1964.31, 2224.82, 2217.02, 1964.31, 16334.68864717],
 [1499990400, 2150, 2349.28, 2340, 2217.24, 13549.1778099],
 [1499904000, 2308.14, 2416.79, 2384.35, 2340, 9541.53943311],
 [1499817600, 2250, 2415, 2310, 2383.42, 13834.47197463],
 [1499731200, 2265, 2410, 2330.93, 2310.01, 15670.55064724],
 [1499644800, 2250, 2523, 2508, 2331.05, 15554.0050949],
 [1499558400, 2500, 2579.84, 2560.84, 2508.99, 5647.43426949],
 [1499472000, 2471.22, 2563.5, 2501.15, 2561.11, 7869.88131277],
 [1499385600, 2472, 2607.61, 2604.89, 2501.15, 11640.78157115],
 [1499299200, 2580.69, 2623.54, 2616.96, 2604.84, 8207.99657687],
 [1499212800, 2551.06, 2630.24, 2602, 2616.96, 10183.63010323],
 [1499126400, 2542.18, 2636.1, 2542.41, 2602, 10680.

In [7]:
df = pd.DataFrame(data=data,columns=['date', 'low', 'high', 'open', 'close', 'volume'])
df

Unnamed: 0,date,low,high,open,close,volume
0,1500422400,2216.00,2396.00,2307.48,2258.99,18567.476113
1,1500336000,2134.37,2393.43,2235.14,2308.15,23795.594143
2,1500249600,1909.58,2238.07,1911.79,2235.19,21436.764537
3,1500163200,1758.20,2043.94,1964.33,1911.78,21740.892740
4,1500076800,1964.31,2224.82,2217.02,1964.31,16334.688647
...,...,...,...,...,...,...
2633,1709337600,61623.39,62500.00,62439.74,62045.78,6882.384103
2634,1709251200,60788.33,63267.18,61179.03,62436.72,24696.656925
2635,1709164800,60355.61,63675.48,62520.06,61179.03,36825.895351
2636,1709078400,56723.48,64100.00,57077.08,62518.90,48265.672117


In [8]:
df['date'] = df['date'].apply(lambda x: datetime.fromtimestamp(x))
df.head()

Unnamed: 0,date,low,high,open,close,volume
0,2017-07-19 01:00:00,2216.0,2396.0,2307.48,2258.99,18567.476113
1,2017-07-18 01:00:00,2134.37,2393.43,2235.14,2308.15,23795.594143
2,2017-07-17 01:00:00,1909.58,2238.07,1911.79,2235.19,21436.764537
3,2017-07-16 01:00:00,1758.2,2043.94,1964.33,1911.78,21740.89274
4,2017-07-15 01:00:00,1964.31,2224.82,2217.02,1964.31,16334.688647


In [9]:
df = df.set_index('date')
df.head()

Unnamed: 0_level_0,low,high,open,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-07-19 01:00:00,2216.0,2396.0,2307.48,2258.99,18567.476113
2017-07-18 01:00:00,2134.37,2393.43,2235.14,2308.15,23795.594143
2017-07-17 01:00:00,1909.58,2238.07,1911.79,2235.19,21436.764537
2017-07-16 01:00:00,1758.2,2043.94,1964.33,1911.78,21740.89274
2017-07-15 01:00:00,1964.31,2224.82,2217.02,1964.31,16334.688647


In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2638 entries, 2017-07-19 01:00:00 to 2024-02-27 00:00:00
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   low     2638 non-null   float64
 1   high    2638 non-null   float64
 2   open    2638 non-null   float64
 3   close   2638 non-null   float64
 4   volume  2638 non-null   float64
dtypes: float64(5)
memory usage: 123.7 KB


In [11]:
df.sort_index(ascending=True)
df

Unnamed: 0_level_0,low,high,open,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-07-19 01:00:00,2216.00,2396.00,2307.48,2258.99,18567.476113
2017-07-18 01:00:00,2134.37,2393.43,2235.14,2308.15,23795.594143
2017-07-17 01:00:00,1909.58,2238.07,1911.79,2235.19,21436.764537
2017-07-16 01:00:00,1758.20,2043.94,1964.33,1911.78,21740.892740
2017-07-15 01:00:00,1964.31,2224.82,2217.02,1964.31,16334.688647
...,...,...,...,...,...
2024-03-02 00:00:00,61623.39,62500.00,62439.74,62045.78,6882.384103
2024-03-01 00:00:00,60788.33,63267.18,61179.03,62436.72,24696.656925
2024-02-29 00:00:00,60355.61,63675.48,62520.06,61179.03,36825.895351
2024-02-28 00:00:00,56723.48,64100.00,57077.08,62518.90,48265.672117


In [12]:
figure = go.Figure(data=[go.Candlestick(x=df.index, 
                                        open=df['open'], 
                                        high=df['high'], 
                                        low=df['low'], 
                                        close=df['close'])], 
                              layout=go.Layout(title={'text': 'Bitcoin Daily Closing Prices'},
                                               height=600))

figure.show()

In [13]:
# Calculate RSI
df['RSI'] = talib.RSI(df['close'], timeperiod=14)

In [14]:
# Calculate MACD
macd, signal, _ = talib.MACD(df['close'], fastperiod=12, slowperiod=26, signalperiod=9)
df['MACD'] = macd - signal

In [15]:
# Define features (RSI and MACD) and target (future price)
features = df[['RSI', 'MACD']].values
target = df['close'].shift(-1).values  # Shifted by 1 to predict future price

In [16]:
# Drop the last row to align features and target
features = features[:-1]
target = target[:-1]

In [17]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

In [18]:

# Train a Random Forest regressor
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [19]:
# Make predictions
predictions = model.predict(X_test)

In [20]:
# Evaluate the model
mse = mean_squared_error(y_test, predictions)
print('Mean Squared Error:', mse)

Mean Squared Error: 154047505.4377573
