## Building a Radius Neighbor Regressor

Dataset: Bitcoin data from yfinance api

In [2]:
import yfinance

symbol = "BTC-USD"

start = "2014-01-01"

end = "2023-09-01"

dataframe= yfinance.download(symbol,start, end)

dataframe

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-09-17,465.864014,468.174011,452.421997,457.334015,457.334015,21056800
2014-09-18,456.859985,456.859985,413.104004,424.440002,424.440002,34483200
2014-09-19,424.102997,427.834991,384.532013,394.795990,394.795990,37919700
2014-09-20,394.673004,423.295990,389.882996,408.903992,408.903992,36863600
2014-09-21,408.084991,412.425995,393.181000,398.821014,398.821014,26580100
...,...,...,...,...,...,...
2023-08-27,26008.242188,26165.373047,25965.097656,26089.693359,26089.693359,6913768611
2023-08-28,26089.615234,26198.578125,25880.599609,26106.150391,26106.150391,11002805166
2023-08-29,26102.486328,28089.337891,25912.628906,27727.392578,27727.392578,29368391712
2023-08-30,27726.083984,27760.160156,27069.207031,27297.265625,27297.265625,16343655235


In [3]:
dataframe.to_csv("btc-usd-2014-2023.csv")

In [4]:
dataframe.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3271 entries, 2014-09-17 to 2023-08-31
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       3271 non-null   float64
 1   High       3271 non-null   float64
 2   Low        3271 non-null   float64
 3   Close      3271 non-null   float64
 4   Adj Close  3271 non-null   float64
 5   Volume     3271 non-null   int64  
dtypes: float64(5), int64(1)
memory usage: 178.9 KB


In [5]:
import numpy as np

### Adding more features to DataFrame

In [7]:
BUY = 1
SELL = 0

dataframe["Buy or Sell(Adj Close)"] = np.where(dataframe["Adj Close"].shift(-1) > dataframe["Adj Close"],
                                               BUY,
                                               SELL)
dataframe

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Buy or Sell(Adj Close)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2014-09-17,465.864014,468.174011,452.421997,457.334015,457.334015,21056800,0
2014-09-18,456.859985,456.859985,413.104004,424.440002,424.440002,34483200,0
2014-09-19,424.102997,427.834991,384.532013,394.795990,394.795990,37919700,1
2014-09-20,394.673004,423.295990,389.882996,408.903992,408.903992,36863600,0
2014-09-21,408.084991,412.425995,393.181000,398.821014,398.821014,26580100,1
...,...,...,...,...,...,...,...
2023-08-27,26008.242188,26165.373047,25965.097656,26089.693359,26089.693359,6913768611,1
2023-08-28,26089.615234,26198.578125,25880.599609,26106.150391,26106.150391,11002805166,1
2023-08-29,26102.486328,28089.337891,25912.628906,27727.392578,27727.392578,29368391712,0
2023-08-30,27726.083984,27760.160156,27069.207031,27297.265625,27297.265625,16343655235,0


In [8]:
dataframe["Buy or Sell(Open)"] = np.where(dataframe["Open"].shift(-1) > dataframe["Open"],
                                               BUY,
                                               SELL)

dataframe

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Buy or Sell(Adj Close),Buy or Sell(Open)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2014-09-17,465.864014,468.174011,452.421997,457.334015,457.334015,21056800,0,0
2014-09-18,456.859985,456.859985,413.104004,424.440002,424.440002,34483200,0,0
2014-09-19,424.102997,427.834991,384.532013,394.795990,394.795990,37919700,1,0
2014-09-20,394.673004,423.295990,389.882996,408.903992,408.903992,36863600,0,1
2014-09-21,408.084991,412.425995,393.181000,398.821014,398.821014,26580100,1,0
...,...,...,...,...,...,...,...,...
2023-08-27,26008.242188,26165.373047,25965.097656,26089.693359,26089.693359,6913768611,1,1
2023-08-28,26089.615234,26198.578125,25880.599609,26106.150391,26106.150391,11002805166,1,1
2023-08-29,26102.486328,28089.337891,25912.628906,27727.392578,27727.392578,29368391712,0,1
2023-08-30,27726.083984,27760.160156,27069.207031,27297.265625,27297.265625,16343655235,0,0


In [9]:
# percentage return based on the price of the previous day

dataframe["Returns"] = dataframe["Adj Close"].pct_change()
dataframe

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Buy or Sell(Adj Close),Buy or Sell(Open),Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2014-09-17,465.864014,468.174011,452.421997,457.334015,457.334015,21056800,0,0,
2014-09-18,456.859985,456.859985,413.104004,424.440002,424.440002,34483200,0,0,-0.071926
2014-09-19,424.102997,427.834991,384.532013,394.795990,394.795990,37919700,1,0,-0.069843
2014-09-20,394.673004,423.295990,389.882996,408.903992,408.903992,36863600,0,1,0.035735
2014-09-21,408.084991,412.425995,393.181000,398.821014,398.821014,26580100,1,0,-0.024659
...,...,...,...,...,...,...,...,...,...
2023-08-27,26008.242188,26165.373047,25965.097656,26089.693359,26089.693359,6913768611,1,1,0.003123
2023-08-28,26089.615234,26198.578125,25880.599609,26106.150391,26106.150391,11002805166,1,1,0.000631
2023-08-29,26102.486328,28089.337891,25912.628906,27727.392578,27727.392578,29368391712,0,1,0.062102
2023-08-30,27726.083984,27760.160156,27069.207031,27297.265625,27297.265625,16343655235,0,0,-0.015513


In [10]:
# Volume increase or decrease
INCREASED = 1

DECREASED = 0

dataframe["Volume Increase or Decrease"] = np.where(dataframe["Volume"].shift(-1) > dataframe["Volume"],
                                                    INCREASED,
                                                    DECREASED)

dataframe


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Buy or Sell(Adj Close),Buy or Sell(Open),Returns,Volume Increase or Decrease
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-09-17,465.864014,468.174011,452.421997,457.334015,457.334015,21056800,0,0,,1
2014-09-18,456.859985,456.859985,413.104004,424.440002,424.440002,34483200,0,0,-0.071926,1
2014-09-19,424.102997,427.834991,384.532013,394.795990,394.795990,37919700,1,0,-0.069843,0
2014-09-20,394.673004,423.295990,389.882996,408.903992,408.903992,36863600,0,1,0.035735,0
2014-09-21,408.084991,412.425995,393.181000,398.821014,398.821014,26580100,1,0,-0.024659,0
...,...,...,...,...,...,...,...,...,...,...
2023-08-27,26008.242188,26165.373047,25965.097656,26089.693359,26089.693359,6913768611,1,1,0.003123,1
2023-08-28,26089.615234,26198.578125,25880.599609,26106.150391,26106.150391,11002805166,1,1,0.000631,1
2023-08-29,26102.486328,28089.337891,25912.628906,27727.392578,27727.392578,29368391712,0,1,0.062102,0
2023-08-30,27726.083984,27760.160156,27069.207031,27297.265625,27297.265625,16343655235,0,0,-0.015513,1


### Define Features and Labels

In [17]:
dataframe = dataframe.dropna()
dataframe

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Buy or Sell(Adj Close),Buy or Sell(Open),Returns,Volume Increase or Decrease
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-09-18,456.859985,456.859985,413.104004,424.440002,424.440002,34483200,0,0,-0.071926,1
2014-09-19,424.102997,427.834991,384.532013,394.795990,394.795990,37919700,1,0,-0.069843,0
2014-09-20,394.673004,423.295990,389.882996,408.903992,408.903992,36863600,0,1,0.035735,0
2014-09-21,408.084991,412.425995,393.181000,398.821014,398.821014,26580100,1,0,-0.024659,0
2014-09-22,399.100006,406.915985,397.130005,402.152008,402.152008,24127600,1,1,0.008352,1
...,...,...,...,...,...,...,...,...,...,...
2023-08-27,26008.242188,26165.373047,25965.097656,26089.693359,26089.693359,6913768611,1,1,0.003123,1
2023-08-28,26089.615234,26198.578125,25880.599609,26106.150391,26106.150391,11002805166,1,1,0.000631,1
2023-08-29,26102.486328,28089.337891,25912.628906,27727.392578,27727.392578,29368391712,0,1,0.062102,0
2023-08-30,27726.083984,27760.160156,27069.207031,27297.265625,27297.265625,16343655235,0,0,-0.015513,1


In [18]:
INDEX_OF_RETURNS_COLUMN = 8
y = dataframe.iloc[:,INDEX_OF_RETURNS_COLUMN].values
y

array([-0.07192558, -0.06984265,  0.03573492, ...,  0.06210192,
       -0.01551271, -0.05003406])

In [19]:
INDEX_OF_BUY_SELL_COLUMNS_ADJ_CLOSE = 6
X = dataframe.iloc[:, INDEX_OF_BUY_SELL_COLUMNS_ADJ_CLOSE: INDEX_OF_RETURNS_COLUMN].values
X

array([[0, 0],
       [1, 0],
       [0, 1],
       ...,
       [0, 1],
       [0, 0],
       [0, 0]])

### Train- Test Split

In [20]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y)

In [21]:
X_train.shape

(2452, 2)

In [22]:
y_train.shape

(2452,)

In [23]:
X_test.shape

(818, 2)

In [24]:
y_test.shape

(818,)

### Build the Radius Neighbors REgressor Model

In [25]:
from sklearn.neighbors import RadiusNeighborsRegressor

model = RadiusNeighborsRegressor(radius=0.1)

model

Training the Model

In [26]:
model.fit(X_train, y_train)

In [27]:
predicted_y = model.predict(X_test)
predicted_y

array([ 0.0266633 ,  0.02432867,  0.02432867,  0.0266633 ,  0.02432867,
       -0.02488703,  0.0266633 , -0.02307713, -0.02488703, -0.02307713,
       -0.02307713,  0.02432867,  0.02432867, -0.02488703,  0.02432867,
       -0.02307713, -0.02488703, -0.02307713,  0.0266633 ,  0.02432867,
        0.02432867, -0.02488703,  0.02432867, -0.02488703,  0.0266633 ,
       -0.02488703,  0.02432867,  0.02432867, -0.02488703,  0.02432867,
        0.02432867,  0.02432867,  0.0266633 ,  0.0266633 , -0.02307713,
        0.0266633 ,  0.02432867, -0.02488703, -0.02488703,  0.0266633 ,
       -0.02307713, -0.02488703,  0.0266633 ,  0.02432867,  0.0266633 ,
       -0.02307713, -0.02488703,  0.02432867, -0.02488703,  0.0266633 ,
        0.02432867,  0.02432867, -0.02488703, -0.02488703, -0.02307713,
       -0.02488703,  0.0266633 ,  0.02432867,  0.0266633 , -0.02488703,
       -0.02307713, -0.02307713, -0.02488703, -0.02488703,  0.0266633 ,
       -0.02488703, -0.02488703,  0.0266633 ,  0.02432867,  0.02

In [28]:
model.score(X_test, y_test)

0.4049483163072276

### Search for Optimum Radius

In [32]:
MINIMUM_RADIUS = 0.01
MAXIMUM_RADIUS = 0.1
INCREMENT = 0.01

for radius in np.arange(MINIMUM_RADIUS, MAXIMUM_RADIUS, INCREMENT):
    model = RadiusNeighborsRegressor(radius= radius)
    model.fit(X_train, y_train)
    formatted_radius = "{:.2f}".format(radius)
    print("radius: ", formatted_radius," score", model.score(X_test, y_test))



radius:  0.1  score 0.4049483163072276
radius:  0.2  score 0.4049483163072276
radius:  0.3  score 0.4049483163072276
radius:  0.4  score 0.4049483163072276
radius:  0.5  score 0.4049483163072276
radius:  0.6  score 0.4049483163072276
radius:  0.7  score 0.4049483163072276
radius:  0.8  score 0.4049483163072276
radius:  0.9  score 0.4049483163072276
radius:  1.0  score 0.23318552448967256
radius:  1.1  score 0.23318552448967256
radius:  1.2  score 0.23318552448967256
radius:  1.3  score 0.23318552448967256
radius:  1.4  score 0.23318552448967256
