In [24]:
# imports
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

In [27]:
# Feature windows come from a single .npy file.
matrix_array_20 = np.load('Data/matrix_array_20_normalized.npy')

# The labels are split across 11 shard files (suffixes 0..10); read every shard
# and stitch them back together along the first axis.
file_names_answer = [f"Data/answer_array_20_{shard}.npy" for shard in range(11)]
loaded_arrays_answer = list(map(np.load, file_names_answer))
answer_array_20 = np.concatenate(loaded_arrays_answer, axis=0)

In [25]:
# Display both shapes so the leading (sample) dimensions can be compared.
matrix_array_20.shape, answer_array_20.shape    

((587563, 20, 19), (587563, 3))

In [26]:
from sklearn.model_selection import train_test_split

X, y = matrix_array_20, answer_array_20

# Stage 1: carve off the training split, stratified on the labels.
# NOTE(review): test_size=0.8 keeps only 20% of the samples for training; the
# other 80% is halved into validation and test below. Confirm this is intended
# rather than test_size=0.2.
X_train, X_temp, y_train, y_temp = train_test_split(
    X,
    y,
    test_size=0.8,
    random_state=1,
    stratify=y,
)

# Stage 2: split the held-out part evenly into validation and test sets.
X_valid, X_test, y_valid, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    random_state=1,
    stratify=y_temp,
)

In [10]:
from datetime import datetime, timezone

# Convert ISO 8601 date strings to Unix timestamp (milliseconds)
def iso_to_unix(iso_str):
    """Return the Unix time in milliseconds for a UTC ISO 8601 string.

    Args:
        iso_str: Timestamp formatted exactly as ``YYYY-MM-DDTHH:MM:SSZ``.

    Returns:
        int: Milliseconds elapsed since the Unix epoch.

    Raises:
        ValueError: If ``iso_str`` does not match the expected format.
    """
    naive = datetime.strptime(iso_str, "%Y-%m-%dT%H:%M:%SZ")
    # The trailing "Z" is matched as a literal character, so the UTC zone has
    # to be attached explicitly before asking for the timestamp.
    aware = naive.replace(tzinfo=timezone.utc)
    epoch_seconds = aware.timestamp()
    return int(epoch_seconds * 1000)

def unix_to_iso(unix_timestamp_ms):
    """Format a Unix timestamp in milliseconds as a UTC ISO 8601 string.

    Args:
        unix_timestamp_ms: Milliseconds since the Unix epoch (int or float).

    Returns:
        str: The instant formatted as ``YYYY-MM-DDTHH:MM:SSZ``. Sub-second
        precision is not included in the output.
    """
    # Convert milliseconds to seconds
    unix_timestamp_s = unix_timestamp_ms / 1000
    # Build a timezone-aware UTC datetime. datetime.utcfromtimestamp() is
    # deprecated since Python 3.12 and returns a naive object; the explicit
    # tz=timezone.utc form produces the same wall-clock fields.
    dt = datetime.fromtimestamp(unix_timestamp_s, tz=timezone.utc)
    # The "Z" suffix is a literal in the format string, so the output is
    # unchanged by the datetime now being timezone-aware.
    return dt.strftime("%Y-%m-%dT%H:%M:%SZ")

In [72]:
# Full BTCUSDT candle history.
data = pd.read_csv('Data/historical_BTCUSDT_3min_data.csv')

# Keep the candles whose open time lies after 2023-01-01T00:00:00Z.
# NOTE(review): the comparison is strict, so a candle opening exactly at the
# cutoff is excluded — confirm that is intended.
start_of_2023_ms = iso_to_unix('2023-01-01T00:00:00Z')
after_cutoff = data['Open time'] > start_of_2023_ms
data_2023 = data[after_cutoff]

# Persist the filtered candles before any columns are removed.
data_2023.to_csv('Data/2023_BTC_data.csv', index=False)

# Two working frames: one that keeps 'Open time' and one without it.
data_2023 = data_2023.drop(columns=['Close time', 'Ignore'])
data_2023_no_opentime = data_2023.drop(columns=['Open time'])
data_2023

Unnamed: 0,Open time,Open,High,Low,Close,Volume,Quote asset volume,Number of trades,Taker buy base asset volume,Taker buy quote asset volume
408507,1672531380000,16536.43,16537.80,16527.51,16536.70,313.76220,5.187489e+06,8881,156.83035,2.592933e+06
408508,1672531560000,16536.76,16540.43,16528.00,16529.28,248.31133,4.105756e+06,7126,119.21672,1.971200e+06
408509,1672531740000,16529.28,16530.87,16522.55,16525.84,229.23089,3.788275e+06,7984,106.55634,1.760959e+06
408510,1672531920000,16525.84,16526.46,16520.00,16520.69,154.79753,2.557735e+06,5986,76.53916,1.264679e+06
408511,1672532100000,16521.26,16526.84,16517.72,16522.48,301.41020,4.979816e+06,9001,165.45431,2.733620e+06
...,...,...,...,...,...,...,...,...,...,...
590396,1705276080000,41974.12,41987.22,41960.00,41980.24,66.95987,2.810908e+06,1570,20.29217,8.517988e+05
590397,1705276260000,41980.25,41980.25,41761.10,41761.11,162.07337,6.788964e+06,3466,72.52297,3.038316e+06
590398,1705276440000,41761.10,41864.42,41720.00,41837.73,424.45409,1.773590e+07,5239,199.00224,8.315683e+06
590399,1705276620000,41837.73,41850.00,41732.35,41732.35,135.98620,5.685374e+06,2422,59.46099,2.485873e+06


In [58]:
# Window sizes (in rows) for the moving averages
windows = [5, 10, 20, 30, 60, 120, 240]
# Window size and standard deviation multiplier for the Bollinger Bands
BB_window_size = 90
BB_std_multiplier = 1
# Range of future candles (defined here but not used in this cell)
future_start = 20
future_end = 24


def add_ma_and_bollinger(frame, ma_windows, bb_window, bb_multiplier):
    """Return a copy of ``frame`` with moving-average and Bollinger Band columns.

    The input frame is not modified. New columns are appended in this order:
    ``MA{w}`` for every ``w`` in ``ma_windows``, then ``MA{bb_window}``,
    ``Upper_Band{bb_window}`` and ``Lower_Band{bb_window}``. Every row that
    contains a NaN (notably the warm-up rows of the longest rolling window)
    is dropped and the index is reset to 0..n-1.

    Args:
        frame: DataFrame with a ``'Close'`` column.
        ma_windows: Iterable of rolling window sizes for the moving averages.
        bb_window: Rolling window size for the Bollinger Bands.
        bb_multiplier: Number of rolling standard deviations between the
            band's moving average and each band.

    Returns:
        A new DataFrame with the indicator columns added and NaN rows removed.
    """
    close = frame['Close']

    # Moving averages of the close price, one column per window size.
    indicators = {f'MA{w}': close.rolling(w).mean() for w in ma_windows}

    # Bollinger Bands: rolling mean +/- multiplier * rolling standard deviation.
    bb_rolling = close.rolling(bb_window)
    bb_mid = bb_rolling.mean()
    bb_offset = bb_multiplier * bb_rolling.std()
    indicators[f'MA{bb_window}'] = bb_mid
    indicators[f'Upper_Band{bb_window}'] = bb_mid + bb_offset
    indicators[f'Lower_Band{bb_window}'] = bb_mid - bb_offset

    return frame.assign(**indicators).dropna().reset_index(drop=True)


# NOTE(review): data_2023 is rebound to the trimmed result, so running this
# cell twice drops another batch of warm-up rows. Re-run the loading cell first.
data_2023 = add_ma_and_bollinger(data_2023, windows, BB_window_size, BB_std_multiplier)
data_2023

Unnamed: 0,Open time,Open,High,Low,Close,Volume,Quote asset volume,Number of trades,Taker buy base asset volume,Taker buy quote asset volume,MA5,MA10,MA20,MA30,MA60,MA120,MA240,MA90,Upper_Band90,Lower_Band90
0,1672574400000,16556.66,16558.05,16554.91,16558.04,207.12723,3.429338e+06,7673,108.88610,1.802822e+06,16553.250,16551.694,16548.7635,16546.038667,16537.489333,16532.317167,16532.058708,16531.639111,16545.453729,16517.824493
1,1672574580000,16557.65,16563.16,16555.21,16556.58,293.15135,4.854459e+06,8439,146.91935,2.432944e+06,16554.820,16552.395,16549.3005,16546.744667,16538.291000,16532.483000,16532.141542,16532.152333,16546.026529,16518.278137
2,1672574760000,16556.58,16559.89,16550.92,16553.17,294.94605,4.882716e+06,9135,136.37729,2.257711e+06,16555.642,16552.862,16549.5900,16547.274333,16539.027167,16532.637083,16532.241083,16532.537000,16546.509407,16518.564593
3,1672574940000,16553.17,16554.41,16548.58,16553.54,190.53334,3.153707e+06,7096,90.21901,1.493346e+06,16555.598,16553.188,16549.6975,16547.865333,16539.732833,16532.773083,16532.356500,16532.869000,16546.982390,16518.755610
4,1672575120000,16553.29,16572.42,16553.13,16564.36,474.51760,7.858992e+06,12230,252.02904,4.174066e+06,16557.138,16554.529,16550.5195,16548.691333,16540.559667,16532.995083,16532.538458,16533.303667,16547.777272,16518.830061
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
181650,1705276080000,41974.12,41987.22,41960.00,41980.24,66.95987,2.810908e+06,1570,20.29217,8.517988e+05,41894.068,41942.153,42049.1170,42138.247000,42293.168333,42542.915667,42707.473958,42418.309333,42683.343915,42153.274752
181651,1705276260000,41980.25,41980.25,41761.10,41761.11,162.07337,6.788964e+06,3466,72.52297,3.038316e+06,41878.690,41925.864,42020.4720,42113.751000,42278.620167,42533.218417,42703.124208,42406.888889,42677.851683,42135.926095
181652,1705276440000,41761.10,41864.42,41720.00,41837.73,424.45409,1.773590e+07,5239,199.00224,8.315683e+06,41893.438,41908.211,41996.1720,42094.638667,42266.974833,42523.942250,42698.950667,42396.386222,42670.903811,42121.868633
181653,1705276620000,41837.73,41850.00,41732.35,41732.35,135.98620,5.685374e+06,2422,59.46099,2.485873e+06,41857.110,41878.247,41969.2400,42075.053333,42254.048667,42513.601250,42694.377167,42385.078889,42665.756620,42104.401158


In [59]:
# Indicator settings
windows = [5, 10, 20, 30, 60, 120, 240]  # moving-average window sizes
BB_window_size = 90                       # Bollinger Band rolling window
BB_std_multiplier = 1                     # band offset in standard deviations
# Range of future candles (defined here but not used in this cell)
future_start = 20
future_end = 24

# NOTE(review): this repeats the indicator pipeline already applied to
# data_2023; a shared helper would remove the duplication.

# Every indicator is a rolling statistic of the close price.
close_rolling = data_2023_no_opentime['Close'].rolling

# Moving averages: one MA<window> column per configured window size.
for window in windows:
    moving_avg = close_rolling(window).mean()
    data_2023_no_opentime[f'MA{window}'] = moving_avg

# Bollinger Bands: rolling mean plus/minus a multiple of the rolling std.
bb_rolling = close_rolling(BB_window_size)
BB_moving_avg = bb_rolling.mean()
BB_std_dev = bb_rolling.std()
band_offset = BB_std_multiplier * BB_std_dev
BB_upper_band = BB_moving_avg + band_offset
BB_lower_band = BB_moving_avg - band_offset

# Append the band columns, then discard every row containing NaN (the rolling
# warm-up rows) and renumber the index from zero.
data_2023_no_opentime = (
    data_2023_no_opentime
    .assign(**{
        f'MA{BB_window_size}': BB_moving_avg,
        f'Upper_Band{BB_window_size}': BB_upper_band,
        f'Lower_Band{BB_window_size}': BB_lower_band,
    })
    .dropna()
    .reset_index(drop=True)
)
data_2023_no_opentime

Unnamed: 0,Open,High,Low,Close,Volume,Quote asset volume,Number of trades,Taker buy base asset volume,Taker buy quote asset volume,MA5,MA10,MA20,MA30,MA60,MA120,MA240,MA90,Upper_Band90,Lower_Band90
0,16556.66,16558.05,16554.91,16558.04,207.12723,3.429338e+06,7673,108.88610,1.802822e+06,16553.250,16551.694,16548.7635,16546.038667,16537.489333,16532.317167,16532.058708,16531.639111,16545.453729,16517.824493
1,16557.65,16563.16,16555.21,16556.58,293.15135,4.854459e+06,8439,146.91935,2.432944e+06,16554.820,16552.395,16549.3005,16546.744667,16538.291000,16532.483000,16532.141542,16532.152333,16546.026529,16518.278137
2,16556.58,16559.89,16550.92,16553.17,294.94605,4.882716e+06,9135,136.37729,2.257711e+06,16555.642,16552.862,16549.5900,16547.274333,16539.027167,16532.637083,16532.241083,16532.537000,16546.509407,16518.564593
3,16553.17,16554.41,16548.58,16553.54,190.53334,3.153707e+06,7096,90.21901,1.493346e+06,16555.598,16553.188,16549.6975,16547.865333,16539.732833,16532.773083,16532.356500,16532.869000,16546.982390,16518.755610
4,16553.29,16572.42,16553.13,16564.36,474.51760,7.858992e+06,12230,252.02904,4.174066e+06,16557.138,16554.529,16550.5195,16548.691333,16540.559667,16532.995083,16532.538458,16533.303667,16547.777272,16518.830061
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
181650,41974.12,41987.22,41960.00,41980.24,66.95987,2.810908e+06,1570,20.29217,8.517988e+05,41894.068,41942.153,42049.1170,42138.247000,42293.168333,42542.915667,42707.473958,42418.309333,42683.343915,42153.274752
181651,41980.25,41980.25,41761.10,41761.11,162.07337,6.788964e+06,3466,72.52297,3.038316e+06,41878.690,41925.864,42020.4720,42113.751000,42278.620167,42533.218417,42703.124208,42406.888889,42677.851683,42135.926095
181652,41761.10,41864.42,41720.00,41837.73,424.45409,1.773590e+07,5239,199.00224,8.315683e+06,41893.438,41908.211,41996.1720,42094.638667,42266.974833,42523.942250,42698.950667,42396.386222,42670.903811,42121.868633
181653,41837.73,41850.00,41732.35,41732.35,135.98620,5.685374e+06,2422,59.46099,2.485873e+06,41857.110,41878.247,41969.2400,42075.053333,42254.048667,42513.601250,42694.377167,42385.078889,42665.756620,42104.401158


In [52]:
# Build every overlapping window of 20 consecutive rows of data_2023 as one
# 3D array of shape (n_windows, 20, n_columns), where window i holds rows
# i .. i+19. A strided view replaces the former Python loop of one
# DataFrame.iloc slice per window, which is far slower for ~180k windows.
WINDOW_ROWS = 20
frame_values = data_2023.to_numpy()

if len(frame_values) >= WINDOW_ROWS:
    # sliding_window_view appends the window axis last, giving
    # (n_windows, n_columns, 20); swap the last two axes to get
    # (n_windows, 20, n_columns). ascontiguousarray materialises the read-only
    # view into an ordinary writable array, matching what np.array(list) gave.
    data_2023_matrix_20 = np.ascontiguousarray(
        np.lib.stride_tricks.sliding_window_view(
            frame_values, WINDOW_ROWS, axis=0
        ).transpose(0, 2, 1)
    )
else:
    # Fewer than 20 rows: no complete window exists. Keep the 3D layout so
    # downstream reshapes still see the expected number of dimensions.
    data_2023_matrix_20 = np.empty(
        (0, WINDOW_ROWS, frame_values.shape[1]), dtype=frame_values.dtype
    )


In [60]:
# Collect every run of 20 consecutive rows as its own 2D matrix. The last valid
# start position is len - 20, hence the range of len - 19 start indices.
row_count = len(data_2023_no_opentime)
matrix_list = [
    data_2023_no_opentime.iloc[start:start + 20].values
    for start in range(row_count - 19)
]

# Stack the per-window matrices into a single numpy array.
data_2023_no_opentime_matrix_20 = np.array(matrix_list)


In [62]:
# Display both window arrays' shapes; they should differ only in the last
# (column) axis, since one frame keeps 'Open time' and the other drops it.
data_2023_matrix_20.shape, data_2023_no_opentime_matrix_20.shape

((181636, 20, 20), (181636, 20, 19))

In [63]:
# Persist the windows that still contain the 'Open time' column.
# (data_2023_no_opentime_matrix_20 is not written to disk here.)
np.save('Data/data_2023_matrix_20.npy', data_2023_matrix_20)

## RFC model

In [64]:
import joblib

# Load the saved model and the scaler from disk.
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code — only load artifacts that come from a trusted source.
# NOTE(review): presumably the scaler was fitted on the same 19 feature columns
# used below — confirm against the training notebook.
RFC = joblib.load('Models/RFC_model.pkl')
scaler = joblib.load('Scalers/StandardScaler_20.pkl')

In [65]:
# Scale the windows (the ones without 'Open time') with the loaded scaler.
# The dimensions are read from the array itself instead of being hard-coded,
# so the cell keeps working if the window length or column count changes.
n_windows, window_len, n_features = data_2023_no_opentime_matrix_20.shape

# Collapse to 2D (one row per candle, one column per feature) for the scaler.
temp = data_2023_no_opentime_matrix_20.reshape(-1, n_features)
temp_normalized = scaler.transform(temp)

# Restore the (n_windows, window_len, n_features) layout.
data_2023_matrix_20_normalized = temp_normalized.reshape(n_windows, window_len, n_features)

In [66]:
# Flatten each window into a single feature row in one reshape, rather than
# calling flatten() on every window in a Python loop. Values and their
# (row-major) order are unchanged. Note the result may share memory with
# data_2023_matrix_20_normalized instead of being an independent copy.
data_2023_matrix_20_normalized_flattened = data_2023_matrix_20_normalized.reshape(
    data_2023_matrix_20_normalized.shape[0],
    int(np.prod(data_2023_matrix_20_normalized.shape[1:])),
)


In [67]:
# Run the loaded model on the flattened, scaled windows.
y_pred = RFC.predict(data_2023_matrix_20_normalized_flattened)

[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.4s
[Parallel(n_jobs=8)]: Done 100 out of 100 | elapsed:    1.0s finished


In [68]:
# Display the predictions and their count (expected: one per window).
y_pred, len(y_pred)

(array([0, 0, 0, ..., 1, 0, 0]), 181636)

In [69]:
# Persist the predictions for later use.
np.save('Data/RFC_20_2023_prediction.npy', y_pred)