In [None]:
#Import the libraries
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [None]:
#Create a function to get RSI values
def RSI(data, time_period=14, column='Close', MA='SMA'):
    """Compute the Relative Strength Index and store it as a new column of ``data``.

    The result is written in place to ``data['RSI<time_period>_<MA>']``
    (for example ``RSI14_SMA``); nothing is returned.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the price series; it is modified in place.
    time_period : int, default 14
        Rolling window length (``MA='SMA'``) or EMA span (``MA='EMA'``).
    column : str, default 'Close'
        Name of the column the RSI is computed from.
    MA : {'SMA', 'EMA'}, default 'SMA'
        Averaging method applied to the gains and losses.

    Raises
    ------
    ValueError
        If ``MA`` is neither ``'SMA'`` nor ``'EMA'``.
    """
    # Fail early with a clear message instead of an UnboundLocalError later on.
    if MA not in ('SMA', 'EMA'):
        raise ValueError("MA must be 'SMA' or 'EMA', got %r" % (MA,))

    # Row-over-row change; the first difference is NaN and is dropped, so the
    # first row of ``data`` ends up with a NaN RSI after index alignment.
    delta = data[column].diff(1).dropna()

    # Split the changes into gains (losses zeroed) and losses (gains zeroed).
    up = delta.clip(lower=0)
    down = delta.clip(upper=0)

    if MA == 'SMA':
        # rolling() yields NaN until a full window of changes is available.
        avg_gain = up.rolling(window=time_period).mean()
        avg_loss = down.rolling(window=time_period).mean().abs()
    else:  # MA == 'EMA'
        avg_gain = up.ewm(span=time_period, adjust=False).mean()
        avg_loss = down.ewm(span=time_period, adjust=False).mean().abs()

    # A zero average loss with a positive average gain gives rs == inf,
    # which maps to an RSI of exactly 100.
    rs = avg_gain / avg_loss
    rsi_values = 100.0 - (100.0 / (1.0 + rs))
    data['RSI' + str(time_period) + '_' + str(MA)] = rsi_values

In [None]:
#Create a function to select the best model to use
def select_best_model(x_train, y_train, models=None):
    """Fit several classifiers and return the one with the highest training score.

    Parameters
    ----------
    x_train : array-like
        Training features, passed to each candidate's ``fit`` and ``score``.
    y_train : array-like
        Training labels.
    models : iterable of estimators, optional
        Candidates to compare; each must provide ``fit(X, y)`` and
        ``score(X, y)``. Defaults to a fresh ``RandomForestClassifier``,
        ``SVC`` and ``LogisticRegression``.

    Returns
    -------
    The fitted candidate with the highest score. Ties go to the candidate
    that appears first.

    Raises
    ------
    ValueError
        If ``models`` is empty.
    """
    if models is None:
        models = [
            RandomForestClassifier(),
            SVC(),
            LogisticRegression(),
        ]

    # NOTE: candidates are scored on the same data they were fitted on, so the
    # ranking reflects training accuracy, not held-out performance.
    model_scores = []
    for model in models:
        model.fit(x_train, y_train)
        score = model.score(x_train, y_train)
        # Pair each model with its own score (not with the running list).
        model_scores.append((model, score))

    if not model_scores:
        raise ValueError("models must contain at least one candidate")

    # max() returns the first maximal element, which gives the tie-break above.
    best_model = max(model_scores, key=lambda pair: pair[1])[0]

    return best_model

In [None]:
# Load the data from a CSV file; the path is relative to the current working
# directory. Later cells read the 'Date', 'Close' and 'Volume' columns from it.
df = pd.read_csv('BTC 21.csv')

In [None]:
# Create a new column called Percentage Change (row-over-row change of Close)
df['Percentage Change'] = df['Close'].pct_change()

# Create a buy and sell signal (1=Buy, -1=Sell)
# The first row has a NaN percentage change; it is labelled -1 here and then
# removed by the dropna() below.
df['Signal'] = np.where(df['Percentage Change'] >= 0, 1, -1)
df.dropna(inplace=True)  # Remove rows that contain NaN values

# Create RSI columns (adds 'RSI6_SMA' and 'RSI2_SMA' to df in place)
RSI(df, 6, MA="SMA")
RSI(df, 2, MA="SMA")

# Drop the leading rows whose RSI is still NaN
df.dropna(inplace=True)

# Create the buy (1) and sell (-1) and hold (0) signals based on RSI values for RSI6_SMA and RSI2_SMA
# RSI <= 10 -> buy, RSI >= 90 -> sell, anything in between -> hold
df['RSI6_signal'] = np.where(df['RSI6_SMA'] <= 10, 1, 0)
df['RSI6_signal'] = np.where(df['RSI6_SMA'] >= 90, -1, df['RSI6_signal'])

df['RSI2_signal'] = np.where(df['RSI2_SMA'] <= 10, 1, 0)
df['RSI2_signal'] = np.where(df['RSI2_SMA'] >= 90, -1, df['RSI2_signal'])

# Set the index to be the date.
# set_index() returns a new frame, so the result must be assigned back;
# with the default drop=True it also removes 'Date' from the columns,
# so no separate drop() is needed.
df = df.set_index('Date')


#Show the data
print(df)

             Open          High           Low         Close     Adj Close  \
15   36821.648438  37864.367188  35633.554688  36178.140625  36178.140625   
16   36163.648438  36722.351563  34069.320313  35791.277344  35791.277344   
17   35792.238281  37299.285156  34883.843750  36630.074219  36630.074219   
18   36642.234375  37755.890625  36069.804688  36069.804688  36069.804688   
19   36050.113281  36378.328125  33570.476563  35547.750000  35547.750000   
..            ...           ...           ...           ...           ...   
360  50802.609375  51956.328125  50499.468750  50640.417969  50640.417969   
361  50679.859375  50679.859375  47414.210938  47588.855469  47588.855469   
362  47623.871094  48119.742188  46201.496094  46444.710938  46444.710938   
363  46490.605469  47879.964844  46060.312500  47178.125000  47178.125000   
364  47169.371094  48472.527344  45819.953125  46306.445313  46306.445313   

          Volume  Percentage Change  Signal   RSI6_SMA   RSI2_SMA  \
15   5

In [None]:
#Get the features to train and test on and the target data set to predict/classify
# NOTE(review): 'Signal' is derived from the same row's change in Close, and the
# features include that row's Close and RSI signals - confirm this is intended,
# since it may let the model see information about the target.
features = ['Volume', 'Close', 'RSI6_signal', 'RSI2_signal']
target = 'Signal'
x= df[features]
y= df[target]

#Split the data set into (80%) training and (20%) testing; random_state fixes the split
# NOTE(review): train_test_split shuffles rows by default, so train and test rows
# are interleaved in time - confirm that is acceptable for this price series.
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size= 0.2, random_state= 0)

#Select the model returned by select_best_model (fitted on the training split)
best_model = select_best_model(x_train, y_train)

#Evaluate the best model on the test data and get the model's predictions/classifications
# y_pred is not used again in this cell
y_pred = best_model.predict(x_test)
#Calculate the score of the model on the held-out test split
score = best_model.score(x_test, y_test)

#Print the Accuracy of the best model on the test data
print("Accuracy of the best model:", score)

Accuracy of the best model: 0.7571428571428571
