In [4]:
import os
import requests
import pandas as pd
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset
from sklearn.metrics import classification_report

In [5]:
load_dotenv()

alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

api = tradeapi.REST(
    alpaca_api_key,
    alpaca_secret_key,
    api_version = "v2"
)

In [6]:
start_date = pd.Timestamp("2014-01-01", tz="Australia/Melbourne").isoformat()
end_date = pd.Timestamp("2022-01-01", tz="Australia/Melbourne").isoformat()

data = api.get_crypto_bars(
    'BTCUSD',
    '1Day',
    start=start_date,
    end=end_date
).df

data.head(10)

Unnamed: 0_level_0,exchange,open,high,low,close,volume,trade_count,vwap
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2015-01-07 06:00:00+00:00,CBSE,360.0,360.0,264.81,275.44,6.42,634,298.877741
2015-01-08 06:00:00+00:00,CBSE,297.08,314.91,265.5,288.99,3.93,393,289.251781
2015-01-12 06:00:00+00:00,CBSE,260.0,260.0,260.0,260.0,1.0,1,260.0
2015-01-13 06:00:00+00:00,CBSE,200.0,221.0,200.0,219.63,0.03,3,213.543333
2015-01-14 06:00:00+00:00,CBSE,220.0,220.0,109.87,120.0,11.274638,1115,155.605993
2015-01-15 06:00:00+00:00,CBSE,199.0,224.0,199.0,204.22,1.775121,165,206.555271
2015-01-16 06:00:00+00:00,CBSE,200.12,209.82,196.51,199.46,2.49,249,203.13257
2015-01-17 06:00:00+00:00,CBSE,195.0,210.0,184.0,184.0,0.089,9,195.775281
2015-01-18 06:00:00+00:00,CBSE,210.0,225.51,210.0,225.51,0.41,5,210.378293
2015-01-20 06:00:00+00:00,CBSE,215.0,218.0,208.0,218.0,0.04,4,214.5


In [7]:
data = data.drop(columns='exchange')

In [8]:
data['actual_returns'] = data['close'].pct_change()

In [9]:
data['sma_fast'] = data['close'].rolling(window=3).mean()
data['sma_slow'] = data['close'].rolling(window=10).mean()

In [10]:
data = data.dropna()

In [11]:
data['signal'] = 0
data.loc[(data["actual_returns"] >= 0), "signal"] = 1
data.loc[(data["actual_returns"] < 0), "signal"] = -1

In [12]:
data.head(10)

Unnamed: 0_level_0,open,high,low,close,volume,trade_count,vwap,actual_returns,sma_fast,sma_slow,signal
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2015-01-20 06:00:00+00:00,215.0,218.0,208.0,218.0,0.04,4,214.5,-0.033302,209.17,219.525,-1
2015-01-21 06:00:00+00:00,245.5,257.73,216.59,246.62,4.33,236,229.540393,0.131284,230.043333,216.643,1
2015-01-22 06:00:00+00:00,217.59,239.67,217.59,226.32,0.03,3,227.86,-0.082313,230.313333,210.376,-1
2015-01-23 06:00:00+00:00,230.0,235.0,215.0,235.0,0.180001,10,230.277693,0.038353,235.98,207.876,1
2015-01-24 06:00:00+00:00,248.02,255.05,247.55,250.91,6.637441,15,252.211775,0.067702,237.41,211.004,1
2015-01-25 06:00:00+00:00,262.78,290.0,260.43,283.28,483.522412,291,273.022937,0.12901,256.396667,227.332,1
2015-01-26 06:00:00+00:00,284.25,322.72,241.43,274.48,4696.796382,2931,279.986229,-0.031065,269.556667,234.358,-1
2015-01-27 06:00:00+00:00,263.96,265.99,253.53,257.89,538.920802,1083,260.189957,-0.060442,271.883333,240.201,-1
2015-01-28 06:00:00+00:00,257.9,262.0,228.5,236.09,4800.863335,6244,244.627508,-0.084532,256.153333,245.41,-1
2015-01-29 06:00:00+00:00,232.62,240.0,221.14,235.03,5033.534735,6091,232.060766,-0.00449,243.003333,246.362,-1


In [13]:
X = data[['sma_fast', 'sma_slow']].shift().dropna()
y = data['signal']

display(X.head())
display(y.head())

Unnamed: 0_level_0,sma_fast,sma_slow
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-21 06:00:00+00:00,209.17,219.525
2015-01-22 06:00:00+00:00,230.043333,216.643
2015-01-23 06:00:00+00:00,230.313333,210.376
2015-01-24 06:00:00+00:00,235.98,207.876
2015-01-25 06:00:00+00:00,237.41,211.004


timestamp
2015-01-20 06:00:00+00:00   -1
2015-01-21 06:00:00+00:00    1
2015-01-22 06:00:00+00:00   -1
2015-01-23 06:00:00+00:00    1
2015-01-24 06:00:00+00:00    1
Name: signal, dtype: int64

In [14]:
data.to_csv('master.csv')

In [15]:
# Assign a copy of the sma_fast and sma_slow columns to a features DataFrame called X
X = data[['sma_fast', 'sma_slow']].shift().dropna()

# Review the DataFrame
X.head()

Unnamed: 0_level_0,sma_fast,sma_slow
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-21 06:00:00+00:00,209.17,219.525
2015-01-22 06:00:00+00:00,230.043333,216.643
2015-01-23 06:00:00+00:00,230.313333,210.376
2015-01-24 06:00:00+00:00,235.98,207.876
2015-01-25 06:00:00+00:00,237.41,211.004


In [16]:
# Create the target set selecting the Signal column and assiging it to y
y = data['signal']

# Review the value counts
y.value_counts()

 1    1742
-1    1509
Name: signal, dtype: int64

In [17]:
# Select the start of the training period
training_begin = X.index.min()

# Display the training begin date
print(training_begin)

2015-01-21 06:00:00+00:00


In [37]:
# Select the ending period for the training data with an offset of 3 months
training_end = X.index.min() + DateOffset(months=3)

# Display the training end date
print(training_end)

2015-04-21 06:00:00+00:00


In [19]:
# Generate the X_train and y_train DataFrames
X_train = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

# Review the X_train DataFrame
X_train.head()

Unnamed: 0_level_0,sma_fast,sma_slow
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-21 06:00:00+00:00,209.17,219.525
2015-01-22 06:00:00+00:00,230.043333,216.643
2015-01-23 06:00:00+00:00,230.313333,210.376
2015-01-24 06:00:00+00:00,235.98,207.876
2015-01-25 06:00:00+00:00,237.41,211.004


In [20]:
# Generate the X_test and y_test DataFrames
X_test = X.loc[training_end+DateOffset(hours=1):]
y_test = y.loc[training_end+DateOffset(hours=1):]

# Review the X_test DataFrame
X_train.head()

Unnamed: 0_level_0,sma_fast,sma_slow
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-21 06:00:00+00:00,209.17,219.525
2015-01-22 06:00:00+00:00,230.043333,216.643
2015-01-23 06:00:00+00:00,230.313333,210.376
2015-01-24 06:00:00+00:00,235.98,207.876
2015-01-25 06:00:00+00:00,237.41,211.004


In [21]:
# Scale the features DataFrames

# Create a StandardScaler instance
scaler = StandardScaler()

# Apply the scaler model to fit the X-train data
X_scaler = scaler.fit(X_train)

# Transform the X_train and X_test DataFrames using the X_scaler
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [22]:
# From SVM, instantiate SVC classifier model instance
svm_model = svm.SVC(kernel='rbf', probability=True, C=5,gamma=10)
 
# Fit the model to the data using the training data
svm_model = svm_model.fit(X_train_scaled, y_train)
 
# Use the testing data to make the model predictions
svm_pred = svm_model.predict(X_test_scaled)

# Review the model's predicted values
svm_pred

array([ 1,  1, -1, ...,  1,  1,  1])

In [23]:
# Use a classification report to evaluate the model using the predictions and testing data
svm_testing_report = classification_report(y_test, svm_pred)

# Print the classification report
print(svm_testing_report)

              precision    recall  f1-score   support

          -1       0.59      0.03      0.06      1463
           1       0.54      0.98      0.70      1696

    accuracy                           0.54      3159
   macro avg       0.56      0.51      0.38      3159
weighted avg       0.56      0.54      0.40      3159

