In [133]:
# Initial imports
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
import hvplot.pandas
p
import yfinance as yf
import pandas_datareader as pdr
import datetime as dt

In [134]:
# Import stock data to dataframe
tickers_index = 'BTC-USD'

start = dt.datetime(2017, 1, 1)
end = dt.datetime(2022, 8, 31)
 
df_1 = pdr.get_data_yahoo(tickers_index, start, end)
df_1.head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-01-01,1003.080017,958.698975,963.65802,998.325012,147775008,998.325012
2017-01-02,1031.390015,996.702026,998.617004,1021.75,222184992,1021.75
2017-01-03,1044.079956,1021.599976,1021.599976,1043.839966,185168000,1043.839966
2017-01-04,1159.420044,1044.400024,1044.400024,1154.72998,344945984,1154.72998
2017-01-05,1191.099976,910.416992,1156.72998,1013.380005,510199008,1013.380005


In [135]:
df_1.head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-01-01,1003.080017,958.698975,963.65802,998.325012,147775008,998.325012
2017-01-02,1031.390015,996.702026,998.617004,1021.75,222184992,1021.75
2017-01-03,1044.079956,1021.599976,1021.599976,1043.839966,185168000,1043.839966
2017-01-04,1159.420044,1044.400024,1044.400024,1154.72998,344945984,1154.72998
2017-01-05,1191.099976,910.416992,1156.72998,1013.380005,510199008,1013.380005


In [136]:
def clean_df(df_1):
    if 'Open' in df_1.columns:
        df_1 = df_1.drop(['Open','High','Low','Volume','Adj Close'],axis=1) #drop unwanted columns
        df_1 = df_1.dropna().copy() #drop null values
        print(f"Number of Null Values: {df_1.isnull().sum().sum()}") #check for null values and print
    return df_1 #return df to variable

In [137]:
# verify the data frame
df_1 = clean_df(df_1)
df_1.columns = ['BTC-USD']
df_1.tail()

Number of Null Values: 0


Unnamed: 0_level_0,BTC-USD
Date,Unnamed: 1_level_1
2022-08-28,19616.814453
2022-08-29,20297.994141
2022-08-30,19796.808594
2022-08-31,20049.763672
2022-09-01,20127.140625


In [138]:
# Calculate the daily returns using the closing prices and the pct_change function
df_1["actual_returns"] = df_1["BTC-USD"].pct_change()

# Drop all NaN values from the DataFrame
df_1 = df_1.dropna()

# Review the DataFrame
display(df_1.head())
display(df_1.tail())



Unnamed: 0_level_0,BTC-USD,actual_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-01-02,1021.75,0.023464
2017-01-03,1043.839966,0.02162
2017-01-04,1154.72998,0.106233
2017-01-05,1013.380005,-0.12241
2017-01-06,902.200989,-0.109711


Unnamed: 0_level_0,BTC-USD,actual_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-08-28,19616.814453,-0.021202
2022-08-29,20297.994141,0.034724
2022-08-30,19796.808594,-0.024691
2022-08-31,20049.763672,0.012778
2022-09-01,20127.140625,0.003859


In [139]:
# Create a simple moving average (SMA) using the short_window and assign this to a new columns called sma_fast
# Define a window size of 50
short_window = 50
df_1["sma_fast"] = df_1["BTC-USD"].rolling(window=short_window).mean()
# Define a window size of 100
long_window = 100

# Create a simple moving average (SMA) using the long_window and assign this to a new columns called sma_slow
df_1["sma_slow"] = df_1["BTC-USD"].rolling(window=long_window).mean()

In [140]:
# Drop the NaNs using dropna()
df_1 = df_1.dropna()

In [141]:
# Create a new column in the trading_df called signal setting its value to zero.
df_1["signal"] = 0.0
df_1

Unnamed: 0_level_0,BTC-USD,actual_returns,sma_fast,sma_slow,signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-04-11,1205.010010,0.015062,1139.049843,1048.253910,0.0
2017-04-12,1200.369995,-0.003851,1140.751241,1050.040110,0.0
2017-04-13,1169.280029,-0.025900,1141.788043,1051.294510,0.0
2017-04-14,1167.540039,-0.001488,1141.804445,1051.422611,0.0
2017-04-15,1172.520020,0.004265,1141.781244,1053.014011,0.0
...,...,...,...,...,...
2022-08-28,19616.814453,-0.021202,22343.746484,23601.842871,0.0
2022-08-29,20297.994141,0.034724,22332.497383,23510.500547,0.0
2022-08-30,19796.808594,-0.024691,22329.022422,23405.231406,0.0
2022-08-31,20049.763672,0.012778,22343.539414,23314.739941,0.0


In [142]:
# Generate the trading signal 0 or 1,
# where 1 is the short-window (SMA50) greater than the long-window (SMA100)
# and 0 is when the condition is not met
df_1["signal"][short_window:] = np.where(
    df_1["sma_slow"][short_window:] > df_1["sma_fast"][short_window:], 1.0, 0.0
)

# Review the DataFrame
df_1.tail(10)

Unnamed: 0_level_0,BTC-USD,actual_returns,sma_fast,sma_slow,signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-08-23,21528.087891,0.006037,22399.442031,24057.939121,1.0
2022-08-24,21395.019531,-0.006181,22423.540117,23973.260137,1.0
2022-08-25,21600.904297,0.009623,22444.593281,23885.010605,1.0
2022-08-26,20260.019531,-0.062075,22417.041914,23800.408086,1.0
2022-08-27,20041.738281,-0.010774,22383.254336,23697.682129,1.0
2022-08-28,19616.814453,-0.021202,22343.746484,23601.842871,1.0
2022-08-29,20297.994141,0.034724,22332.497383,23510.500547,1.0
2022-08-30,19796.808594,-0.024691,22329.022422,23405.231406,1.0
2022-08-31,20049.763672,0.012778,22343.539414,23314.739941,1.0
2022-09-01,20127.140625,0.003859,22341.840742,23219.455488,1.0


In [143]:
df_1['signal'].value_counts()

0.0    1105
1.0     865
Name: signal, dtype: int64

In [145]:
# Calculate the strategy returns and add them to the signals_df DataFrame
df_1['Strategy Returns'] = df_1['actual_returns'] * df_1['signal'].shift()

# Review the DataFrame
display(df_1.head())
display(df_1.tail())

Unnamed: 0_level_0,BTC-USD,actual_returns,sma_fast,sma_slow,signal,Strategy Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-04-11,1205.01001,0.015062,1139.049843,1048.25391,0.0,
2017-04-12,1200.369995,-0.003851,1140.751241,1050.04011,0.0,-0.0
2017-04-13,1169.280029,-0.0259,1141.788043,1051.29451,0.0,-0.0
2017-04-14,1167.540039,-0.001488,1141.804445,1051.422611,0.0,-0.0
2017-04-15,1172.52002,0.004265,1141.781244,1053.014011,0.0,0.0


Unnamed: 0_level_0,BTC-USD,actual_returns,sma_fast,sma_slow,signal,Strategy Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-08-28,19616.814453,-0.021202,22343.746484,23601.842871,1.0,-0.021202
2022-08-29,20297.994141,0.034724,22332.497383,23510.500547,1.0,0.034724
2022-08-30,19796.808594,-0.024691,22329.022422,23405.231406,1.0,-0.024691
2022-08-31,20049.763672,0.012778,22343.539414,23314.739941,1.0,0.012778
2022-09-01,20127.140625,0.003859,22341.840742,23219.455488,1.0,0.003859


In [146]:
df_1.head()

Unnamed: 0_level_0,BTC-USD,actual_returns,sma_fast,sma_slow,signal,Strategy Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-04-11,1205.01001,0.015062,1139.049843,1048.25391,0.0,
2017-04-12,1200.369995,-0.003851,1140.751241,1050.04011,0.0,-0.0
2017-04-13,1169.280029,-0.0259,1141.788043,1051.29451,0.0,-0.0
2017-04-14,1167.540039,-0.001488,1141.804445,1051.422611,0.0,-0.0
2017-04-15,1172.52002,0.004265,1141.781244,1053.014011,0.0,0.0


In [147]:
# Plot Strategy Returns to examine performance
(1 + df_1['Strategy Returns']).cumprod().plot()

<matplotlib.axes._subplots.AxesSubplot at 0x7f6e7433c5d0>

In [148]:
# Imports 
from pandas.tseries.offsets import DateOffset

In [150]:
 # Assign a copy of the sma_fast and sma_slow columns to a new DataFrame called X
X = df_1[["sma_fast", "sma_slow"]].copy()

# Display sample data
display(X.head())
display(X.tail())

Unnamed: 0_level_0,sma_fast,sma_slow
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-04-11,1139.049843,1048.25391
2017-04-12,1140.751241,1050.04011
2017-04-13,1141.788043,1051.29451
2017-04-14,1141.804445,1051.422611
2017-04-15,1141.781244,1053.014011


Unnamed: 0_level_0,sma_fast,sma_slow
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-08-28,22343.746484,23601.842871
2022-08-29,22332.497383,23510.500547
2022-08-30,22329.022422,23405.231406
2022-08-31,22343.539414,23314.739941
2022-09-01,22341.840742,23219.455488


In [151]:
# Copy the new signal column to a new Series called y.
# Creating the target set y
y = df_1["signal"]

# Display sample data
y.head()

Date
2017-04-11    0.0
2017-04-12    0.0
2017-04-13    0.0
2017-04-14    0.0
2017-04-15    0.0
Name: signal, dtype: float64

In [152]:
# Select the start of the training period
training_begin = X.index.min()

# Display the training begin date
print(training_begin)

2017-04-11 00:00:00


In [153]:
# Select the ending period for the training data with an offset of 3 months
training_end = X.index.min() + DateOffset(months=12)

# Display the training end date
print(training_end)

2018-04-11 00:00:00


In [155]:
# Generate the X_train and y_train DataFrames
X_train = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

# Display sample data
X_train.head()

Unnamed: 0_level_0,sma_fast,sma_slow
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-04-11,1139.049843,1048.25391
2017-04-12,1140.751241,1050.04011
2017-04-13,1141.788043,1051.29451
2017-04-14,1141.804445,1051.422611
2017-04-15,1141.781244,1053.014011


In [156]:
# Generate the X_test and y_test DataFrames
X_test = X.loc[training_end:]
y_test = y.loc[training_end:]

# Display sample data
X_test.head()


Unnamed: 0_level_0,sma_fast,sma_slow
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-04-11,8768.319199,10136.625918
2018-04-12,8712.296191,10065.697422
2018-04-13,8670.115391,9992.647021
2018-04-14,8623.818203,9916.517422
2018-04-15,8594.139004,9825.513525


In [157]:
# Imports
from sklearn.preprocessing import StandardScaler

In [158]:
# Split the preprocessed data into training and testing datasets
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [159]:
# Create a StandardScaler instance
scaler = StandardScaler()
 
# Apply the scaler model to fit the X-train data
X_scaler = scaler.fit(X_train)
 
# Transform the X_train and X_test DataFrames using the X_scaler
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)
y_train


Date
2019-07-05    0.0
2018-01-06    0.0
2022-03-02    1.0
2022-08-23    1.0
2018-12-10    1.0
             ... 
2022-03-07    1.0
2020-04-11    1.0
2022-07-26    1.0
2017-12-02    0.0
2020-03-07    0.0
Name: signal, Length: 1477, dtype: float64

In [160]:
# Import Amazon SageMaker libraries and modules
import sagemaker
import sagemaker.amazon.common as smac
from sagemaker import get_execution_role
from sagemaker.predictor import csv_serializer, json_deserializer

# Import AWS Python SDK
import boto3

# Import support libraries
import io
import os
import json
import numpy as np

In [161]:
# Set the S3 bucket name
bucket = "fintechbootcamp-pankaj-sep-08-1"

In [162]:
# Set a prefix for the data files
prefix = "Machine_Learning_1"

In [163]:
# Set the IAM execution role
role = get_execution_role()

In [164]:
# Encode the training data as Protocol Buffer
buf = io.BytesIO()
vectors = np.array(X_train_scaled).astype("float32")
labels = np.array(y_train).astype("float32")
smac.write_numpy_to_dense_tensor(buf, vectors, labels)
buf.seek(0)

# Upload encoded training data to Amazon S3
key = 'linear_train.data'
boto3.resource("s3").Bucket(bucket).Object(os.path.join(prefix, "train", key)).upload_fileobj(buf)
s3_train_data = "s3://{}/{}/train/{}".format(bucket, prefix, key)
print("Training data uploaded to: {}".format(s3_train_data))

Training data uploaded to: s3://fintechbootcamp-pankaj-sep-08-1/Machine_Learning_1/train/linear_train.data


In [165]:
# Encode the testing data as Protocol Buffer
buf = io.BytesIO()
vectors = np.array(X_test_scaled).astype("float32")
labels = np.array(y_test).astype("float32")
smac.write_numpy_to_dense_tensor(buf, vectors, labels)
buf.seek(0)

# Upload encoded testing data to Amazon S3
key = "linear_test.data"
boto3.resource("s3").Bucket(bucket).Object(os.path.join(prefix, "test", key)).upload_fileobj(buf)
s3_test_data = "s3://{}/{}/test/{}".format(bucket, prefix, key)
print("Testing data uploaded to: {}".format(s3_test_data))

Testing data uploaded to: s3://fintechbootcamp-pankaj-sep-08-1/Machine_Learning_1/test/linear_test.data


In [166]:
# Save the current session in a variable
sess = sagemaker.Session()

In [167]:
# Import the get_image_uri module from the sagemaker library
from sagemaker.amazon.amazon_estimator import get_image_uri

In [168]:
# Import the container image
container = get_image_uri(boto3.Session().region_name, "linear-learner")

The method get_image_uri has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
Defaulting to the only supported framework/algorithm version: 1. Ignoring framework/algorithm version: 1.


In [169]:
# Create an instance of the machine learning model
linear = sagemaker.estimator.Estimator(
    container,
    role,
    train_instance_count=1,
    train_instance_type="ml.m4.xlarge",
    output_path="s3://{}/{}/output".format(bucket, prefix),
    sagemaker_session=sess,
)

train_instance_count has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_instance_type has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [170]:
# Get the dimension of the feature-input vector
feature_dim = X.shape[1]

In [171]:
# Define linear learner hyperparameters
# Note how in this case we use: predictor_type='binary_classifier' # (credit risk: good or bad)
linear.set_hyperparameters(
    feature_dim=feature_dim,
    mini_batch_size=200,
    predictor_type="binary_classifier"
)

In [172]:
# Fitting the linear learner model
linear.fit({"train": s3_train_data, "test": s3_test_data})

2022-09-19 18:29:19 Starting - Starting the training job...ProfilerReport-1663612159: InProgress
...
2022-09-19 18:30:02 Starting - Preparing the instances for training......
2022-09-19 18:31:10 Downloading - Downloading input data...
2022-09-19 18:31:43 Training - Downloading the training image...........[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34m[09/19/2022 18:33:35 INFO 139889317406528] Reading default configuration from /opt/amazon/lib/python3.7/site-packages/algorithm/resources/default-input.json: {'mini_batch_size': '1000', 'epochs': '15', 'feature_dim': 'auto', 'use_bias': 'true', 'binary_classifier_model_selection_criteria': 'accuracy', 'f_beta': '1.0', 'target_recall': '0.8', 'target_precision': '0.8', 'num_models': 'auto', 'num_calibration_samples': '10000000', 'init_method': 'uniform', 'init_scale': '0.07', 'init_sigma': '0.01', 'init_bias': '0.0', 'optimizer': 'auto', 'loss': 'auto', 'margin': '1.

In [173]:
# Deploy an instance of the linear learner model to create a predictor
linear_predictor = linear.deploy(initial_instance_count=1, instance_type="ml.t2.medium")

-------------------------!

In [204]:
# Linear predictor configurations
linear_predictor.serializer = csv_serializer
linear_predictor.deserializer = json_deserializer

In [205]:
# Making some predictions using the test data
model_predictions = linear_predictor.predict(X_test_scaled)

The csv_serializer has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
The json_deserializer has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [206]:
# Display sample predictions
model_predictions["predictions"][:3]

[{'score': 0.4150005877017975, 'predicted_label': 1},
 {'score': 0.4038451313972473, 'predicted_label': 0},
 {'score': 0.33314356207847595, 'predicted_label': 0}]

In [207]:
# Create a list with the predicted values
y_predictions = [np.uint8(value["predicted_label"]) for value in model_predictions["predictions"]]

# Transforming the list into an array
y_predictions = np.array(y_predictions)

# Display sample data
y_predictions[:10]

array([1, 0, 0, 0, 1, 0, 0, 1, 0, 0], dtype=uint8)

In [208]:
# Create a predictions DataFrame
predictions_df = pd.DataFrame(index=X_test.index)

# Add the SVM model predictions to the DataFrame
predictions_df['Predicted'] = y_predictions

# Add the actual returns to the DataFrame
predictions_df['Actual Returns'] = df_1['actual_returns']

# Add the strategy returns to the DataFrame
predictions_df['Strategy Returns'] = predictions_df['Actual Returns'] * predictions_df['Predicted']

# Review the DataFrame
display(predictions_df.head())
display(predictions_df.tail())

Unnamed: 0_level_0,Predicted,Actual Returns,Strategy Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-02-20,1,0.013358,0.013358
2020-08-03,0,0.017436,0.0
2020-06-17,0,-0.006057,-0.0
2020-09-12,0,0.003967,0.0
2020-01-14,1,0.083933,0.083933


Unnamed: 0_level_0,Predicted,Actual Returns,Strategy Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-04-30,0,0.019746,0.0
2020-09-29,0,0.012604,0.0
2021-08-19,1,0.042775,0.042775
2018-06-17,1,-0.007769,-0.007769
2021-12-31,1,-0.018476,-0.018476


In [209]:
predictions_df['Strategy Returns'] = predictions_df['Strategy Returns'].cumsum()
display(predictions_df.head())
display(predictions_df.tail())

Unnamed: 0_level_0,Predicted,Actual Returns,Strategy Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-02-20,1,0.013358,0.013358
2020-08-03,0,0.017436,0.013358
2020-06-17,0,-0.006057,0.013358
2020-09-12,0,0.003967,0.013358
2020-01-14,1,0.083933,0.097292


Unnamed: 0_level_0,Predicted,Actual Returns,Strategy Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-04-30,0,0.019746,0.316067
2020-09-29,0,0.012604,0.316067
2021-08-19,1,0.042775,0.358842
2018-06-17,1,-0.007769,0.351073
2021-12-31,1,-0.018476,0.332597


In [210]:
predicted_returns = sum(predictions_df['Strategy Returns'])
predicted_returns

13.624659636986703

In [211]:
predicted_returns_1 = sum(predictions_df['Actual Returns'])
predicted_returns_1

1.6150685295076341

In [215]:
# Imports
from sklearn import svm
from sklearn.metrics import classification_report

In [216]:
# Create the classifier model
svm_model = svm.SVC()
 
# Fit the model to the data using X_train_scaled and y_train
svm_model = svm_model.fit(X_train_scaled, y_train)

# Use the trained model to predict the trading signals for the training data
training_signal_predictions = svm_model.predict(X_train_scaled)

# Display the sample predictions
training_signal_predictions[:10]

array([0., 0., 1., 1., 1., 0., 1., 1., 1., 0.])

In [217]:
# Evaluate the model using a classification report
training_report = classification_report(y_train, training_signal_predictions)
print(training_report)

              precision    recall  f1-score   support

         0.0       0.96      0.90      0.93       822
         1.0       0.88      0.95      0.91       655

    accuracy                           0.92      1477
   macro avg       0.92      0.92      0.92      1477
weighted avg       0.92      0.92      0.92      1477



In [218]:
# Use the trained model to predict the trading signals for the testing data.
testing_signal_predictions = svm_model.predict(X_test_scaled)

In [219]:
# Evaluate the model's ability to predict the trading signal for the testing data
svm_testing_report = classification_report(y_test, testing_signal_predictions)
print(svm_testing_report)

              precision    recall  f1-score   support

         0.0       0.95      0.90      0.93       283
         1.0       0.88      0.94      0.91       210

    accuracy                           0.92       493
   macro avg       0.91      0.92      0.92       493
weighted avg       0.92      0.92      0.92       493



In [220]:
# Create a new empty predictions DataFrame using code provided below.
predictions_df_new = pd.DataFrame(index=X_test.index)
predictions_df_new["predicted_signal"] = testing_signal_predictions
predictions_df_new["actual_returns"] = df_1["actual_returns"]
predictions_df_new["trading_algorithm_returns"] = predictions_df_new["actual_returns"] * predictions_df_new["predicted_signal"]
predictions_df_new.head()

Unnamed: 0_level_0,predicted_signal,actual_returns,trading_algorithm_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-02-20,0.0,0.013358,0.0
2020-08-03,1.0,0.017436,0.017436
2020-06-17,0.0,-0.006057,-0.0
2020-09-12,0.0,0.003967,0.0
2020-01-14,1.0,0.083933,0.083933


In [227]:
predicted_returns_SVM = sum(predictions_df_new['trading_algorithm_returns'])
predicted_returns_SVM

0.6302543276948315

In [229]:
predicted_returns_SVM_1 = sum(predictions_df_new['actual_returns'])
predicted_returns_SVM_1

1.6150685295076341

In [221]:
# Calculate and plot the cumulative returns for the `actual_returns` and the `trading_algorithm_returns`
(1 + predictions_df_new[["actual_returns", "trading_algorithm_returns"]]).cumprod().plot()


<matplotlib.axes._subplots.AxesSubplot at 0x7f6e7f54e890>

In [230]:
# Import LogisticRegression from sklearn
from sklearn.linear_model import LogisticRegression

In [231]:
# Create an instance of the LogisticRegression model
logistic_regression_model = LogisticRegression()

In [232]:
# Fit the LogisticRegression model
logistic_regression_model.fit(X_train_scaled, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [233]:
# Use the trained LogisticRegression model to predict the trading signals for the training data
lr_training_signal_predictions = logistic_regression_model.predict(X_test_scaled)

# Display the predictions
lr_training_signal_predictions

array([0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1.,
       1., 1., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0.,
       1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 1.,
       1., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
       1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
       0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 1., 0., 1., 0., 0.,
       0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 1., 1., 1., 0., 1.,
       0., 1., 1., 0., 1., 1., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 1.,
       0., 1., 0., 0., 0., 0., 1., 0., 1., 0., 0., 1., 0., 1., 0., 0., 0.,
       1., 0., 0., 1., 0., 1., 1., 1., 1., 1., 0., 0., 0., 1., 0., 0., 0.,
       0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 1., 0., 0., 0., 0., 0., 1.,
       1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
       0., 0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1.,
       1., 1., 0., 0., 1.

In [235]:
# Generate a classification report using the testing data and the logistic regression model's predications
lr_training_report = classification_report(y_test, lr_training_signal_predictions)

# Review the classification report
print(lr_training_report)

              precision    recall  f1-score   support

         0.0       0.83      1.00      0.91       283
         1.0       1.00      0.72      0.84       210

    accuracy                           0.88       493
   macro avg       0.91      0.86      0.87       493
weighted avg       0.90      0.88      0.88       493



In [237]:
# Create a new empty predictions DataFrame using code provided below.
predictions_df_new_lr = pd.DataFrame(index=X_test.index)
predictions_df_new_lr["predicted_signal"] = lr_training_signal_predictions
predictions_df_new_lr["actual_returns"] = df_1["actual_returns"]
predictions_df_new_lr["trading_algorithm_returns"] = predictions_df_new_lr["actual_returns"] * predictions_df_new_lr["predicted_signal"]
predictions_df_new_lr.head()

Unnamed: 0_level_0,predicted_signal,actual_returns,trading_algorithm_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-02-20,0.0,0.013358,0.0
2020-08-03,0.0,0.017436,0.0
2020-06-17,0.0,-0.006057,-0.0
2020-09-12,0.0,0.003967,0.0
2020-01-14,1.0,0.083933,0.083933


In [239]:
predicted_returns_lr = sum(predictions_df_new_lr['trading_algorithm_returns'])
predicted_returns_lr

0.3796569381412901

In [240]:
predicted_returns_lr_1 = sum(predictions_df_new_lr['actual_returns'])
predicted_returns_lr_1

1.6150685295076341