In [None]:
# Imports
import os
import requests
import pandas as pd
from dotenv import load_dotenv
import numpy as np
import hvplot.pandas
from pathlib import Path
import alpaca_trade_api as tradeapi
from finta import TA
from sklearn.metrics import classification_report
from pandas.tseries.offsets import DateOffset
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset
from sklearn.metrics import classification_report

In [None]:
# Load .env environment variables
load_dotenv()

In [None]:
# Read the Alpaca API key and secret from the environment
alpaca_api_key, alpaca_secret_key = (
    os.getenv(variable) for variable in ("ALPACA_API_KEY", "ALPACA_SECRET_KEY")
)

# Build the Alpaca REST client (API version v2) used by the cells below
alpaca = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version="v2")

In [None]:
# Symbol and bar size to request
ticker = "SPY"
timeframe = "1Day"

# Three-year window (2021-01-02 to 2024-01-02) as ISO-format strings
# localized to America/New_York
start_date, end_date = (
    pd.Timestamp(day, tz="America/New_York").isoformat()
    for day in ("2021-01-02", "2024-01-02")
)

# Request the bars for that window and keep the DataFrame view of the result
ticker_data = alpaca.get_bars(ticker, timeframe, start=start_date, end=end_date).df
ticker_data.head()

In [None]:
# Keep the open/high/low/close/volume columns, add the percentage change of
# the close as "Actual Returns", then drop every row that contains a NaN
spy_df = (
    ticker_data.loc[:, ["open", "high", "low", "close", "volume"]]
    .assign(**{"Actual Returns": lambda bars: bars["close"].pct_change()})
    .dropna()
)

# Review the first and last rows of the DataFrame
display(spy_df.head(), spy_df.tail())


In [None]:
# Plot the "close" column of spy_df with hvplot
spy_df["close"].hvplot()

In [None]:
# Recreate the trading algorithm

# Window lengths for the short and long HMA indicators
short_window = 20
long_window = 50

# Build signals_df as a copy of spy_df with the HMA technical indicators
# for the short and long windows attached as the "Short" and "Long" columns
signals_df = spy_df.assign(
    Short=TA.HMA(spy_df, short_window),
    Long=TA.HMA(spy_df, long_window),
)

# Review a ten-row slice of the DataFrame
signals_df.iloc[95:105]

In [None]:
# Generate the trading signal, 1.0 or 0.0:
#   1.0 when the Short indicator is greater than the Long indicator
#   0.0 when it is not (comparisons involving NaN also evaluate to 0.0)
signal = np.where(signals_df["Short"] > signals_df["Long"], 1.0, 0.0)

# The first `short_window` rows never carry a signal
signal[:short_window] = 0.0

# Assign the whole column in one step. The previous chained form
# `signals_df["Signal"][short_window:] = ...` writes through an intermediate
# Series, which triggers SettingWithCopyWarning and does not update the
# DataFrame when pandas copy-on-write is enabled.
signals_df["Signal"] = signal

# Calculate the points in time at which a position should be taken:
# the row-to-row difference is 1 when the signal turns on and -1 when it turns off
signals_df["Entry/Exit"] = signals_df["Signal"].diff()

# Review the DataFrame
signals_df.iloc[95:105, :]

In [None]:
# Visualize entry positions (Entry/Exit == 1.0) relative to close price
entry = signals_df.loc[signals_df["Entry/Exit"] == 1.0, "close"].hvplot.scatter(
    color='purple',
    marker='^',
    size=200,
    legend=False,
    ylabel='Price in $',
    width=1000,
    height=400
)

# Visualize exit positions (Entry/Exit == -1.0) relative to close price.
# Stored as `exit_markers` rather than `exit` so the built-in / IPython
# `exit` helper is not shadowed for the rest of the session.
exit_markers = signals_df.loc[signals_df["Entry/Exit"] == -1.0, "close"].hvplot.scatter(
    color='orange',
    marker='v',
    size=200,
    legend=False,
    ylabel='Price in $',
    width=1000,
    height=400
)

# Visualize close price for the investment
security_close = signals_df[["close"]].hvplot(
    line_color='lightgray',
    ylabel='Price in $',
    width=1000,
    height=400
)

# Visualize the Short and Long indicator lines
moving_avgs = signals_df[["Short", "Long"]].hvplot(
    ylabel='Price in $',
    width=1000,
    height=400
)

# Overlay the four plots into a single figure
entry_exit_plot = security_close * moving_avgs * entry * exit_markers
entry_exit_plot

In [None]:
## Split the data into training and testing datasets

In [None]:
# Build the features DataFrame X from the Short and Long columns, shifted
# down by one row so each row carries the previous row's indicator values;
# rows that contain NaN are dropped
X = signals_df.loc[:, ["Short", "Long"]].shift(periods=1).dropna()

# Review the DataFrame
X.head()

In [None]:
# Create the target set by selecting the Signal column and assigning it to y
y = signals_df.loc[:, "Signal"]

# Review how many rows fall into each signal value
y.value_counts()

In [None]:
# Select the start of the training period: the earliest index value in X
training_begin = X.index.min()

# Display the training begin date
print(training_begin)

In [None]:
# Select the ending period for the training data: an offset of 6 months
# from the earliest index value in X
training_end = X.index.min() + DateOffset(months=6)

# Display the training end date
print(training_end)

In [None]:
# Slice both the features and the target down to the training window
X_train, y_train = (
    dataset.loc[training_begin:training_end] for dataset in (X, y)
)

# Review the X_train DataFrame
X_train.head()

In [None]:
# Generate the X_test and y_test DataFrames.
# The test window starts one hour after training_end so it does not
# start on the training_end timestamp itself.
test_begin = training_end + DateOffset(hours=1)
X_test = X.loc[test_begin:]
y_test = y.loc[test_begin:]

# Review the X_test DataFrame (the cell previously displayed X_train here)
X_test.head()

In [None]:
# Scale the features DataFrames

# Fit a StandardScaler on the training features only
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to the training and the testing features
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [None]:
### Step 4: Use the `SVC` classifier model from SKLearn's support vector machine (SVM) learning method to
### fit the training data and make predictions based on the testing data. Review the predictions.

In [None]:
# Instantiate the SVC classifier and fit it on the scaled training data
svm_model = svm.SVC().fit(X_train_scaled, y_train)

# Predict on the scaled testing data
svm_pred = svm_model.predict(X_test_scaled)

# Review the model's predicted values
svm_pred

In [None]:
### Step 5: Review the classification report associated with the `SVC` model predictions. 

In [None]:
# Evaluate the SVC predictions against the testing targets
svm_testing_report = classification_report(y_true=y_test, y_pred=svm_pred)

# Print the classification report
print(svm_testing_report)


In [None]:
### Step 6: Create a predictions DataFrame that contains columns for
### “Predicted” values, “Actual Returns”, and “Strategy Returns”.

In [None]:
# Create the predictions DataFrame on the X_test index, seeded with the
# SVM model predictions
predictions_df = pd.DataFrame({"Predicted": svm_pred}, index=X_test.index)

# Add the actual returns (aligned on the index) to the DataFrame
predictions_df["Actual Returns"] = signals_df["Actual Returns"]

# Add the strategy returns: actual return multiplied by the predicted signal
predictions_df["Strategy Returns"] = predictions_df["Actual Returns"].mul(
    predictions_df["Predicted"]
)

# Review the first and last rows of the DataFrame
display(predictions_df.head(), predictions_df.tail())

In [None]:
### Step 7: Create a cumulative return plot that shows the actual returns vs. the strategy returns. Save a PNG image of this plot. 
###This will serve as a baseline against which to compare the effects of tuning the trading algorithm.

In [None]:
# Plot the actual returns versus the strategy returns:
# add 1 to each return and take the running product to get cumulative growth
(1 + predictions_df[['Actual Returns', 'Strategy Returns']]).cumprod().hvplot()

In [None]:
### Step 1:  Import a new classifier, such as `AdaBoost`, `DecisionTreeClassifier`, or `LogisticRegression`. (For the full list of classifiers, refer to the
###[Supervised learning page](https://scikit-learn.org/stable/supervised_learning.html) in the scikit-learn documentation.)

In [None]:
# Import a new classifier from SKLearn (CalibratedClassifierCV from sklearn.calibration)
from sklearn.calibration import CalibratedClassifierCV

# Initiate the model instance; no arguments are passed, so the library defaults apply
cj_model = CalibratedClassifierCV()

In [None]:
### Step 2: Using the original training data as the baseline model, fit another model with the new classifier.

In [None]:
# Train the new classifier in place on the scaled training data
cj_model.fit(X_train_scaled, y_train)

# Generate the new model's predictions from the scaled testing data
cj_pred = cj_model.predict(X_test_scaled)

# Review the model's predicted values
cj_pred


In [None]:
# Evaluate the new classifier's predictions against the testing targets
cj_testing_report = classification_report(y_true=y_test, y_pred=cj_pred)

# Print the classification report
print(cj_testing_report)


In [None]:
# Create the predictions DataFrame on the X_test index, seeded with the
# new classifier's predictions
cj_pred_df = pd.DataFrame({"CJ Predicted": cj_pred}, index=X_test.index)

# Add the actual returns (aligned on the index) to the DataFrame
cj_pred_df["Actual Returns"] = signals_df["Actual Returns"]

# Add the strategy returns: actual return multiplied by the predicted signal
cj_pred_df["CJ Strategy Returns"] = cj_pred_df["Actual Returns"].mul(
    cj_pred_df["CJ Predicted"]
)

# Review the first and last rows of the DataFrame
display(cj_pred_df.head(), cj_pred_df.tail())


In [None]:
# Plot the actual returns versus the CJ strategy returns:
# add 1 to each return and take the running product to get cumulative growth
(1 + cj_pred_df[['Actual Returns', 'CJ Strategy Returns']]).cumprod().hvplot()

In [None]:
# Single column that will hold the metrics for the "Actual" returns
columns = ["Actual"]

# Row labels: one per evaluation metric
metrics = ["Annualized Return", "Cumulative Returns", "Annual Volatility", "Sharpe Ratio"]

# Empty frame (all NaN) with the metrics as the index and the column above
spy_evaluation_df = pd.DataFrame(columns=columns, index=metrics)

In [None]:
 # Calculate annualized return
# Mean per-period return scaled by 252 periods per year
spy_evaluation_df.loc["Annualized Return"] = (
    cj_pred_df["Actual Returns"].mean() * 252
)

# Calculate cumulative return: the last value of the compounded growth series.
# `.iloc[-1]` selects the last row by position; a bare `[-1]` on a Series
# whose index is not integer-based relies on a deprecated positional fallback.
spy_evaluation_df.loc["Cumulative Returns"] = (
    (1 + cj_pred_df["Actual Returns"]).cumprod().iloc[-1]
)

# Calculate annual volatility: per-period standard deviation scaled by sqrt(252)
spy_evaluation_df.loc["Annual Volatility"] = (
    cj_pred_df["Actual Returns"].std() * np.sqrt(252)
)

# Calculate Sharpe ratio: annualized return divided by annual volatility
spy_evaluation_df.loc["Sharpe Ratio"] = (
    cj_pred_df["Actual Returns"].mean() * 252) / (
    cj_pred_df["Actual Returns"].std() * np.sqrt(252)
)

# Review the result
spy_evaluation_df

In [None]:
# Single column that will hold the metrics for the "Strategy" returns
columns = ["Strategy"]

# Row labels: one per evaluation metric
metrics = ["Annualized Return", "Cumulative Returns", "Annual Volatility", "Sharpe Ratio"]

# Empty frame (all NaN) with the metrics as the index and the column above
strategy_evaluation_df = pd.DataFrame(columns=columns, index=metrics)

In [None]:
 # Calculate annualized return
# Mean per-period strategy return scaled by 252 periods per year
strategy_evaluation_df.loc["Annualized Return"] = (
    cj_pred_df["CJ Strategy Returns"].mean() * 252
)

# Calculate cumulative return: the last value of the compounded growth series.
# `.iloc[-1]` selects the last row by position; a bare `[-1]` on a Series
# whose index is not integer-based relies on a deprecated positional fallback.
strategy_evaluation_df.loc["Cumulative Returns"] = (
    (1 + cj_pred_df["CJ Strategy Returns"]).cumprod().iloc[-1]
)

# Calculate annual volatility: per-period standard deviation scaled by sqrt(252)
strategy_evaluation_df.loc["Annual Volatility"] = (
    cj_pred_df["CJ Strategy Returns"].std() * np.sqrt(252)
)

# Calculate Sharpe ratio: annualized return divided by annual volatility
strategy_evaluation_df.loc["Sharpe Ratio"] = (
    cj_pred_df["CJ Strategy Returns"].mean() * 252) / (
    cj_pred_df["CJ Strategy Returns"].std() * np.sqrt(252)
)

# Review the result
strategy_evaluation_df

In [None]:
# Place the Actual and Strategy evaluation metrics side by side as two columns
pd.concat([spy_evaluation_df, strategy_evaluation_df], axis=1)