In [8]:
# Initial imports
import os
import requests
import json
import pandas as pd
from dotenv import load_dotenv
from pathlib import Path
import numpy as np
import hvplot.pandas
import matplotlib.pyplot as plt
%matplotlib inline

In [9]:
#Set path and read BAC daily trading df.
bac_csvpath = Path("./Resources/BAC.csv")
bac_close = pd.read_csv(
    bac_csvpath, 
    index_col = 'Date',
    parse_dates = True,
    infer_datetime_format = True
)
bac_close

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-04-02,29.799999,30.059999,28.750000,29.309999,27.372540,86644200
2018-04-03,29.540001,29.650000,29.100000,29.590000,27.634033,69698700
2018-04-04,29.000000,29.969999,28.950001,29.879999,27.904861,66640800
2018-04-05,30.180000,30.549999,30.120001,30.320000,28.315777,55765500
2018-04-06,30.010000,30.290001,29.370001,29.629999,27.671385,79687600
...,...,...,...,...,...,...
2021-03-24,37.209999,37.790001,36.889999,36.900002,36.900002,43896800
2021-03-25,36.869999,37.750000,36.630001,37.660000,37.660000,47517900
2021-03-26,38.349998,38.759998,38.080002,38.680000,38.680000,66733600
2021-03-29,38.049999,38.630001,37.860001,38.310001,38.310001,45680000


In [10]:
#Drop columns except close and volume
bac_close.drop(['Open', 'High', 'Low', 'Adj Close'], axis=1, inplace=True)
bac_close.tail()

Unnamed: 0_level_0,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-03-24,36.900002,43896800
2021-03-25,37.66,47517900
2021-03-26,38.68,66733600
2021-03-29,38.310001,45680000
2021-03-30,38.990002,56705900


In [11]:
#API Call
headers = {
    'accept': 'application/json',
    'Authorization': 'Token 168ac68d07d82b3c04e4fcc9fb1db90bdcf380c5',
}
r = requests.get('https://socialsentiment.io/api/v1/stocks/BAC/sentiment/daily/?to_date=2021-03-31&from_date=2020-04-19', headers = headers)
x = r.json()

# Convert to DataFrame
bac_activity_df = pd.read_json(json.dumps(x))
bac_activity_df = bac_activity_df.set_index('date')
bac_activity_df = bac_activity_df.drop(columns = ['stock', 'positive_score', 'negative_score', 'avg_7_days', 'avg_14_days', 'avg_30_days'])
bac_activity_df

Unnamed: 0_level_0,score,activity
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-04-19,25,122
2020-04-20,11,42
2020-04-21,-1,956
2020-04-22,25,414
2020-04-23,25,147
...,...,...
2021-03-27,0,277
2021-03-28,28,324
2021-03-29,5,1198
2021-03-30,24,733


In [12]:
bac_df = pd.concat([bac_close, bac_activity_df],axis=1).dropna()

# Display sample data
bac_df.head(-5)

Unnamed: 0,Close,Volume,score,activity
2020-04-20,22.500000,74446300.0,11.0,42.0
2020-04-21,21.639999,78871300.0,-1.0,956.0
2020-04-22,21.799999,49759100.0,25.0,414.0
2020-04-23,21.870001,50614800.0,25.0,147.0
2020-04-24,22.180000,47675700.0,2.0,54.0
...,...,...,...,...
2021-03-17,37.950001,63650000.0,-2.0,3097.0
2021-03-18,38.939999,95373900.0,21.0,3062.0
2021-03-19,38.529999,106164800.0,12.0,2443.0
2021-03-22,37.660000,54701300.0,18.0,721.0


In [13]:
bac_price_line = bac_df["Close"].hvplot(
    ylabel='Price',
    width=500,
    height=200
)
bac_price_line

In [14]:
bac_score_bar = bac_df["score"].hvplot.bar(
    ylabel='Social Media Sentiment Score',
    width=500,
    height=200)
bac_score_bar


In [15]:
# Set the variables for short window and long window periods
short_window = 7
long_window = 14

# Generate the short and long window simple moving averages (by 7 and 14 days, respectively)
bac_df["Avg_Activity_1Week"] = bac_df["activity"].rolling(window=short_window).mean()
bac_df["Avg_Activity_2Week"] = bac_df["activity"].rolling(window=long_window).mean()

# Create a column to hold the trading signal
bac_df["Signal"] = 0.0

# Generate the trading signal 0 or 1,
# where 1 is the short-window (SMA7) greater than the long-window (SMA14)
# and 0 is when the condition is not met
bac_df["Signal"][short_window:] = np.where(
    bac_df["Avg_Activity_1Week"][short_window:] > bac_df["Avg_Activity_2Week"][short_window:], 1.0, 0.0
)

# Calculate the points in time when the Signal value changes
# Identify trade entry (1) and exit (-1) points
bac_df["Entry/Exit"] = bac_df["Signal"].diff()

# Review the DataFrame
bac_df.tail()

Unnamed: 0,Close,Volume,score,activity,Avg_Activity_1Week,Avg_Activity_2Week,Signal,Entry/Exit
2021-03-24,36.900002,43896800.0,8.0,569.0,3053.714286,1934.071429,1.0,0.0
2021-03-25,37.66,47517900.0,10.0,614.0,1699.571429,1909.857143,0.0,-1.0
2021-03-26,38.68,66733600.0,5.0,523.0,1331.857143,1884.785714,0.0,0.0
2021-03-29,38.310001,45680000.0,5.0,1198.0,1065.571429,1924.285714,0.0,0.0
2021-03-30,38.990002,56705900.0,24.0,733.0,821.285714,1945.0,0.0,0.0


In [16]:
plot = bac_df[["activity", "Avg_Activity_1Week", "Avg_Activity_2Week"]]
plot.hvplot()

In [17]:
# Visualize exit position relative to close price
exit = bac_df[bac_df['Entry/Exit'] == -1.0]['activity'].hvplot.scatter(
    color= 'purple',
    marker = 'v',
    legend=False,
    width=1000,
    height=400)

# Visualize entry position relative to close price
entry = bac_df[bac_df['Entry/Exit'] == 1.0]['activity'].hvplot.scatter(
    color='green',
    marker = '^',
    legend=False,
    width=1000,
    height=400)

# Visualize the close price for the investment
bac_close = bac_df[['activity']].hvplot(
    line_color='lightgray',
    ylabel='Activity',
    width=1000,
    height=400
)

# Visualize moving averages
moving_avgs = bac_df[["Avg_Activity_1Week", "Avg_Activity_2Week"]].hvplot(
    width=1000,
    height=400)

# Overlay the plots
entry_exit_plot = bac_close * moving_avgs * entry * exit
entry_exit_plot.opts(
    title="BAC - Social Media Sentiment Trading Algorithm"
)

In [18]:
# Plot closing prices and social media activity for one year timeframe
bac_close_plot = bac_df['Close'].hvplot(
    title = 'BAC Closing Prices',
    ylabel = 'Price in $')

bac_activity_plot = bac_activity_df['activity'].hvplot(
    title = 'BAC Activity',)

bac_close_plot + bac_activity_plot

### Backtest the Trading Strategy

In [19]:
# Set the initial capital
initial_capital = float(100000)

# Set the share size
share_size = bac_df["score"]

# Take shares in amount of score for position where the dual moving average crossover is 1 (SMA7 is greater than SMA30)
bac_df["Shares"] = abs(share_size) * bac_df["Entry/Exit"]

# Multiply share price by entry/exit positions and get the cumulatively sum
bac_df["Portfolio Holdings"] = (
    bac_df["Close"] * bac_df["Shares"].cumsum()
)

# Subtract the initial capital by the portfolio holdings to get the amount of liquid cash in the portfolio
bac_df["Portfolio Cash"] = (
    initial_capital - (bac_df["Close"] * bac_df["Shares"]).cumsum()
)

# Get the total portfolio value by adding the cash amount by the portfolio holdings (or investments)
bac_df["Portfolio Total"] = (
    bac_df["Portfolio Cash"] + bac_df["Portfolio Holdings"]
)

# Calculate the portfolio daily returns
bac_df["Portfolio Daily Returns"] = bac_df["Portfolio Total"].pct_change()

# Calculate the cumulative returns
bac_df["Portfolio Cumulative Returns"] = (
    1 + bac_df["Portfolio Daily Returns"]
).cumprod() - 1

# Print the DataFrame
bac_df.head()

Unnamed: 0,Close,Volume,score,activity,Avg_Activity_1Week,Avg_Activity_2Week,Signal,Entry/Exit,Shares,Portfolio Holdings,Portfolio Cash,Portfolio Total,Portfolio Daily Returns,Portfolio Cumulative Returns
2020-04-20,22.5,74446300.0,11.0,42.0,,,0.0,,,,,,,
2020-04-21,21.639999,78871300.0,-1.0,956.0,,,0.0,0.0,0.0,0.0,100000.0,100000.0,,
2020-04-22,21.799999,49759100.0,25.0,414.0,,,0.0,0.0,0.0,0.0,100000.0,100000.0,0.0,0.0
2020-04-23,21.870001,50614800.0,25.0,147.0,,,0.0,0.0,0.0,0.0,100000.0,100000.0,0.0,0.0
2020-04-24,22.18,47675700.0,2.0,54.0,,,0.0,0.0,0.0,0.0,100000.0,100000.0,0.0,0.0


In [20]:
# Visualize exit positions relative to total portfolio value
entry = bac_df[bac_df["Entry/Exit"] == 1.0]["Portfolio Total"].hvplot.scatter(
    color='purple',
    marker='^',
    legend=False, 
    ylabel="Total Portfolio Value", 
    width=1000, 
    height=400
)

# Visualize entry positions relative to total portfolio value
exit = bac_df[bac_df["Entry/Exit"] == -1.0]["Portfolio Total"].hvplot.scatter(
    color='yellow',
    marker='v',
    legend=False, 
    ylabel="Total Portfolio Value", 
    width=1000, 
    height=400
)

# Visualize the total portoflio value for the investment
total_portfolio_value = bac_df[['Portfolio Total']].hvplot(
    line_color='lightgray',
    ylabel='Total Portfolio Value',
    width=1000,
    height=400
)

# Overlay the plots
portfolio_entry_exit_plot = total_portfolio_value * entry * exit
portfolio_entry_exit_plot.opts(
    title="Social Sentiment Trading Algorithm - BAC Total Portfolio Value",
    yformatter='%.0f'
)

In [21]:
# Create the list of the metric names
metrics = [
    'Annualized Return',
    'Cumulative Returns',
    'Annual Volatility',
    'Sharpe Ratio',
]

# Create a list that holds the column name
columns = ['Backtest']

# Initialize the DataFrame with index set to evaluation metrics and columns 
portfolio_evaluation_df = pd.DataFrame(index=metrics, columns=columns)

# Calculate the Annualized return metric
portfolio_evaluation_df.loc['Annualized Return'] = (
    bac_df['Portfolio Daily Returns'].mean() * 252
)

# Calculate the Cumulative returns metric
portfolio_evaluation_df.loc['Cumulative Returns'] = bac_df['Portfolio Cumulative Returns'][-1]

# Calculate the Annual volatility metric
portfolio_evaluation_df.loc['Annual Volatility'] = (
    bac_df['Portfolio Daily Returns'].std() * np.sqrt(252)
)

# Calculate the Sharpe ratio
portfolio_evaluation_df.loc['Sharpe Ratio'] = (
    bac_df['Portfolio Daily Returns'].mean() * 252) / (
    bac_df['Portfolio Daily Returns'].std() * np.sqrt(252)
)

# Review the portfolio evaluation DataFrame
portfolio_evaluation_df

Unnamed: 0,Backtest
Annualized Return,0.0186216
Cumulative Returns,0.0173219
Annual Volatility,0.00970525
Sharpe Ratio,1.91871


In [22]:
# Initialize the trade evaluation DataFrame
trade_evaluation_df = pd.DataFrame(
    columns=[
        'Stock', 
        'Entry Date', 
        'Exit Date', 
        'Shares', 
        'Entry Share Price', 
        'Exit Share Price', 
        'Entry Portfolio Holding', 
        'Exit Portfolio Holding', 
        'Profit/Loss']
)

# Initialize the iterative variables
entry_date = ""
exit_date = ""
entry_portfolio_holding = 0.0
exit_portfolio_holding = 0.0
share_size = 0
entry_share_price = 0.0
exit_share_price = 0.0

# Loop through the signal DataFrame
# If `Entry/Exit` is 1, set entry trade metrics
# Else if `Entry/Exit` is -1, set exit trade metrics and calculate profit,
# Then append the record to the trade evaluation DataFrame
for index, row in bac_df.iterrows():
    if row['Entry/Exit'] == 1:
        entry_date = index
        entry_portfolio_holding = abs(row['Portfolio Holdings'])
        share_size = row['Shares']
        entry_share_price = row['Close']

    elif row['Entry/Exit'] == -1:
        exit_date = index
        exit_portfolio_holding = abs(row['Close'] * row['Shares'])
        exit_share_price = row['Close']
        profit_loss =  entry_portfolio_holding - exit_portfolio_holding
        trade_evaluation_df = trade_evaluation_df.append(
            {
                'Stock': 'BAC',
                'Entry Date': entry_date,
                'Exit Date': exit_date,
                'Shares': share_size,
                'Entry Share Price': entry_share_price,
                'Exit Share Price': exit_share_price,
                'Entry Portfolio Holding': entry_portfolio_holding,
                'Exit Portfolio Holding': exit_portfolio_holding,
                'Profit/Loss': profit_loss
            },
            ignore_index=True)

# Review the DataFrame
trade_evaluation_df

Unnamed: 0,Stock,Entry Date,Exit Date,Shares,Entry Share Price,Exit Share Price,Entry Portfolio Holding,Exit Portfolio Holding,Profit/Loss
0,BAC,2020-05-14,2020-05-22,20.0,21.709999,22.66,434.19998,294.58,139.61998
1,BAC,2020-05-26,2020-05-27,15.0,24.280001,25.98,534.160022,233.82,300.340022
2,BAC,2020-05-28,2020-06-15,0.0,24.860001,25.15,323.180013,100.6,222.580013
3,BAC,2020-06-24,2020-06-29,48.0,23.809999,23.389999,1357.169943,701.69997,655.469973
4,BAC,2020-07-13,2020-07-28,13.0,24.190001,24.360001,967.60004,462.840019,504.760021
5,BAC,2020-08-17,2020-08-27,10.0,25.9,26.049999,802.9,26.049999,776.850001
6,BAC,2020-08-28,2020-09-01,36.0,26.299999,25.709999,1735.799934,154.259994,1581.53994
7,BAC,2020-09-03,2020-09-04,23.0,25.66,26.540001,2129.78,318.480012,1811.299988
8,BAC,2020-09-08,2020-09-09,32.0,25.48,25.51,2624.44,178.57,2445.87
9,BAC,2020-09-21,2020-09-30,55.0,24.469999,24.09,3694.969849,481.8,3213.169849


In [23]:
trade_evaluation_df["Profit/Loss"].sum()

44310.99036599999

In [24]:
##ML models

In [25]:
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset
from sklearn.metrics import classification_report

In [26]:
#Split data into training and testing datasets for aapl_df
# Assign a copy of the Close and activity columns to a features DataFrame called X
X = bac_df[['Close', 'activity']].shift().dropna()

# Review the DataFrame
X.head()

Unnamed: 0,Close,activity
2020-04-21,22.5,42.0
2020-04-22,21.639999,956.0
2020-04-23,21.799999,414.0
2020-04-24,21.870001,147.0
2020-04-30,22.18,54.0


In [27]:
# Create the target set selecting the Signal column and assiging it to y
y = bac_df['Signal']

# Review the value counts
y.value_counts()

0.0    122
1.0    113
Name: Signal, dtype: int64

In [28]:
# Select the start of the training period
training_begin = X.index.min()

# Display the training begin date
print(training_begin)

2020-04-21 00:00:00


In [29]:
# Select the ending period for the training data with an offset of 3 months
training_end = X.index.min() + DateOffset(months=3)

# Display the training end date
print(training_end)

2020-07-21 00:00:00


In [30]:
# Generate the X_train and y_train DataFrames
X_train = X.loc[training_begin:training_end] # df.loc[row, column] 
y_train = y.loc[training_begin:training_end]

# Review the X_train DataFrame
X_train.head()

Unnamed: 0,Close,activity
2020-04-21,22.5,42.0
2020-04-22,21.639999,956.0
2020-04-23,21.799999,414.0
2020-04-24,21.870001,147.0
2020-04-30,22.18,54.0


In [31]:
# Generate the X_test and y_test DataFrames
X_test = X.loc[training_end+DateOffset(hours=1):]
y_test = y.loc[training_end+DateOffset(hours=1):]

# Review the X_test DataFrame
X_test.head()

Unnamed: 0,Close,activity
2020-07-22,24.42,82.0
2020-07-23,24.309999,130.0
2020-07-24,24.540001,592.0
2020-07-27,24.35,222.0
2020-07-28,24.139999,2373.0


In [32]:
# Scale the features DataFrames

# Create a StandardScaler instance
scaler = StandardScaler()

# Apply the scaler model to fit the X-train data
X_scaler = scaler.fit(X_train)

# Transform the X_train and X_test DataFrames using the X_scaler
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [33]:
# Use the svc classifier model from SKLearn's support vector machine (SVM) learning method to fit the training data and make predictions.

# From SVM, instantiate SVC classifier model instance
svm_model = svm.SVC()
 
# Fit the model to the data using the training data
svm_model = svm_model.fit(X_train_scaled, y_train)

# Use the testing data to make the model predictions
svm_pred = svm_model.predict(X_test_scaled)

# Review the model's predicted values
svm_pred[:10]

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [34]:
# Use a classification report to evaluate the model using the predictions and testing data
svm_testing_report = classification_report(y_test, svm_pred)

# Print the classification report
print(svm_testing_report)

              precision    recall  f1-score   support

         0.0       0.30      0.03      0.06        89
         1.0       0.48      0.92      0.63        85

    accuracy                           0.47       174
   macro avg       0.39      0.48      0.34       174
weighted avg       0.39      0.47      0.34       174



In [35]:
#Using Logistic Regression as second machine learning model.

In [36]:
# Import a new classifier from SKLearn
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier

# Initiate the model instance
logistic_regression_model = LogisticRegression()
# logistic_regression_model=DecisionTreeClassifier()
# logistic_regression_model=svm.SVC()
# logistic_regression_model=AdaBoostClassifier()

In [37]:
len(X_test_scaled)

174

In [38]:
# Fit the model using the training data
model = logistic_regression_model.fit(X_train_scaled, y_train)

# Use the testing dataset to generate the predictions for the new model
pred = model.predict(X_test_scaled)

# Review the model's predicted values
pred

array([1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 1., 1., 1.,
       1., 0., 0., 0., 0., 0., 1., 0., 1., 0., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1.])

In [39]:
# Use a classification report to evaluate the model using the predictions and testing data
lr_testing_report = classification_report(y_test, pred)

# Print the classification report
print(lr_testing_report)

              precision    recall  f1-score   support

         0.0       0.41      0.10      0.16        89
         1.0       0.47      0.85      0.61        85

    accuracy                           0.47       174
   macro avg       0.44      0.47      0.38       174
weighted avg       0.44      0.47      0.38       174



In [40]:
y_test

2020-07-22    1.0
2020-07-23    1.0
2020-07-24    1.0
2020-07-27    1.0
2020-07-28    0.0
             ... 
2021-03-24    1.0
2021-03-25    0.0
2021-03-26    0.0
2021-03-29    0.0
2021-03-30    0.0
Name: Signal, Length: 174, dtype: float64