In [289]:
# Implementing Support Vector machines model on dataset for stock price prediction
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime
import yfinance as yf
import requests
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Importing the dataset
initial_df = pd.read_csv('SP500_stock_dataset_PE.csv', delimiter=',')
#initial_df = pd.read_csv('SP500_stock_prices.csv', delimiter=',')

# # Create a new column 'Price Movement'
# df['Daily Price Movement'] = np.where(df['Close'] > df['Close'].shift(1), 1, 0)

initial_df.head()

Unnamed: 0,Date,Ticker,Open,Low,High,Close,Volume,Sector,PE-Ratio
0,2016-01-04,MMM,148.050003,145.399994,148.320007,146.820007,3277200,Industrials,19.369394
1,2016-01-05,MMM,146.820007,145.610001,147.5,147.460007,2688100,Industrials,19.453827
2,2016-01-06,MMM,145.589996,143.419998,145.759995,144.490005,2997100,Industrials,19.062006
3,2016-01-07,MMM,142.520004,140.630005,143.130005,140.970001,3553500,Industrials,18.597625
4,2016-01-08,MMM,141.360001,140.220001,142.5,140.490005,2664000,Industrials,18.534302


In [290]:
# Make date column the index
initial_df = initial_df.set_index('Date')

In [291]:
# only display year, month and day in index
initial_df.index = pd.to_datetime(initial_df.index).date
data = initial_df
data
# make return column
data['Return'] = data['Close'].pct_change()

In [87]:
print(data['Sector'].unique())
# assign each different Sector string its own number
data['Sector'] = data['Sector'].map({'Consumer Discretionary': 0, 'Consumer Staples': 1, 'Energy': 2, 'Financials': 3, 'Health Care': 4, 'Industrials': 5, 'Information Technology': 6, 'Materials': 7, 'Real Estate': 8, 'Communication Services': 9, 'Utilities': 10})

['Industrials' 'Health Care' 'Information Technology'
 'Communication Services' 'Consumer Staples' 'Consumer Discretionary'
 'Utilities' 'Financials' 'Materials' 'Real Estate' 'Energy']


In [88]:
# make a new column for whether or not the price has increased or decreased from the previous 62 days
# 1 = increased, 0 = decreased, doing so by comparing openprice of today with openprice of 62 days ago
data['62-day Price Movement'] = np.where(data['Open'] > data['Open'].shift(-62), 1, 0)

In [89]:
# Make the tickers to categoricals, but keep the original order of the tickers
data['Ticker'] = pd.Categorical(data['Ticker'], categories=data['Ticker'].unique(), ordered=True)
data['Ticker'] = data['Ticker'].cat.codes
# # make target column
# data['Target'] = data['62-day Price Movement'].shift(-62)
# #data['Price Movement 62 Days'] = data['Price Movement'].shift(62)
# data.dropna(inplace=True)
# data

In [90]:
# select only the ticker index
indiviual_datas = []

for ticker in data['Ticker'].unique():
    #data_ticker = data.xs(0, level=1).copy()
    data_ticker = data[data['Ticker'] == ticker].copy()
    # make target column
    data_ticker['Target'] = data_ticker['62-day Price Movement'].shift(-62)
    #data['Price Movement 62 Days'] = data['Price Movement'].shift(62)
    data_ticker.dropna(inplace=True)
    data_ticker

    indiviual_datas.append(data_ticker)

data_all_tickers = pd.concat(indiviual_datas)

data_all_tickers

Unnamed: 0,Ticker,Open,Low,High,Close,Volume,Sector,PE-Ratio,Return,62-day Price Movement,Target
2016-01-05,0,146.820007,145.610001,147.500000,147.460007,2688100,5,19.453827,0.004359,0,0.0
2016-01-06,0,145.589996,143.419998,145.759995,144.490005,2997100,5,19.062006,-0.020141,0,0.0
2016-01-07,0,142.520004,140.630005,143.130005,140.970001,3553500,5,18.597625,-0.024362,0,0.0
2016-01-08,0,141.360001,140.220001,142.500000,140.490005,2664000,5,18.534302,-0.003405,0,0.0
2016-01-11,0,140.970001,139.410004,141.429993,140.460007,2775500,5,18.530344,-0.000214,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
2019-12-26,491,133.190002,132.320007,133.190002,133.029999,929400,4,43.904290,0.000828,1,0.0
2019-12-27,491,133.399994,132.380005,133.479996,133.250000,1296100,4,43.976898,0.001654,1,0.0
2019-12-30,491,133.570007,131.809998,133.660004,132.210007,942000,4,43.633666,-0.007805,1,0.0
2019-12-31,491,132.009995,131.449997,132.580002,132.350006,1128600,4,42.419874,0.001059,1,0.0


In [91]:
# check PE-Ratio for inf values
# find Tickers with inf values for PE-Ratio
data_all_tickers[data_all_tickers['PE-Ratio'] == np.inf]['Ticker'].unique()
# removing all rows with the tickers that have infinity values in their PE-Ratios
for wrong_ticker in data_all_tickers[data_all_tickers['PE-Ratio'] == np.inf]['Ticker'].unique():
    data_all_tickers = data_all_tickers[data_all_tickers['Ticker'] != wrong_ticker]

In [238]:
# Splitting the dataset into the Training set and Test set according to date
# Define the specific date to split the DataFrame
# Create an offset of 62 Business days
bd = pd.tseries.offsets.BusinessDay(n = 62)
split_date = pd.to_datetime('2019-09-30') - bd

# set a start date for the training set to be 62 days after the first date in the dataset
start_date = pd.to_datetime('2016-01-04') + bd

# small test dataset creation:

# bd = pd.tseries.offsets.BusinessDay(n = 62)
# split_date = pd.to_datetime('2019-11-30') - bd
# start_date = pd.to_datetime('2018-01-04') + bd


# Split the DataFrame into training and test sets based on the specific date
# for train we want all the data from the start date to the split date, this ensures that we have 62 days of data for each stock
# for test we want all the data from the split date to the end of the dataset
train = data_all_tickers.loc[(start_date < data_all_tickers.index) & (data_all_tickers.index < split_date)]
test = data_all_tickers.loc[data_all_tickers.index >= split_date]

X_train = train[['Ticker','Open', 'Low', 'High', 'Volume', 'Sector', 'PE-Ratio']]
y_train = train['Target']
X_test = test[['Ticker', 'Open', 'Low', 'High', 'Volume', 'Sector', 'PE-Ratio']]
y_test = test['Target']


  result = libops.scalar_compare(x.ravel(), y, op)


In [187]:
# print any nan values in the test set
print(test.isnull().sum())

Ticker                        0
Open                          0
Low                           0
High                          0
Close                         0
Volume                        0
Sector                        0
PE-Ratio                      0
Return                        0
62-day Price Movement         0
Target                        0
Prediction                    0
Prediction Probability (0)    0
Prediction Probability (1)    0
dtype: int64


In [28]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Standardize the input features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train the SVM model
model = SVC(kernel='rbf', probability=True) # linear, rbf, poly, sigmoid
model.fit(X_train, y_train)

# Make a prediction using the trained SVM model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")


Accuracy: 0.44019620796723263


In [29]:
# save model to file
import pickle
pickle.dump(model, open('svm_modelwithoutPE.sav', 'wb'))

In [30]:
y_pred_proba = model.predict_proba(X_test)

In [55]:
# only keep the relevant dates for the test set
ticker_name_df = pd.read_csv('SP500_stock_dataset_PE.csv', delimiter=',')
# Make date column the index
ticker_name_df = ticker_name_df.set_index('Date')
# only display year, month and day in index
ticker_name_df.index = pd.to_datetime(ticker_name_df.index).date
for wrong_ticker in ticker_name_df[ticker_name_df['PE-Ratio'] == np.inf]['Ticker'].unique():
    ticker_name_df = ticker_name_df[ticker_name_df['Ticker'] != wrong_ticker]
# only keep the Ticker
ticker_name_df = ticker_name_df[['Ticker']]
# drop duplicates
ticker_name_df = ticker_name_df.drop_duplicates()
# remove the index
ticker_name_df = ticker_name_df.reset_index(drop=True)
ticker_name_df

Unnamed: 0,Ticker
0,MMM
1,AOS
2,ABT
3,ABBV
4,ACN
...,...
479,YUM
480,ZBRA
481,ZBH
482,ZION


In [53]:
# number of unique tickers in ticker_name_df
len(ticker_name_df['Ticker'].unique())

484

In [93]:
url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
# Read the table of components from the Wikipedia page
sp500_components = pd.read_html(url)[0]
# Extract the ticker  from their columns
tickers = sp500_components["Symbol"].tolist()
# make the tickers into a dictionary, with the index as the key and the ticker as the value
ticker_dict = dict(zip(sp500_components.index, tickers))

In [263]:
# Convert unique tickers to list
unique_tickers = data_all_tickers["Ticker"].unique().tolist()

# Map numbers to ticker symbols
ticker_symbols = [ticker_dict[ticker] for ticker in unique_tickers]

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 151,
 152,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 166,
 167,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 176,
 177,
 178,
 179,
 180,
 181,
 182,
 183,
 184,
 185,
 186,

In [239]:
test['Ticker'] = test['Ticker'].astype('category')
#  make y_pred and y_pred_proba a column in the test dataset
test['Prediction'] = y_pred
test['Prediction Probability (0)'] = y_pred_proba[:,0]
test['Prediction Probability (1)'] = y_pred_proba[:,1]
# only keep columns for ticker, target, and prediction
test = test[['Ticker', 'Target', 'Prediction', 'Prediction Probability (1)', 'Prediction Probability (0)', 'Return']]
# make the ticker column a name again


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test['Ticker'] = test['Ticker'].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test['Prediction'] = y_pred
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test['Prediction Probability (0)'] = y_pred_proba[:,0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try 

__Portfolio Performance, if bought on each testing day and kept for respective 62 trade days.__

In [440]:
import cvxpy as cp
import pandas as pd



def calculate_optimal_weights(test, date):
    # only keep the rows of a specific date
    # make a date be of type datetime
    date = pd.to_datetime(date)
    
    # put the ticker names back into the test dataset
   
    # test['Ticker'] = test['Ticker'].astype('category')
    # #  make y_pred and y_pred_proba a column in the test dataset
    # test['Prediction'] = y_pred
    # test['Prediction Probability (0)'] = y_pred_proba[:,0]
    # test['Prediction Probability (1)'] = y_pred_proba[:,1]
    # # only keep columns for ticker, target, and prediction
    # test = test[['Ticker', 'Target', 'Prediction', 'Prediction Probability (1)', 'Prediction Probability (0)', 'Return']]
    # # make the ticker column a name again
    # # make a new dataframe where return is the index and each column is a ticker
    # return_df = test.pivot(columns='Ticker', values='Return')
    # # remove stocks that have nan values in the return column
    # return_df = return_df.dropna(axis=1)

    # make a new dataframe where return is the index and each column is a ticker
    return_df = test.pivot(columns='Ticker', values='Return')
    # remove stocks that have nan values in the return column
    return_df = return_df.dropna(axis=1)
    # for each ticker only keep the row of the input date
    test = test.groupby('Ticker').apply(lambda x: x.loc[x.index == date])
    test['Ticker'] = test['Ticker'].cat.remove_unused_categories()
    
    # ensure that the tickers in test are the same as the tickers in return_df
    test = test[test['Ticker'].isin(return_df.columns)]

    
    return_deviation = return_df.iloc[0:62]
    
    # Filter the dataframe for the given date
    #df_date = df_date[df_date.index == date]
    #df_date = df_date[pd.DatetimeIndex(df_date.index).date == date]
    # find all places in return_df that are nan
    

    # Calculate the covariance matrix
    cov_matrix = return_deviation.cov()
    # Force the covariance matrix to be symmetric
    cov_matrix = (cov_matrix + cov_matrix.T) / 2
    
    # Number of stocks
    #n = len(ticker_symbols)
    n = len(test['Ticker'])
    #print('test',test['Ticker'].head(),len(test))
    # Weights for each stock in the portfolio
    w = cp.Variable(n)
    #print(w.shape, cov_matrix.shape,n)
    # Objective is to minimize portfolio variance
    risk = cp.quad_form(w, cov_matrix)
    objective = cp.Minimize(risk)
    

    # Constraints:
    # - weights must sum to 1
    # - only consider stocks where the SVM prediction is 1 (price will increase)
    # - expected portfolio return must be at least a certain value (e.g., 0.01)
    constraints = [
        cp.sum(w) == 1,
        w >= 0,
        #w[df_date["Prediction"] == 0] == 0,
        w[test["Prediction"] == 0 ] == 0,
        #w.T @ df_date["Prediction Probability (1)"] >= 0.05
        w.T @ test["Prediction Probability (1)"] >= 0.05
        

    ]
    

    # Solve the optimization problem
    problem = cp.Problem(objective, constraints)

    problem.solve()

    # Get the optimal weights
    optimal_weights = w.value
    # Create a dataframe with the optimal weights
    optimal_weights_df = pd.DataFrame(optimal_weights, index=test["Ticker"])
    # add the ticker names to the dataframe
    optimal_weights_df = optimal_weights_df.join(ticker_name_df)
    # remove the index
    optimal_weights_df = optimal_weights_df.reset_index(drop=True)
    # make the ticker the index
    optimal_weights_df = optimal_weights_df.set_index('Ticker')
    # make the column name weights
    optimal_weights_df.columns = ['Weights']
   


    # Sort the dataframe by the optimal weights
    optimal_weights_df = optimal_weights_df.sort_values(by=['Weights'], ascending=False)

    # Only keep the top 10 stocks and their weights
    optimal_weights_df = optimal_weights_df.head(10)

    return round(optimal_weights_df,3)


In [300]:

import pandas as pd

def calculate_return(ticker, df, start_date):
    # Convert string dates to datetime
    start_date = pd.to_datetime(start_date)
    # Calculate the end date as start date + 62 trading days
    end_date = start_date + pd.tseries.offsets.BDay(62)
    
    # If ticker symbols are in the DataFrame as a column
    if 'Ticker' in df.columns:
        df_ticker = df[df['Ticker'] == ticker]
    # If ticker symbols are in the DataFrame as an index
    else:
        df_ticker = df.loc[ticker]

    # Filter data between start and end dates
    mask = (df_ticker.index >= start_date) & (df_ticker.index <= end_date)
    df_ticker = df_ticker.loc[mask]
    
    # Get the opening price at the start and end dates
    start_price = df_ticker['Open'].iloc[0]
    end_price = df_ticker['Open'].iloc[-1]

    # Calculate the return
    return_percent = ((end_price - start_price) / start_price) * 100

    return round(return_percent,2)


In [441]:
# for each date in the test set calculate the return of the portfolio
# empty list to store the returns
returns_list = []
# for each date in the test set
for date in test.index.unique():
    # calculate the optimal weights for the portfolio
    optimal_weights_df = calculate_optimal_weights(test, date)
    # calculate the return of the portfolio
    # for the top 10 weighted stocks in the portfolio calculate their return
    # empty list to store the returns
    return_list = []
    # for each stock in the portfolio
    for ticker in optimal_weights_df.index:
        if optimal_weights_df.loc[ticker]['Weights'] > 0:
            # calculate the return of the stock
            return_list.append(calculate_return(ticker, initial_df, date))
    # Calculate the return of the portfolio
    portfolio_return = 0
    for i in range(len(return_list)):
        portfolio_return += return_list[i] * optimal_weights_df.iloc[i]['Weights']
    # append the return to the list
    returns_list.append(round(portfolio_return,1))


        Weights
Ticker         
AMZN      0.542
NOW       0.387
NWS       0.065
TSCO      0.005
ALGN      0.000
WBA       0.000
QCOM      0.000
EQIX      0.000
OMC       0.000
UAL       0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.542
NOW       0.387
NWS       0.065
TSCO      0.005
ALGN      0.000
WBA       0.000
QCOM      0.000
OMC       0.000
UAL       0.000
AAL       0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.542
NOW       0.387
NWS       0.065
TSCO      0.005
ALGN      0.000
WBA       0.000
QCOM      0.000
UAL       0.000
EQIX      0.000
OMC       0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
NOW       0.445
F         0.404
NWS       0.152
TSCO      0.000
ALGN      0.000
MSFT      0.000
NTAP      0.000
ANET      0.000
QCOM      0.000
IR        0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.542
NOW       0.387
NWS       0.065
TSCO      0.005
ALGN      0.000
WBA       0.000
QCOM      0.000
EQIX      0.000
OMC       0.000
F         0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
NOW       0.360
AMZN      0.287
F         0.281
NWS       0.072
NaN       0.000
ENPH      0.000
CCI       0.000
MKC       0.000
VZ        0.000
CDAY      0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
NOW       0.350
AMZN      0.320
F         0.268
NWS       0.062
UHS       0.000
DXCM      0.000
LRCX      0.000
MAS       0.000
NRG       0.000
ALGN      0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
NOW       0.350
AMZN      0.320
F         0.268
NWS       0.062
TSCO      0.000
MPWR      0.000
XEL       0.000
ETSY      0.000
MHK       0.000
NWSA      0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.542
NOW       0.387
NWS       0.065
TSCO      0.005
ALGN      0.000
WBA       0.000
QCOM      0.000
OMC       0.000
UAL       0.000
AAL       0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.614
NOW       0.386
MAS       0.000
PKI       0.000
IP        0.000
AMT       0.000
FICO      0.000
DXCM      0.000
MDT       0.000
CCI       0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.381
NOW       0.350
F         0.269
ALGN      0.000
QCOM      0.000
NaN       0.000
ADSK      0.000
IR        0.000
BSX       0.000
IP        0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.381
NOW       0.350
F         0.269
ALGN      0.000
IP        0.000
IR        0.000
NaN       0.000
BSX       0.000
ADSK      0.000
QCOM      0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.614
NOW       0.386
MAS       0.000
PKI       0.000
IP        0.000
AMT       0.000
FICO      0.000
DXCM      0.000
MDT       0.000
CME       0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
NOW       0.361
AMZN      0.259
F         0.242
MSFT      0.138
GD        0.000
NWSA      0.000
GLW       0.000
MPWR      0.000
AAP       0.000
ALGN      0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
NOW       0.397
AMZN      0.396
MSFT      0.206
TSCO      0.000
AMD       0.000
EQT       0.000
NWS       0.000
NEE       0.000
NaN       0.000
UNH       0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
NOW       0.358
F         0.244
BKNG      0.179
MSFT      0.155
NWS       0.064
GD        0.000
WYNN      0.000
LYB       0.000
TSLA      0.000
FDS       0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
NOW       0.336
F         0.232
BKNG      0.183
AMZN      0.147
MSFT      0.102
TSCO      0.000
INTC      0.000
MPWR      0.000
MLM       0.000
ANET      0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
NOW       0.361
AMZN      0.259
F         0.242
MSFT      0.138
ENPH      0.000
ALGN      0.000
INTC      0.000
MKTX      0.000
CAG       0.000
IRM       0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
NOW       0.317
F         0.242
AMZN      0.207
MSFT      0.119
FIS       0.115
TSLA      0.000
CCL       0.000
GD        0.000
LYB       0.000
WYNN      0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
NOW       0.361
AMZN      0.259
F         0.242
MSFT      0.138
ENPH      0.000
ALGN      0.000
INTC      0.000
MKTX      0.000
CAG       0.000
RTX       0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
NOW       0.429
F         0.301
MSFT      0.270
ALGN      0.000
QCOM      0.000
IP        0.000
IR        0.000
BSX       0.000
NaN       0.000
DXCM      0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
NOW       0.428
F         0.303
MSFT      0.269
TSCO      0.000
ALGN      0.000
TFX       0.000
QCOM      0.000
BWA       0.000
AAL       0.000
BBWI      0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
NOW       0.429
F         0.301
MSFT      0.270
GD        0.000
QCOM      0.000
ALGN      0.000
AAL       0.000
GLW       0.000
MPWR      0.000
DISH      0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
NOW       0.361
AMZN      0.259
F         0.242
MSFT      0.138
GD        0.000
NWSA      0.000
GLW       0.000
ALGN      0.000
QCOM      0.000
HON       0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.300
NOW       0.281
LW        0.241
EQIX      0.149
MSFT      0.029
BKNG      0.000
SNA       0.000
F         0.000
TSCO      0.000
LOW       0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
NOW       0.264
F         0.228
AMZN      0.221
EQIX      0.185
MSFT      0.101
NWSA      0.000
GD        0.000
MPWR      0.000
PSX       0.000
GLW       0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.344
NOW       0.284
EQIX      0.212
MSFT      0.160
NWSA      0.000
GD        0.000
MPWR      0.000
PSX       0.000
DXC       0.000
HES       0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
NOW       0.254
EQIX      0.215
AMZN      0.211
BKNG      0.206
MSFT      0.115
NWSA      0.000
GD        0.000
MPWR      0.000
PSX       0.000
HES       0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
NOW       0.307
F         0.275
EQIX      0.213
MSFT      0.206
TSCO      0.000
ALGN      0.000
QCOM      0.000
MDT       0.000
ENPH      0.000
AMD       0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
NOW       0.264
F         0.228
AMZN      0.221
EQIX      0.185
MSFT      0.101
TSCO      0.000
INTC      0.000
MPWR      0.000
ANET      0.000
MLM       0.000


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.307
NOW       0.283
EQIX      0.214
MSFT      0.196
DXC       0.000
NWSA      0.000
TFX       0.000
MKTX      0.000
MPWR      0.000
BWA       0.000


  result = libops.scalar_compare(x.ravel(), y, op)


KeyboardInterrupt: 

In [406]:
# Make a dataframe with the returns and dates
returns_df = pd.DataFrame(returns_list, index=test.index.unique())
# make the column name returns
returns_df.columns = ['Portfolio 62-day Returns (%)']
returns_df

Unnamed: 0,Portfolio 62-day Returns (%)
2019-07-05,-9.7
2019-07-08,-11.6
2019-07-09,-12.1
2019-07-10,-11.4
2019-07-11,-11.9
...,...
2019-12-26,-24.8
2019-12-27,-25.7
2019-12-30,-24.2
2019-12-31,-15.0


SP500 return in the same period

In [407]:
import yfinance as yf
from pandas_datareader import data as pdr
# This line is needed to fix a problem with pandas_datareader
yf.pdr_override()

start_date = '2019-07-05'
end_date = '2020-04-02'
# 
# Get data from Yahoo Finance
sp500 = pdr.get_data_yahoo('^GSPC', start=start_date, end=end_date)
# we are only interested in the open price
sp500 = sp500['Open']
# Transform it into a dataframe
sp500_returns = pd.DataFrame(sp500)
# Make 'S&P 500 Index Price' the column header
sp500_returns = sp500_returns.rename(columns={'Open': 'S&P 500 Index Price'})
# Calculate the 3 month return for each day
sp500_returns['S&P 500 - 3 Month Return (%)'] = sp500_returns['S&P 500 Index Price'].pct_change(periods=62).shift(-62)
# calculating all 3-month return values by 100 to obtain percent
sp500_returns['S&P 500 - 3 Month Return (%)'] = sp500_returns['S&P 500 - 3 Month Return (%)'] * 100
sp500_returns.round(2)
# remove the S&P 500 Index Price column
sp500_returns = sp500_returns.drop(columns=['S&P 500 Index Price'])
# removing all rows with NaN values (The last 62 days, since they don't have data for 3 months into the future)
sp500_returns = sp500_returns.dropna()
sp500_returns

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,S&P 500 - 3 Month Return (%)
Date,Unnamed: 1_level_1
2019-07-05,-1.992795
2019-07-08,-3.167699
2019-07-09,-1.583532
2019-07-10,-1.507713
2019-07-11,-2.641008
...,...
2019-12-26,-22.493490
2019-12-27,-21.290758
2019-12-30,-21.021332
2019-12-31,-18.676715


In [410]:
# Combine the returns of the portfolio and the S&P 500
combined_returns = pd.concat([returns_df, sp500_returns], axis='columns', join='inner')
# make a column that shows the difference between the portfolio returns and the S&P 500 returns
combined_returns['Difference (%)'] = combined_returns['Portfolio 62-day Returns (%)'] - combined_returns['S&P 500 - 3 Month Return (%)']
combined_returns.round(2)

Unnamed: 0,Portfolio 62-day Returns (%),S&P 500 - 3 Month Return (%),Difference (%)
2019-07-05,-9.7,-1.99,-7.71
2019-07-08,-11.6,-3.17,-8.43
2019-07-09,-12.1,-1.58,-10.52
2019-07-10,-11.4,-1.51,-9.89
2019-07-11,-11.9,-2.64,-9.26
...,...,...,...
2019-12-26,-24.8,-22.49,-2.31
2019-12-27,-25.7,-21.29,-4.41
2019-12-30,-24.2,-21.02,-3.18
2019-12-31,-15.0,-18.68,3.68


In [411]:
# average difference between the portfolio returns and the S&P 500 returns
average_difference = combined_returns['Difference (%)'].mean()
print('Average difference between the portfolio returns and the S&P 500 returns: ', round(average_difference,2), '%')

Average difference between the portfolio returns and the S&P 500 returns:  -3.15 %


__Backup-results in case it fails__:

In [None]:
# Make a dataframe with the returns and dates
returns_df = pd.DataFrame(returns_list, index=test.index.unique())
# make the column name returns
returns_df.columns = ['Portfolio 62-day Returns (%)']
returns_df

Unnamed: 0,Portfolio 62-day Returns (%)
2019-09-05,8.0
2019-09-06,6.1
2019-09-09,5.3
2019-09-10,5.4
2019-09-11,4.2
...,...
2019-12-26,-54.0
2019-12-27,-50.8
2019-12-30,-21.6
2019-12-31,-21.1
