In [181]:
# Implementing Support Vector machines model on dataset for stock price prediction
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime
import yfinance as yf
import requests
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Importing the dataset
initial_df = pd.read_csv('SP500_stock_prices.csv', delimiter=',')
#initial_df = pd.read_csv('SP500_stock_prices.csv', delimiter=',')

# # Create a new column 'Price Movement'
# df['Daily Price Movement'] = np.where(df['Close'] > df['Close'].shift(1), 1, 0)

initial_df.head()

Unnamed: 0,Date,Ticker,Open,Low,High,Close,Volume,Sector
0,2016-01-04,MMM,148.050003,145.399994,148.320007,146.820007,3277200,Industrials
1,2016-01-05,MMM,146.820007,145.610001,147.5,147.460007,2688100,Industrials
2,2016-01-06,MMM,145.589996,143.419998,145.759995,144.490005,2997100,Industrials
3,2016-01-07,MMM,142.520004,140.630005,143.130005,140.970001,3553500,Industrials
4,2016-01-08,MMM,141.360001,140.220001,142.5,140.490005,2664000,Industrials


In [182]:
# Make date column the index
initial_df = initial_df.set_index('Date')

In [183]:
# only display year, month and day in index
initial_df.index = pd.to_datetime(initial_df.index).date
data = initial_df
data
# make return column
data['Return'] = data['Close'].pct_change()

In [156]:
print(data['Sector'].unique())
# assign each different Sector string its own number
data['Sector'] = data['Sector'].map({'Consumer Discretionary': 0, 'Consumer Staples': 1, 'Energy': 2, 'Financials': 3, 'Health Care': 4, 'Industrials': 5, 'Information Technology': 6, 'Materials': 7, 'Real Estate': 8, 'Communication Services': 9, 'Utilities': 10})

['Industrials' 'Health Care' 'Information Technology'
 'Communication Services' 'Consumer Staples' 'Consumer Discretionary'
 'Utilities' 'Financials' 'Materials' 'Real Estate' 'Energy']


In [157]:
# make a new column for whether or not the price has increased or decreased from the previous 62 days
# 1 = increased, 0 = decreased, doing so by comparing openprice of today with openprice of 62 days ago
data['62-day Price Movement'] = np.where(data['Open'] > data['Open'].shift(-63), 1, 0)

In [158]:
# Make the tickers to categoricals, but keep the original order of the tickers
data['Ticker'] = pd.Categorical(data['Ticker'], categories=data['Ticker'].unique(), ordered=True)
data['Ticker'] = data['Ticker'].cat.codes
# # make target column
# data['Target'] = data['62-day Price Movement'].shift(-62)
# #data['Price Movement 62 Days'] = data['Price Movement'].shift(62)
# data.dropna(inplace=True)
# data

In [159]:
# select only the ticker index
indiviual_datas = []

for ticker in data['Ticker'].unique():
    #data_ticker = data.xs(0, level=1).copy()
    data_ticker = data[data['Ticker'] == ticker].copy()
    # make target column
    data_ticker['Target'] = data_ticker['62-day Price Movement'].shift(-63)
    #data['Price Movement 62 Days'] = data['Price Movement'].shift(62)
    data_ticker.dropna(inplace=True)
    data_ticker

    indiviual_datas.append(data_ticker)

data_all_tickers = pd.concat(indiviual_datas)

data_all_tickers

Unnamed: 0,Ticker,Open,Low,High,Close,Volume,Sector,Return,62-day Price Movement,Target
2016-01-05,0,146.820007,145.610001,147.500000,147.460007,2688100,5,0.004359,0,0.0
2016-01-06,0,145.589996,143.419998,145.759995,144.490005,2997100,5,-0.020141,0,0.0
2016-01-07,0,142.520004,140.630005,143.130005,140.970001,3553500,5,-0.024362,0,0.0
2016-01-08,0,141.360001,140.220001,142.500000,140.490005,2664000,5,-0.003405,0,0.0
2016-01-11,0,140.970001,139.410004,141.429993,140.460007,2775500,5,-0.000214,0,0.0
...,...,...,...,...,...,...,...,...,...,...
2019-12-24,497,132.210007,131.289993,133.080002,132.919998,442500,4,0.004155,1,0.0
2019-12-26,497,133.190002,132.320007,133.190002,133.029999,929400,4,0.000828,1,0.0
2019-12-27,497,133.399994,132.380005,133.479996,133.250000,1296100,4,0.001654,1,0.0
2019-12-30,497,133.570007,131.809998,133.660004,132.210007,942000,4,-0.007805,1,0.0


In [477]:
# check PE-Ratio for inf values
# find Tickers with inf values for PE-Ratio
data_all_tickers[data_all_tickers['PE-Ratio'] == np.inf]['Ticker'].unique()
# removing all rows with the tickers that have infinity values in their PE-Ratios
for wrong_ticker in data_all_tickers[data_all_tickers['PE-Ratio'] == np.inf]['Ticker'].unique():
    data_all_tickers = data_all_tickers[data_all_tickers['Ticker'] != wrong_ticker]

In [161]:
# Splitting the dataset into the Training set and Test set according to date
# Define the specific date to split the DataFrame
# Create an offset of 62 Business days
#bd = pd.tseries.offsets.BusinessDay(n = 62)
#split_date = pd.to_datetime('2019-09-30') - bd

# set a start date for the training set to be 62 days after the first date in the dataset
#start_date = pd.to_datetime('2016-01-04') + bd

# small test dataset creation:

bd = pd.tseries.offsets.BusinessDay(n = 63)
split_date = pd.to_datetime('2019-11-30') - bd
start_date = pd.to_datetime('2018-09-30') + bd


# Split the DataFrame into training and test sets based on the specific date
# for train we want all the data from the start date to the split date, this ensures that we have 62 days of data for each stock
# for test we want all the data from the split date to the end of the dataset
train = data_all_tickers.loc[(start_date < data_all_tickers.index) & (data_all_tickers.index < split_date)]
test = data_all_tickers.loc[data_all_tickers.index >= split_date]

X_train = train[['Ticker','Open', 'Low', 'High', 'Volume', 'Sector']]
y_train = train['Target']
X_test = test[['Ticker', 'Open', 'Low', 'High', 'Volume', 'Sector']]
y_test = test['Target']


  result = libops.scalar_compare(x.ravel(), y, op)


In [162]:
# print any nan values in the test set
print(test.isnull().sum())

Ticker                   0
Open                     0
Low                      0
High                     0
Close                    0
Volume                   0
Sector                   0
Return                   0
62-day Price Movement    0
Target                   0
dtype: int64


In [163]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Standardize the input features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train the SVM model
model = SVC(kernel='rbf', probability=True) # linear, rbf, poly, sigmoid
model.fit(X_train, y_train)

# Make a prediction using the trained SVM model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")


Accuracy: 0.3000631558492033


In [29]:
# save model to file
# import pickle
# pickle.dump(model, open('svm_modelwithoutPE.sav', 'wb'))

In [164]:
y_pred_proba = model.predict_proba(X_test)

In [173]:
# only keep the relevant dates for the test set
ticker_name_df = pd.read_csv('SP500_stock_prices.csv', delimiter=',')
# Make date column the index
ticker_name_df = ticker_name_df.set_index('Date')
# only display year, month and day in index
ticker_name_df.index = pd.to_datetime(ticker_name_df.index).date
# only keep the Ticker
ticker_name_df = ticker_name_df[['Ticker']]
# drop duplicates
ticker_name_df = ticker_name_df.drop_duplicates()
# remove the index
ticker_name_df = ticker_name_df.reset_index(drop=True)
ticker_name_df

Unnamed: 0,Ticker
0,MMM
1,AOS
2,ABT
3,ABBV
4,ACN
...,...
493,YUM
494,ZBRA
495,ZBH
496,ZION


In [174]:
# number of unique tickers in ticker_name_df
len(ticker_name_df['Ticker'].unique())

498

In [169]:
url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
# Read the table of components from the Wikipedia page
sp500_components = pd.read_html(url)[0]
# Extract the ticker  from their columns
tickers = sp500_components["Symbol"].tolist()
# make the tickers into a dictionary, with the index as the key and the ticker as the value
ticker_dict = dict(zip(sp500_components.index, tickers))

In [175]:
# Convert unique tickers to list
unique_tickers = data_all_tickers["Ticker"].unique().tolist()

# Map numbers to ticker symbols
ticker_symbols = [ticker_dict[ticker] for ticker in unique_tickers]

In [176]:
len(ticker_symbols)

496

In [177]:
test['Ticker'] = test['Ticker'].astype('category')
#  make y_pred and y_pred_proba a column in the test dataset
test['Prediction'] = y_pred
test['Prediction Probability (0)'] = y_pred_proba[:,0]
test['Prediction Probability (1)'] = y_pred_proba[:,1]
# only keep columns for ticker, target, and prediction
test = test[['Ticker', 'Target', 'Prediction', 'Prediction Probability (1)', 'Prediction Probability (0)', 'Return']]
# make the ticker column a name again


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test['Ticker'] = test['Ticker'].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test['Prediction'] = y_pred
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test['Prediction Probability (0)'] = y_pred_proba[:,0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try 

__Portfolio Performance, if bought on each testing day and kept for respective 62 trade days.__

In [262]:
import cvxpy as cp
import pandas as pd



def calculate_optimal_weights(test, date):
    # only keep the rows of a specific date
    # make a date be of type datetime
    date = pd.to_datetime(date)
    
    # make a new dataframe where return is the index and each column is a ticker
    return_df = test.pivot(columns='Ticker', values='Return')
    # remove stocks that have nan values in the return column
    return_df = return_df.dropna(axis=1)
    # for each ticker only keep the row of the input date
    test = test.groupby('Ticker').apply(lambda x: x.loc[x.index == date])
    test['Ticker'] = test['Ticker'].cat.remove_unused_categories()
    
    # ensure that the tickers in test are the same as the tickers in return_df
    test = test[test['Ticker'].isin(return_df.columns)]

    
    return_deviation = return_df.iloc[0:62]
    
    # Filter the dataframe for the given date
    #df_date = df_date[df_date.index == date]
    #df_date = df_date[pd.DatetimeIndex(df_date.index).date == date]
    # find all places in return_df that are nan
    

    # Calculate the covariance matrix
    cov_matrix = return_deviation.cov()
    # Force the covariance matrix to be symmetric
    cov_matrix = (cov_matrix + cov_matrix.T) / 2
    
    # Number of stocks
    #n = len(ticker_symbols)
    n = len(test['Ticker'])
    #print('test',test['Ticker'].head(),len(test))
    # Weights for each stock in the portfolio
    w = cp.Variable(n)
    #print(w.shape, cov_matrix.shape,n)
    # Objective is to minimize portfolio variance
    risk = cp.quad_form(w, cov_matrix)
    # Parameter for the cardinality constraint
    k = 50

    # Cardinality constraint with penalty in the objective
    objective = cp.Minimize(risk - k * cp.sum(w))

    # objective = cp.Minimize(risk)
    

    # Constraints:
    # - weights must sum to 1
    # - only consider stocks where the SVM prediction is 1 (price will increase)
    # - expected portfolio return must be at least a certain value (e.g., 0.01)
    constraints = [
        cp.sum(w) == 1,
        w >= 0,
        w <= 0.3,
        #w[df_date["Prediction"] == 0] == 0,
        #w[test["Prediction"] == 0 ] == 0,
        
        #w.T @ df_date["Prediction Probability (1)"] >= 0.05
        w.T @ test["Prediction Probability (1)"] >= 0.35
    ]
 
    
    
    # Solve the optimization problem
    problem = cp.Problem(objective, constraints)

    problem.solve()

    # Get the optimal weights
    optimal_weights = w.value
    if optimal_weights is None:
        optimal_weights = np.zeros(n)
    # Create a dataframe with the optimal weights
    optimal_weights_df = pd.DataFrame(optimal_weights, index=test["Ticker"])
    # add the ticker names to the dataframe
    optimal_weights_df = optimal_weights_df.join(ticker_name_df)
    # remove the index
    optimal_weights_df = optimal_weights_df.reset_index(drop=True)
    # make the ticker the index
    optimal_weights_df = optimal_weights_df.set_index('Ticker')
    # make the column name weights
    optimal_weights_df.columns = ['Weights']
   


    # Sort the dataframe by the optimal weights
    optimal_weights_df = optimal_weights_df.sort_values(by=['Weights'], ascending=False)

    # Only keep the top 10 stocks and their weights
    optimal_weights_df = optimal_weights_df.head(10)

    return round(optimal_weights_df,3)


In [179]:

import pandas as pd

def calculate_return(ticker, df, start_date):
    # Convert string dates to datetime
    start_date = pd.to_datetime(start_date)
    # Calculate the end date as start date + 62 trading days
    end_date = start_date + pd.tseries.offsets.BDay(63)
    # remove nan values from df
    df = df.dropna(axis=1)
    
    # Filter the dataframe for the given ticker
    df_ticker = df[df['Ticker'] == ticker]
    # Filter data between start and end dates
    mask = (df_ticker.index >= start_date) & (df_ticker.index <= end_date)
    df_ticker = df_ticker.loc[mask]
    
    # Get the opening price at the start and end dates
    start_price = df_ticker['Open'].iloc[0]
    end_price = df_ticker['Open'].iloc[-1]

    # Calculate the return
    return_percent = ((end_price - start_price) / start_price) * 100

    return round(return_percent,2)


In [263]:
# for each date in the test set calculate the return of the portfolio
# empty list to store the returns
returns_list = []
# for each date in the test set
for date in test.index.unique():
    # calculate the optimal weights for the portfolio
    optimal_weights_df = calculate_optimal_weights(test, date)
    print(optimal_weights_df)
    # calculate the return of the portfolio
    # for the top 10 weighted stocks in the portfolio calculate their return
    # empty list to store the returns
    return_list = []
    # for each stock in the portfolio
    for ticker in optimal_weights_df.index:
        if optimal_weights_df.loc[ticker]['Weights'] > 0:
            # calculate the return of the stock
            return_list.append(calculate_return(ticker, initial_df, date))
    # Calculate the return of the portfolio
    portfolio_return = 0
    for i in range(len(return_list)):
        portfolio_return += return_list[i] * optimal_weights_df.iloc[i]['Weights']
    # append the return to the list
    returns_list.append(round(portfolio_return,1))


        Weights
Ticker         
F         0.261
MTD       0.098
AMZN      0.085
NFLX      0.043
BBWI      0.028
CMG       0.019
MO        0.013
AZO       0.012
ULTA      0.010
KO        0.009


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.188
AMZN      0.178
MTD       0.048
NFLX      0.028
BBWI      0.015
CMG       0.010
KO        0.007
MO        0.007
GM        0.005
CHTR      0.004


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.240
AMZN      0.109
MTD       0.087
TSLA      0.048
NFLX      0.038
CMG       0.016
MO        0.015
BBWI      0.013
ULTA      0.007
CHTR      0.006


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.202
AMZN      0.137
MTD       0.049
NFLX      0.031
BBWI      0.017
CMG       0.011
CTRA      0.010
MO        0.008
EBAY      0.005
HAL       0.004


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.206
F         0.205
MTD       0.121
NFLX      0.088
BBWI      0.040
HAL       0.017
MO        0.015
CMG       0.014
KMI       0.009
CTRA      0.006


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.236
AMZN      0.071
MTD       0.065
NFLX      0.034
BBWI      0.018
MO        0.017
KR        0.011
BKR       0.010
HAL       0.008
CMG       0.007


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.204
AMZN      0.124
KR        0.096
MTD       0.050
BKR       0.027
NFLX      0.022
BBWI      0.020
MO        0.007
CMG       0.007
KO        0.005


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.266
MTD       0.107
NFLX      0.057
KMI       0.040
BBWI      0.035
MO        0.032
AMZN      0.023
BKR       0.014
BKNG      0.011
CMG       0.011


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.166
F         0.165
HAL       0.063
MRO       0.039
MTD       0.038
NFLX      0.017
GM        0.015
KMI       0.012
MO        0.009
BBWI      0.008


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.232
KHC       0.098
MTD       0.088
TSLA      0.086
NFLX      0.037
BBWI      0.027
HAL       0.023
MRO       0.022
AMZN      0.018
MO        0.014


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.228
AMZN      0.148
MTD       0.114
NFLX      0.065
TSLA      0.064
MO        0.023
CMG       0.018
BBWI      0.017
MRO       0.012
APA       0.011


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.261
MTD       0.095
NFLX      0.056
MO        0.032
AMZN      0.029
CMG       0.018
BBWI      0.013
BKNG      0.011
AAP       0.009
HAL       0.008


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.214
AMZN      0.098
MTD       0.052
ETSY      0.040
GM        0.037
NFLX      0.033
MET       0.017
MO        0.017
KO        0.016
AMD       0.011


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.237
F         0.139
MTD       0.089
NFLX      0.043
TSLA      0.035
CMG       0.018
MO        0.017
BBWI      0.015
BKR       0.013
AAP       0.008


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.193
F         0.189
MTD       0.053
NFLX      0.023
HAL       0.015
CMG       0.011
MO        0.011
AMD       0.010
MRO       0.006
BBWI      0.005


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.229
F         0.075
MTD       0.059
NKE       0.035
NFLX      0.021
MO        0.020
EBAY      0.015
AZO       0.010
CMG       0.009
BBWI      0.008


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.214
F         0.127
MTD       0.054
NFLX      0.017
CCL       0.016
CAG       0.014
BBWI      0.013
MO        0.012
AZO       0.010
CMG       0.008


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.194
F         0.155
MTD       0.047
BKR       0.016
NFLX      0.014
WFC       0.014
LVS       0.012
AZO       0.008
EBAY      0.008
CMG       0.007


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.231
F         0.194
MTD       0.134
NFLX      0.038
CMG       0.022
MO        0.021
AZO       0.021
BBWI      0.016
ULTA      0.012
AAP       0.011


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.200
AMZN      0.150
MTD       0.049
NFLX      0.018
CMG       0.011
AZO       0.009
KO        0.008
BBWI      0.008
ULTA      0.007
MO        0.006


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.209
LVS       0.112
F         0.112
MTD       0.057
NFLX      0.018
GM        0.013
AZO       0.010
BBWI      0.010
KO        0.009
MO        0.009


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.184
AMZN      0.170
MTD       0.042
NFLX      0.015
BBWI      0.012
MO        0.009
GM        0.008
AZO       0.007
KO        0.007
CMG       0.007


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.244
AMZN      0.114
MTD       0.093
NFLX      0.040
MO        0.021
AZO       0.016
BBWI      0.016
CMG       0.013
ULTA      0.011
AAP       0.009


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.257
MTD       0.093
AMZN      0.050
NFLX      0.033
MO        0.025
AZO       0.017
CMG       0.013
BBWI      0.012
ULTA      0.011
AAP       0.009


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.239
AMZN      0.108
MTD       0.070
NFLX      0.026
AZO       0.013
MO        0.013
BBWI      0.012
KO        0.011
GM        0.010
CMG       0.010


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
MTD       0.266
F         0.217
AMZN      0.093
NFLX      0.072
BBWI      0.040
AZO       0.027
MO        0.009
ULTA      0.002
PSA       0.002
KO        0.002


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.226
AMZN      0.131
MTD       0.082
NFLX      0.036
BBWI      0.020
PCG       0.016
AZO       0.014
CMG       0.014
MO        0.013
TSLA      0.012


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.189
AMZN      0.185
MTD       0.052
NFLX      0.028
PCG       0.015
BBWI      0.011
CMG       0.010
AZO       0.008
GM        0.008
AMD       0.006


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.278
MTD       0.148
NFLX      0.059
AMZN      0.037
BBWI      0.032
AZO       0.027
CMG       0.022
ULTA      0.016
APA       0.015
MO        0.015


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.205
F         0.191
MTD       0.067
NFLX      0.031
BBWI      0.014
CMG       0.012
AZO       0.009
GM        0.006
MO        0.006
ULTA      0.006


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.221
AMZN      0.185
MTD       0.088
NFLX      0.064
GM        0.021
KMI       0.013
CMG       0.013
SLB       0.011
AZO       0.011
BBWI      0.010


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.233
AMZN      0.148
MTD       0.088
NFLX      0.049
BBWI      0.016
CMG       0.016
MO        0.012
KO        0.012
AZO       0.010
ULTA      0.008


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.179
AMZN      0.158
BBWI      0.044
MTD       0.041
HAL       0.035
NFLX      0.032
KO        0.011
CSX       0.010
CMG       0.009
MO        0.008


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.237
MTD       0.069
HAL       0.064
NFLX      0.036
AMZN      0.032
BBWI      0.024
CMG       0.015
MO        0.012
NVR       0.010
KO        0.009


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.238
MTD       0.065
NFLX      0.031
AMZN      0.029
BBWI      0.015
CMG       0.014
TSLA      0.012
MO        0.011
AZO       0.009
HAL       0.009


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.231
MTD       0.055
AMZN      0.028
NFLX      0.020
AZO       0.009
MO        0.009
CMG       0.008
BBWI      0.008
AAP       0.006
CCL       0.006


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.237
MTD       0.064
EBAY      0.055
TSLA      0.021
NFLX      0.018
BBWI      0.009
MO        0.008
AAP       0.007
CMG       0.007
KO        0.007


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.199
MTD       0.042
TSLA      0.039
PCG       0.016
NFLX      0.015
ULTA      0.013
NEM       0.012
EBAY      0.012
ALGN      0.010
PSA       0.009


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.162
F         0.155
MTD       0.032
TSLA      0.018
NFLX      0.013
PCG       0.011
NEM       0.011
ULTA      0.010
STZ       0.008
CHD       0.008


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.205
MTD       0.048
AMZN      0.039
GM        0.026
TSLA      0.021
PCG       0.021
NFLX      0.018
ULTA      0.011
NEM       0.010
ALGN      0.009


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.213
AMZN      0.089
MTD       0.073
NFLX      0.043
PCG       0.023
TSLA      0.022
MDLZ      0.013
ULTA      0.012
NEM       0.011
ENPH      0.010


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.187
KHC       0.173
AMZN      0.141
MTD       0.064
ETSY      0.028
NFLX      0.025
BBWI      0.010
MO        0.010
EQT       0.008
KO        0.007


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.220
AMZN      0.129
MTD       0.058
NFLX      0.025
KHC       0.011
MO        0.009
BBWI      0.009
APA       0.007
AAP       0.006
EBAY      0.006


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.209
AMZN      0.106
MTD       0.043
NFLX      0.022
PCG       0.018
KO        0.014
TSLA      0.009
AMD       0.008
HAL       0.008
ULTA      0.007


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.228
KR        0.074
MTD       0.053
NFLX      0.022
KO        0.020
PCG       0.013
BBWI      0.013
AMD       0.010
AMZN      0.010
MO        0.008


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.234
MTD       0.058
NFLX      0.020
AMZN      0.019
MRO       0.013
BBWI      0.011
KR        0.010
KO        0.010
EBAY      0.008
MO        0.007


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.225
AMZN      0.114
EXPE      0.070
MTD       0.062
NFLX      0.032
BBWI      0.015
EBAY      0.013
MO        0.007
KO        0.007
MRO       0.007


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.261
MTD       0.085
AMZN      0.042
NFLX      0.037
BBWI      0.020
EBAY      0.018
AAPL      0.013
AAP       0.011
MRO       0.010
CHTR      0.010


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.278
MTD       0.150
NFLX      0.058
AMZN      0.038
BBWI      0.030
AAP       0.018
ULTA      0.015
CHTR      0.013
MO        0.012
WMB       0.012


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.262
MTD       0.096
NFLX      0.048
AMZN      0.034
BBWI      0.022
GM        0.021
KO        0.014
DHI       0.011
AAP       0.010
CHTR      0.010


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.207
AMZN      0.159
MTD       0.054
NFLX      0.034
DIS       0.020
GM        0.011
BBWI      0.011
KMI       0.010
KO        0.007
DHI       0.007


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.267
MTD       0.109
AMZN      0.082
NFLX      0.052
GM        0.027
BBWI      0.024
KHC       0.018
KMI       0.014
ULTA      0.011
CHTR      0.010


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.207
F         0.116
NVDA      0.053
MTD       0.046
KMI       0.025
NFLX      0.024
BBWI      0.011
GM        0.009
CHTR      0.006
MO        0.005


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.213
AMZN      0.125
MTD       0.050
NFLX      0.036
TSLA      0.019
BBWI      0.012
MO        0.009
KO        0.009
BKR       0.007
AZO       0.006


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.245
MTD       0.069
AMZN      0.058
NFLX      0.046
BBWI      0.029
MO        0.013
NOW       0.011
KO        0.011
AZO       0.010
GM        0.010


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.208
AMZN      0.113
BBWI      0.075
MTD       0.046
NFLX      0.028
GM        0.015
MO        0.011
NOW       0.010
EBAY      0.007
AMD       0.006


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.204
BBWI      0.132
AMZN      0.105
MTD       0.056
NFLX      0.042
GM        0.018
AMD       0.008
CHTR      0.007
SCHW      0.006
MO        0.006


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.225
AMZN      0.078
BBWI      0.076
MTD       0.060
NFLX      0.039
GM        0.016
AAPL      0.011
MO        0.008
CTRA      0.008
CHTR      0.006


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.198
F         0.154
MTD       0.048
NFLX      0.039
BBWI      0.036
EBAY      0.019
MRO       0.010
GM        0.009
CHTR      0.005
AZO       0.005


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.194
F         0.173
BBWI      0.075
MTD       0.053
NFLX      0.037
KMI       0.031
DLTR      0.012
MRO       0.011
CTRA      0.009
CMG       0.007


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.204
AMZN      0.152
MTD       0.048
NFLX      0.030
AAPL      0.010
BBWI      0.009
CMG       0.008
AZO       0.007
CTRA      0.007
CHTR      0.006


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
TSLA      0.226
MTD       0.169
F         0.087
NFLX      0.073
AMZN      0.028
PCG       0.018
BBWI      0.017
AAPL      0.015
EIX       0.015
BAX       0.011


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.181
F         0.168
MTD       0.040
NFLX      0.029
BBWI      0.026
APA       0.010
EBAY      0.007
CMG       0.006
KO        0.006
AZO       0.006


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.185
AMZN      0.167
MTD       0.043
NFLX      0.024
BBWI      0.015
APA       0.008
KO        0.008
CTRA      0.008
CMG       0.006
MO        0.006


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.195
AMZN      0.120
MTD       0.051
BBWI      0.032
NFLX      0.027
TSLA      0.024
PCG       0.016
NEM       0.013
ULTA      0.011
CHD       0.010


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.207
F         0.200
TSLA      0.097
MTD       0.069
NFLX      0.042
BBWI      0.041
KMI       0.011
KO        0.010
KR        0.010
CMG       0.009


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.202
F         0.201
MTD       0.042
NFLX      0.033
BBWI      0.021
CMG       0.011
ULTA      0.009
APA       0.008
AZO       0.007
CHTR      0.006


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.207
AMZN      0.158
NFLX      0.076
MTD       0.070
BBWI      0.026
TSLA      0.018
PCG       0.016
ULTA      0.013
KO        0.012
NEM       0.011


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.222
AMZN      0.084
NFLX      0.045
MTD       0.034
BBWI      0.029
AZO       0.024
PCG       0.019
TSLA      0.012
DHR       0.011
ULTA      0.011


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.235
DHR       0.067
MTD       0.042
NFLX      0.035
AMZN      0.031
AZO       0.028
BBWI      0.023
PCG       0.016
CMG       0.010
ULTA      0.010


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.189
AMZN      0.139
DHR       0.083
NFLX      0.020
MTD       0.020
BBWI      0.020
AZO       0.017
CMG       0.007
HAL       0.007
GM        0.006


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.213
AMZN      0.116
DHR       0.097
NFLX      0.025
MTD       0.022
BBWI      0.021
AZO       0.020
CMG       0.008
KO        0.008
ULTA      0.007


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.184
AMZN      0.143
DHR       0.051
NFLX      0.021
PCG       0.019
BBWI      0.016
AZO       0.015
TSLA      0.011
MTD       0.010
ULTA      0.008


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.185
F         0.168
NFLX      0.042
HAL       0.028
AZO       0.016
EBAY      0.014
MTD       0.012
BBWI      0.011
PCG       0.009
CMG       0.008


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.182
AMZN      0.160
NFLX      0.047
PCG       0.018
MTD       0.015
AZO       0.014
BBWI      0.010
EBAY      0.010
CMG       0.009
KO        0.008


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.213
AMZN      0.088
CAG       0.067
NFLX      0.058
AZO       0.016
MTD       0.015
CMG       0.010
BBWI      0.009
KO        0.007
MO        0.007


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.200
AMZN      0.111
NFLX      0.060
BBWI      0.037
XOM       0.036
EBAY      0.029
KO        0.023
CCL       0.017
AZO       0.015
WFC       0.014


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.240
NFLX      0.060
AMZN      0.031
AZO       0.020
APA       0.014
BBWI      0.014
CMG       0.013
ULTA      0.009
MTD       0.008
MO        0.008


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
NFLX      0.227
F         0.156
MTD       0.078
AAPL      0.065
ULTA      0.023
AMZN      0.023
AAP       0.014
TDG       0.009
BKNG      0.004
MO        0.002


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.270
NFLX      0.073
AMZN      0.043
AZO       0.028
MTD       0.019
CMG       0.018
BBWI      0.015
ULTA      0.014
BKNG      0.013
TDG       0.012


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.274
NFLX      0.090
AZO       0.026
AMZN      0.023
CMG       0.019
BBWI      0.019
MTD       0.018
BKNG      0.017
ULTA      0.015
MO        0.013


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
AMZN      0.187
F         0.170
NFLX      0.032
AZO       0.010
CMG       0.009
BBWI      0.006
BKNG      0.006
MTD       0.006
CHTR      0.006
ULTA      0.006


  result = libops.scalar_compare(x.ravel(), y, op)


        Weights
Ticker         
F         0.243
AMZN      0.095
NFLX      0.046
BBWI      0.017
CMG       0.014
MTD       0.014
AZO       0.013
ULTA      0.010
BKNG      0.008
CHTR      0.008


  result = libops.scalar_compare(x.ravel(), y, op)


In [264]:
# Make a dataframe with the returns and dates
returns_df = pd.DataFrame(returns_list, index=test.index.unique())
# make the column name returns
returns_df.columns = ['Portfolio 63-day Returns (%)']
returns_df

Unnamed: 0,Portfolio 63-day Returns (%)
2019-09-04,1.8
2019-09-05,0.4
2019-09-06,1.4
2019-09-09,-1.1
2019-09-10,0.9
...,...
2019-12-24,-13.3
2019-12-26,-19.0
2019-12-27,-17.8
2019-12-30,-7.0


In [265]:
# find number of days with 0 return
zero_return = 0
for i in range(len(returns_df)):
    if returns_df.iloc[i]['Portfolio 63-day Returns (%)'] == 0:
        zero_return += 1
print('Number of days with 0 return:', zero_return)


Number of days with 0 return: 0


In [266]:
# average return
print('Average return:', round(returns_df.mean()[0],2))

Average return: 1.6


SP500 return in the same period

In [267]:
import yfinance as yf
from pandas_datareader import data as pdr
# This line is needed to fix a problem with pandas_datareader
yf.pdr_override()

start_date = '2019-09-04'
end_date = '2020-04-02'
# 
# Get data from Yahoo Finance
sp500 = pdr.get_data_yahoo('^GSPC', start=start_date, end=end_date)
# we are only interested in the open price
sp500 = sp500['Open']
# Transform it into a dataframe
sp500_returns = pd.DataFrame(sp500)
# Make 'S&P 500 Index Price' the column header
sp500_returns = sp500_returns.rename(columns={'Open': 'S&P 500 Index Price'})
# Calculate the 3 month return for each day
sp500_returns['S&P 500 - 3 Month Return (%)'] = sp500_returns['S&P 500 Index Price'].pct_change(periods=63).shift(-63)
# calculating all 3-month return values by 100 to obtain percent
sp500_returns['S&P 500 - 3 Month Return (%)'] = sp500_returns['S&P 500 - 3 Month Return (%)'] * 100
sp500_returns.round(2)
# remove the S&P 500 Index Price column
sp500_returns = sp500_returns.drop(columns=['S&P 500 Index Price'])
# removing all rows with NaN values (The last 62 days, since they don't have data for 3 months into the future)
sp500_returns = sp500_returns.dropna()
sp500_returns

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,S&P 500 - 3 Month Return (%)
Date,Unnamed: 1_level_1
2019-09-04,5.564388
2019-09-05,4.826721
2019-09-06,4.659883
2019-09-09,4.891873
2019-09-10,5.750573
...,...
2019-12-24,-22.451438
2019-12-26,-20.802239
2019-12-27,-21.194988
2019-12-30,-19.301937


In [268]:
# add the returns of random monkey model as a new column
random_monkey_returns = pd.read_csv('random_monkey_portfolio_returns.csv', index_col=0)

In [269]:
# make random monkey index as datetime
random_monkey_returns.index = pd.to_datetime(random_monkey_returns.index)


In [270]:
# Combine the returns of the portfolio and the S&P 500
combined_returns = pd.concat([returns_df, sp500_returns], axis='columns', join='inner')

#combined_returns['Random Monkey Portfolio 62-day Returns (%)'] = random_monkey_returns['Random Monkey - 3 Month Return (%)']
# make a column that shows the difference between the portfolio returns and the S&P 500 returns
#combined_returns['Difference (%)'] = combined_returns['Portfolio 62-day Returns (%)'] - combined_returns['S&P 500 - 3 Month Return (%)']
combined_returns.round(1)

Unnamed: 0,Portfolio 63-day Returns (%),S&P 500 - 3 Month Return (%)
2019-09-04,1.8,5.6
2019-09-05,0.4,4.8
2019-09-06,1.4,4.7
2019-09-09,-1.1,4.9
2019-09-10,0.9,5.8
...,...,...
2019-12-24,-13.3,-22.5
2019-12-26,-19.0,-20.8
2019-12-27,-17.8,-21.2
2019-12-30,-7.0,-19.3


In [271]:
# add a new column that shows the Random Monkey returns
combined_returns['Random Monkey 63-day Returns (%)'] = random_monkey_returns['Random Monkey - 3 Month Return (%)'].copy()
round(combined_returns,1)

Unnamed: 0,Portfolio 63-day Returns (%),S&P 500 - 3 Month Return (%),Random Monkey 63-day Returns (%)
2019-09-04,1.8,5.6,5.9
2019-09-05,0.4,4.8,1.5
2019-09-06,1.4,4.7,3.9
2019-09-09,-1.1,4.9,-0.8
2019-09-10,0.9,5.8,4.2
...,...,...,...
2019-12-24,-13.3,-22.5,-14.3
2019-12-26,-19.0,-20.8,-25.4
2019-12-27,-17.8,-21.2,-27.4
2019-12-30,-7.0,-19.3,-40.3


In [272]:
# average return of each column
average_return = combined_returns.mean()
average_return


Portfolio 63-day Returns (%)        1.597590
S&P 500 - 3 Month Return (%)        0.979898
Random Monkey 63-day Returns (%)    0.637349
dtype: float64

In [273]:
# standard deviation of each column
standard_deviation = combined_returns.std()
standard_deviation

Portfolio 63-day Returns (%)         7.684685
S&P 500 - 3 Month Return (%)        12.145304
Random Monkey 63-day Returns (%)    14.115600
dtype: float64

In [275]:
from scipy import stats
# calculate whether there is a significant difference between the portfolio returns and the S&P 500 returns
# calculate the t-statistic
t_statistic = (average_return['Portfolio 63-day Returns (%)'] - average_return['S&P 500 - 3 Month Return (%)']) / standard_deviation['Portfolio 63-day Returns (%)']
# calculate the p-value
p_value = stats.t.sf(np.abs(t_statistic), len(combined_returns)-1)*2
# print the results
print('t-statistic: ', round(t_statistic,2))
print('p-value for SVM Portfolio and S&P500 returns: ', round(p_value,2))


t-statistic:  0.08
p-value for SVM Portfolio and S&P500 returns:  0.94


In [276]:
# calculate whether there is a significant difference between the portfolio returns and the S&P 500 returns
# calculate the t-statistic
t_statistic = (average_return['Portfolio 63-day Returns (%)'] - average_return['Random Monkey 63-day Returns (%)']) / standard_deviation['Portfolio 63-day Returns (%)']
# calculate the p-value
p_value = stats.t.sf(np.abs(t_statistic), len(combined_returns)-1)*2
# print the results
print('t-statistic: ', round(t_statistic,2))
print('p-value for SVM Portfolio and Random Monkey returns: ', round(p_value,2))

t-statistic:  0.12
p-value for SVM Portfolio and Random Monkey returns:  0.9


__Backup-results in case it fails__:

In [None]:
# Make a dataframe with the returns and dates
returns_df = pd.DataFrame(returns_list, index=test.index.unique())
# make the column name returns
returns_df.columns = ['Portfolio 62-day Returns (%)']
returns_df

Unnamed: 0,Portfolio 62-day Returns (%)
2019-09-05,8.0
2019-09-06,6.1
2019-09-09,5.3
2019-09-10,5.4
2019-09-11,4.2
...,...
2019-12-26,-54.0
2019-12-27,-50.8
2019-12-30,-21.6
2019-12-31,-21.1
