In [None]:
# read IEX_TOKEN and IEX_API_VERSION from .env file

import os
import iexfinance
import pandas as pd
import yfinance as yf

# Setup cache for requests
from datetime import datetime
from iexfinance.stocks import Stock
import requests_cache
import datetime


# Copy KEY=VALUE pairs from the local .env file into the process environment.
with open('.env', 'r') as env:
    os.environ.update({
        key.strip(): value.strip()
        # partition() splits on the FIRST '=' only, so values that themselves
        # contain '=' (e.g. base64-padded tokens) are kept whole.
        for key, sep, value in (line.partition('=') for line in env.read().split('\n'))
        # sep is '' for lines without '='; '#' lines are comments, not settings.
        if sep and not key.lstrip().startswith('#')
    })

In [None]:
from lib.utils import *

In [None]:
# Open positions: shares held, buy price, ticker and purchase date.
# In this notebook `import datetime` comes after `from datetime import datetime`,
# so the name `datetime` is the *module* here; the class has to be reached as
# `datetime.datetime` (calling the module raises TypeError).
trade1 = {
    'no_shares': 168.0000,
    'buy_price': 29.63,
    'ticker': 'VUKE.L',
    'date': datetime.datetime(year=2021, month=3, day=2),
}

trade2 = {
    'no_shares': 58,
    'buy_price': 34.01,
    'ticker': 'VMID.L',
    'date': datetime.datetime(year=2021, month=3, day=2),
}


def print_trade_returns(trade):
    """Print and return the current market value of one position.

    The latest price for ``trade['ticker']`` is read from
    ``Stock(ticker).get_price()``. The position value and the percentage
    return are printed, and the value is returned.

    Args:
        trade: Mapping with the keys ``'ticker'``, ``'no_shares'`` and
            ``'buy_price'``.

    Returns:
        The latest price multiplied by ``trade['no_shares']``.
    """
    ticker = trade.get('ticker')
    buy_price = trade.get('buy_price')
    # The price table is keyed by ticker; take its first value.
    price = Stock(ticker).get_price()[ticker].values[0]
    value = price * trade.get('no_shares')
    # Return is the gain relative to what was paid (the buy price), not
    # relative to the current price.
    returns = (price - buy_price) / buy_price
    print(f'value: {value}, return: {returns*100}%')
    return value


# Total value: both positions plus a fixed 112.27.
# NOTE(review): 112.27 is presumably an uninvested cash balance -- confirm.
trade1_value = print_trade_returns(trade1)
trade2_value = print_trade_returns(trade2)
trade1_value + trade2_value + 112.27



In [None]:
# HTTP cache for price requests: SQLite backend, cache name 'cache', entries
# expire after 100 days. `datetime` must be the module here (timedelta lookup).
expiry = datetime.timedelta(days=100)
session = requests_cache.CachedSession(cache_name='cache',
                                       backend='sqlite',
                                       expire_after=expiry)

# NOTE(review): created without `session=session`, and `vuke` is not referenced
# again in the visible notebook -- confirm it is still needed.
vuke = Stock('VUKE-LN')

"""
purchased on 
Order summary
Estimated value more information £5,000.00 are
Order number 5674775
Status updated
Date 03 March 2021
Time 10:16
Actual contract cost
Value £4,977.62
Settlement price £29.63
Contract total £4,977.62
"""
import json
import pandas as pd

# Static data
# Two-letter country codes queried one by one when downloading symbol lists.
emea_cty_codes = [
    'AT', 'BE', 'BG', 'HR', 'CY', 'CZ', 'DK',
    'EE', 'FI', 'FR', 'DE', 'GR', 'HU', 'IE',
    'IT', 'LV', 'LT', 'LU', 'MT', 'NL', 'PL',
    'PT', 'RO', 'SK', 'SI', 'ES', 'SE', 'GB',
]

# Exchange reference table; `df` stays bound to the same frame as before.
with open('./data/iex_exchanges.json', 'r') as exchanges_file:
    iex_exchanges = df = pd.read_json(exchanges_file)

from iexfinance.refdata import get_region_symbols

def download_symbol_static():
    """Download the symbol list for every supported country code and save it.

    Each code in ``emea_cty_codes`` that appears in ``iex_exchanges.region`` is
    passed to ``get_region_symbols``. Non-empty results are stacked into one
    frame, written to ``data/all_emea_symbols.csv`` and returned. Codes that
    are unsupported, or that return no symbols, are reported with ``print``
    and skipped.

    Returns:
        pandas.DataFrame: The combined symbols (an empty frame when nothing
        was downloaded).
    """
    supported_regions = set(iex_exchanges.region.values)
    frames = []
    for cty_code in emea_cty_codes:
        if cty_code not in supported_regions:
            print(f'cty_code={cty_code} is not supported by iex')
            continue
        symbols = get_region_symbols(cty_code)
        if len(symbols) == 0:
            print(f'cty_code={cty_code} not found')
            continue
        frames.append(pd.DataFrame(symbols))
    # Concatenate once at the end: DataFrame.append no longer exists in
    # pandas 2.0 and re-copying the accumulated frame per country is quadratic.
    all_emea_symbols = pd.concat(frames) if frames else pd.DataFrame()
    all_emea_symbols.to_csv('data/all_emea_symbols.csv')
    return all_emea_symbols

# EMEA Universe
all_emea_symbols = pd.read_csv('./data/all_emea_symbols.csv')

# FTSE 100 data
ftse_companies = pd.read_csv('./data/ftse100.csv')
ftse_tickers = list(map(lambda tick: f'{tick}-LN', ftse_companies.Code))

# data on vanguard etfs
vg_etfs = pd.read_csv('/home/rory/dev/investment-analysis/data/vanguard_fund_summaries.csv')
vanguard_funds = all_emea_symbols[all_emea_symbols.name.str.contains('Vanguard')]
vanguard_funds.name

# Vanguard etf tickers
tickers=list(map(lambda it: it.replace(' ', '-'), list(vg_etfs.Bloomberg)))


In [None]:
################
# Model Features
################
# Each basket follows the same three steps: build a multi-symbol Stock on the
# cached `session`, download the '5y' price history, then pass it through
# process_basket_data (not defined in this notebook; presumably provided by the
# `lib.utils` star import).
# VG ETF prices
vg_etf_basket = Stock(tickers, session=session)
vg_etf_prices = vg_etf_basket.get_historical_prices(range='5y')
vg_etf_prices = process_basket_data(vg_etf_prices)

# VG Momentum funds: the Vanguard rows whose symbol contains 'VMOM'.
momentum_etfs = Stock(list(vanguard_funds[vanguard_funds.symbol.str.contains('VMOM')].symbol.values), session=session)
momentum_prices = momentum_etfs.get_historical_prices(range='5y')
momentum_prices = process_basket_data(momentum_prices)

# FTSE 100 names: only the first 100 tickers are requested.
ftse_basket = Stock(list(ftse_tickers[0:100]), session=session)
ftse_basket_prices = ftse_basket.get_historical_prices(range='5y')
ftse_basket_prices = process_basket_data(ftse_basket_prices)

# Filter out tickers with too few observations: keep an entry of the first
# index level only when it has at least 1200 `close` values.
# NOTE(review): 1200 is presumably "almost five years of trading days" -- confirm.
mask = [tick for tick in filter(lambda ticker: ftse_basket_prices.loc[ticker].close.size >=1200, ftse_basket_prices.index.levels[0])]
ftse_basket_prices = ftse_basket_prices.loc[mask]


####################
# Prediction Targets
####################
# FTSE100 INDEX
# NOTE(review): the series named ftse100 / ftse250 are downloaded for the
# tickers VUKE-LN and VMID-LN -- presumably funds used as index proxies; confirm.
ftse100 = Stock('VUKE-LN', session=session)
ftse100_prices = ftse100.get_historical_prices(range='5y')
# Convert the index labels to datetimes.
ftse100_prices.index = pd.to_datetime(ftse100_prices.index)

ftse250 = Stock('VMID-LN', session=session)
ftse250_prices = ftse250.get_historical_prices(range='5y')
ftse250_prices.index = pd.to_datetime(ftse250_prices.index)
# Bare expression in the middle of a cell: evaluated but not displayed.
ftse250_prices


# Also not the cell's last expression, so nothing is shown for it.
ftse_basket_prices.index.levels[0]

# Modelling frame: normalise_basket(..., 'returns') output with incomplete rows
# dropped (helper not defined in this notebook), plus two extra columns:
#   'index'  - daily_returns of the ftse100 close prices
#   'target' - 1 when 'index' is > 0, otherwise 0 (NaN also maps to 0)
ftse_prediction = normalise_basket(ftse_basket_prices, 'returns').dropna()
ftse_prediction['index'] = daily_returns(ftse100_prices.close.astype('float'))
ftse_prediction['target'] = ftse_prediction['index'].astype('float').apply(lambda ret: 1 if ret > 0 else 0)

# Drop rows with any missing value, then discard the first remaining row.
ftse_prediction = ftse_prediction.dropna()[1:]


In [None]:
# Inspect the 'index' column (daily returns of the ftse100 close prices).
ftse_prediction['index']

In [None]:
import sqlite3
# Persist the prepared frame to the local SQLite file, replacing any existing
# 'ftse_prediction' table. The frame's index is written as a column as well.
con=sqlite3.connect('./example.db')
ftse_prediction.to_sql(name='ftse_prediction', if_exists='replace', con=con)

In [None]:
## Pair each row's features with the NEXT row's outcome: shifting the two
## target columns by -1 moves every value up one row, so only information
## available before the predicted move is used as input.
## NOTE: this cell is not idempotent -- re-running it shifts the targets again.
period_lag = -1
ftse_prediction['target'] = ftse_prediction.target.shift(period_lag)
ftse_prediction['index'] = ftse_prediction['index'].shift(period_lag)


#################
# Features Subset
#################
# Base feature columns: everything except the targets and any existing lag column.
test_cols = [col for col in ftse_prediction.columns
             if col not in ['index', 'target'] and 'minus' not in col]

# Lags 1..5 of every base feature. All lag columns are built first and attached
# with a single concat; inserting them one at a time fragments the frame and
# triggers pandas' PerformanceWarning.
lagged = {
    f'{col}_tminus{i}': ftse_prediction[col].shift(i)
    for col in test_cols
    for i in range(1, 6)
}
ftse_prediction = pd.concat(
    [
        # Lag columns left over from a previous run are replaced, not duplicated.
        ftse_prediction.drop(columns=list(lagged), errors='ignore'),
        pd.DataFrame(lagged, index=ftse_prediction.index),
    ],
    axis=1,
)
test_cols = [col for col in ftse_prediction.columns if col not in ['index', 'target']]

# The shifts leave NaNs at both ends of the frame; drop those rows.
ftse_prediction = ftse_prediction.dropna()



In [None]:
import pandas as pd
import sqlite3
# Reload the frame previously saved to the 'ftse_prediction' table.
con = sqlite3.connect('example.db')
try:
    ftse_prediction = pd.read_sql('select * from ftse_prediction', con)
finally:
    con.close()

# 'level_0' holds the saved index values as an ordinary column. Promote it to a
# datetime index and keep it OUT of the feature list: left in `test_cols`, the
# date strings end up in the feature matrix and break PCA and model fitting.
ftse_prediction.index = pd.to_datetime(ftse_prediction['level_0'])
test_cols = [col for col in ftse_prediction.columns
             if col not in ['index', 'target', 'level_0']]
ftse_prediction = ftse_prediction[['target', 'index'] + test_cols]


In [None]:
# Display the modelling frame (targets first, then the feature columns).
ftse_prediction

In [None]:
# Principal Component Analysis

from sklearn.decomposition import PCA
import numpy as np
import matplotlib.pyplot as plt

# Feature matrix: every column except the two target columns.
df = ftse_prediction[[col for col in ftse_prediction.columns if col not in ['index', 'target']]]

pca = PCA()
pca.fit(df)

# Scree plot: one bar per fitted COMPONENT. The x positions are derived from the
# length of the variance vector, not the number of features -- the two differ
# whenever there are fewer rows than columns, and plt.bar then fails.
y = np.round(pca.explained_variance_ratio_ * 100, decimals=2)
component_ids = range(y.size)
plt.figure(figsize=(20,10))
plt.bar(x=component_ids, height=y)
plt.xticks(component_ids, rotation=90)
plt.xlabel('Principal component')
plt.ylabel('Explained variance (%)')
plt.show()

# Visualise first component: one bar per input feature (its loading).
feature_ids = range(pca.components_[0].size)
plt.figure(figsize=(20,10))
plt.bar(x=feature_ids, height=pca.components_[0])
plt.xticks(feature_ids, df.columns, rotation=90)
plt.ylabel('Loading on first component')
plt.show()

ftse_pca_score = pca.transform(df)

In [None]:
#######################
# Train/Test subsetting
#######################
from datetime import datetime

train_start = pd.to_datetime('16/05/2019')
train_end = pd.to_datetime('31/01/2021')

train_mask = lambda s: (s.index > train_start) & (s.index <= train_end)
test_mask = lambda s: (s.index > train_end) & (s.index <= datetime.now())


In [None]:
# Inspect the training-window target. Taken straight from ftse_prediction:
# X_train is only created in a later cell and holds feature columns only, so
# X_train['target'] fails on a fresh top-to-bottom run.
ftse_prediction[train_mask(ftse_prediction)]['target']

In [None]:
######################
# Lasso regression
#  y: ftse price 
#  X: basket of stocks
######################
from sklearn.linear_model import Lasso

# Slice each window once, then split into features and the 'index' target.
train_rows = ftse_prediction[train_mask(ftse_prediction)]
test_rows = ftse_prediction[test_mask(ftse_prediction)]

X_train, y_train = train_rows[test_cols], train_rows['index']
X_test, y_test = test_rows[test_cols], test_rows['index']

clf = Lasso(alpha=0.1)
clf.fit(X=X_train, y=y_train)

# score() is R^2 for a regressor; report it in-sample and out-of-sample.
train_score = clf.score(X=X_train, y=y_train)
print(f'Train Score: {train_score}')
test_score = clf.score(X=X_test, y=y_test)
print(f'Model score: {test_score}')
clf.get_params()


In [None]:
# fit an ARIMA model and plot residual errors
# `pandas.datetime` was only an alias of the standard-library class; it was
# deprecated in pandas 1.0 and is gone from pandas 2.0, so import the class
# from its real home (same object, same name).
from datetime import datetime
from pandas import read_csv
from pandas import DataFrame
from statsmodels.tsa.arima.model import ARIMA
from matplotlib import pyplot

# Series being modelled: the 'index' column as floats.
series = ftse_prediction['index'].astype('float')

# fit model: ARIMA order (p=5, d=1, q=0)
model = ARIMA(series, order=(5,1,0))
model_fit = model.fit()
# summary of fit model
print(model_fit.summary())
# line plot of residuals
residuals = DataFrame(model_fit.resid)
residuals.plot()
pyplot.show()
# density plot of residuals
residuals.plot(kind='kde')
pyplot.show()
# summary stats of residuals
print(residuals.describe())

In [None]:
###########################################
# Lasso regression on classification target
# of increase or decrease daily returns
###########################################

# Features are every column except the two target columns; the label is the
# 0/1 'target' column cast to int.
feature_cols = [name for name in ftse_prediction.columns if name not in ['index', 'target']]
train_rows = ftse_prediction[train_mask(ftse_prediction)]
test_rows = ftse_prediction[test_mask(ftse_prediction)]

X_train = train_rows[feature_cols]
y_train = train_rows['target'].astype('int')

X_test = test_rows[feature_cols]
y_test = test_rows['target'].astype('int')

clf = Lasso(alpha=0.001)
clf.fit(X=X_train, y=y_train)
clf.score(X=X_test, y=y_test)


In [None]:
# Raw model outputs for the test window (uses whichever `clf` was fitted last).
clf.predict(X=X_test)

In [None]:
from sklearn.neighbors import  NearestCentroid
features_train = ftse_prediction[train_mask(ftse_prediction)][test_cols]
features_test = ftse_prediction[test_mask(ftse_prediction)][test_cols]
# Labels are selected as a 1-D Series, like target_test. A one-column frame
# (`[['target']]`) hands the classifiers a 2-D y, which scikit-learn only
# accepts with a DataConversionWarning.
target_train = ftse_prediction[train_mask(ftse_prediction)]['target'].astype('int')
target_test = ftse_prediction[test_mask(ftse_prediction)]['target'].astype('int')

# Unfitted PCA; a later cell fits it on features_train.
pca = PCA()
target = ftse_prediction['target']

clf = NearestCentroid()
clf.fit(X=features_train, y=target_train)

# Mean accuracy on the test window.
clf.score(y=target_test, X=features_test)

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import GradientBoostingRegressor

lr_list = [0.75, 1, 1.1, 1.2, 1.3, 1.4]


def _fit_and_report(rate, announce_rate):
    """Fit a gradient-boosting classifier at ``rate`` and print its accuracies.

    Trains on the notebook's current X_train / y_train, optionally prints the
    learning rate, then prints training and validation accuracy. Returns the
    fitted model.
    """
    model = GradientBoostingClassifier(n_estimators=100, learning_rate=rate, max_features=30, max_depth=10, random_state=0)
    model.fit(X_train, y_train)
    if announce_rate:
        print("Learning rate: ", rate)
    print(f"Accuracy score (training): {model.score(X_train, y_train):.3f}")
    print(f"Accuracy score (validation): {model.score(X_test, y_test):.3f}")
    return model


# Sweep the candidate learning rates...
for learning_rate in lr_list:
    gb_clf = _fit_and_report(learning_rate, announce_rate=True)

# ...then keep a model fitted at 0.75 as `gb_clf` for the following cells.
gb_clf = _fit_and_report(0.75, announce_rate=False)


In [None]:
# Use the gradient-boosting classifier for the inspection below.
clf = gb_clf

# Fit the PCA on the training features and project the test features onto it;
# feat_test_df has one integer-labelled column per component.
pca.fit(features_train)
feat_test_df = pd.DataFrame(pca.transform(features_test))
# Predict once and keep the result (the earlier duplicate call whose result was
# thrown away has been removed).
prediction = clf.predict(features_test)


def printtable():
    """Print one row per test observation: actual, predicted, PC1, PC2, index value.

    Reads the notebook globals ``clf``, ``features_test``, ``target_test``,
    ``feat_test_df`` and ``ftse_prediction``. Returns nothing.
    """
    # Take the 'index' values for the TEST rows only. Zipping the whole
    # ftse_prediction['index'] column paired each test row with a value from
    # the start of the frame instead of its own date.
    index_values = ftse_prediction.loc[features_test.index, 'index']

    print('|Actual| Pred| PC1| PC2| Index')
    print('--------------')

    rows = zip(clf.predict(features_test),
               target_test,
               feat_test_df[0],
               feat_test_df[1],
               index_values)
    for pred, act, pc1, pc2, targ in rows:
        print('     {}|    {}|   {:0.2f}|   {:0.2f}|  {:0.2f}'.format(act, pred, pc1, pc2, targ))
        print('--------------')


cols = ['Actual', 'Predicted', 'PC1', 'PC2']
vals = [target_test, clf.predict(features_test)]

# One row per test observation: first four principal components, the actual
# label and the predicted label.
to_plot = pd.DataFrame({
    'PC1': feat_test_df[0],
    'PC2': feat_test_df[1],
    'PC3': feat_test_df[2],
    'PC4': feat_test_df[3],
    'Actual': target_test.values,
    'Predicted': clf.predict(features_test),
})

# PC2 against PC3, coloured by the actual label (0 -> red, 1 -> green).
colors = {0: 'red', 1: 'green'}
point_colors = to_plot['Actual'].map(colors)
fig, ax = plt.subplots()
ax.scatter(to_plot['PC2'], to_plot['PC3'], c=point_colors)
plt.show()


In [None]:
# PC1 against PC2, with the predicted label passed as the colour values.
ax1 = to_plot.plot.scatter(x='PC1', y='PC2', color=to_plot['Predicted'])



In [None]:
#list(filter( lambda model: model.ready(), models))
# Inspect the current training target (whatever the last executed cell set).
y_train

In [None]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

# NOTE(review): `params` is not used by the search below -- confirm whether it
# is still needed.
params={'n_estimators': 10000, 'learning_rate': 0.01, 'max_features': 30, 'max_depth': 4, 'random_state': 0}

# Hyper-parameters worth searching. Three keys of the original grid were
# dropped because they are not tuning dimensions here:
#   alpha        - only used by the 'huber' / 'quantile' losses, not the default
#   random_state - a seed; it is fixed on the estimator instead
#   warm_start   - has no effect on a freshly cloned estimator fitted once
parameter_space = {
    'learning_rate': [0.1,0.2,0.3,0.4,0.5],
    'max_depth': [2,4,6,8,10],
    'max_features': [10,20,30,40,50,60],
    'min_samples_leaf': [1,2,3],
    'min_samples_split': [2,5,10],
    'n_estimators': [500,1000,2000,3000,4000,5000],
}

# The exhaustive grid was over a million combinations (times the CV folds),
# which cannot finish. Sample a fixed number of candidates instead; raise
# N_SEARCH_CANDIDATES for a more thorough search.
N_SEARCH_CANDIDATES = 50

sh = RandomizedSearchCV(GradientBoostingRegressor(random_state=0),
                        parameter_space,
                        n_iter=N_SEARCH_CANDIDATES,
                        random_state=0,
                        n_jobs=8,
                        pre_dispatch=16)
sh.fit(X_train, y_train)

In [None]:
# `gb_reg` is never assigned (its fit is commented out in the previous cell), so
# show the parameters of the best regressor found by the search `sh` instead.
sh.best_estimator_.get_params()

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

X_train = ftse_prediction[train_mask(ftse_prediction)][test_cols]
y_train = ftse_prediction[train_mask(ftse_prediction)]['target'].astype('int')
X_test = ftse_prediction[test_mask(ftse_prediction)][test_cols]
y_test = ftse_prediction[test_mask(ftse_prediction)]['target'].astype('int')

clf = SVC(kernel='sigmoid')

# Fit and predict on the split built in THIS cell. Previously the model used
# features_train / target_train / features_test left over from another cell,
# while accuracy was measured against this cell's y_test.
clf.fit(X=X_train, y=y_train)
y_pred = clf.predict(X_test)
print(accuracy_score(y_test, y_pred))

In [None]:
# Reference only: print the SVC class documentation.
print(SVC.__doc__)

In [None]:
# Build a feature frame for the most recent week with the same helpers used for
# the 5-year history (process_basket_data / normalise_basket are not defined in
# this notebook).
last_week = ftse_basket.get_historical_prices(range='1w')

last_week = process_basket_data(last_week)
last_week_data = normalise_basket(last_week, 'returns')
# Bare expression in the middle of a cell: evaluated but not displayed.
last_week_data

# NOTE(review): 'filter' is excluded here, whereas the rest of the notebook
# excludes 'index' -- possibly a typo; confirm.
features = last_week_data[list(filter(lambda col: col not in ['filter', 'target'], last_week_data.columns))]

# Boolean-frame mask: cells that are NaN or +/-inf become NaN, the rest are kept.
features = features[features.replace([np.inf, -np.inf], np.nan).notnull()]
# Select the columns of ftse_prediction other than 'target' / 'index'.
# NOTE(review): raises KeyError if ftse_prediction has columns that last week's
# frame lacks (e.g. '*_tminus*' lag columns) -- confirm.
features = features[filter(lambda item: item not in ['target', 'index'] ,ftse_prediction.columns)]

# Percentage change of last week's ftse100 close prices.
last_week_target = ftse100.get_historical_prices(range='1w').close.astype('float').pct_change()
# Column assignment aligns on index labels.
# NOTE(review): unlike ftse100_prices, this index is not passed through
# pd.to_datetime; if its labels differ from features.index the column is all
# NaN -- confirm.
features['target'] = last_week_target

features

In [None]:
# Predicted labels for last week's rows; rows with any missing feature are
# dropped before predicting.
print(gb_clf.predict(features[test_cols].dropna()))

In [None]:
# Inspect last week's percentage changes of the ftse100 close prices.
last_week_target