# Keystone Project - Creating and Implementing a customized Investment Strategy (semi-active)

__Assignment / Goal:__

You want to invest in US Technology Stocks that pay Dividends with a price-weighted approach to avoid highly concentrated positions in AAPL and MSFT. The steps are:

1. Get all nasdaq listings from the csv-file nasdaq_listings.csv (alternatively, you can get the latest version from the nasdaq stock screener)

2. Filter by
- US Stocks
- Technology Stocks (Sector)

3. Load Prices and Dividends for remaining Ticker Symbols for the last three years (from "2019-12-01" to "2022-11-30") 

4. Identify Dividend paying Stocks (any Dividends in the most recent year -> last 252 trading days)

5. Create a price-weighted Total Return Index for the Dividend-paying US Tech Stocks covering the last three years.

6. Track the Index with only 40 stocks. Try to minimize the Tracking Error. 

7. Buy one share each with your IBKR Paper Trading Account. 

8. Compare your semi-active Strategy with an appropriate Benchmark (broad market index)

# --------------------SOLUTION------------------------------

## Get all nasdaq listings

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
listings = pd.read_csv("nasdaq_listings.csv", index_col = "Symbol")
listings

In [None]:
listings.info()

In [None]:
listings.Sector.value_counts()

In [None]:
listings[listings.Sector == "Technology"].Industry.value_counts()

## Filter Listings

In [None]:
mask1 = listings.Sector == "Technology"
mask1

In [None]:
mask2 = (listings.Country == "United States") 
mask2

In [None]:
tech = listings.loc[mask1 & mask2]
tech

In [None]:
tech.info()

In [None]:
symbols = list(tech.index)
symbols

In [None]:
tech[tech.index.str.contains("ACEV")]

## Loading Prices and Dividends

In [None]:
start = "2019-12-01"
end = "2022-11-30"

In [None]:
symbols

In [None]:
# Download prices plus corporate actions (actions = True) for all symbols.
# auto_adjust is switched off explicitly: recent yfinance releases default to
# auto_adjust = True, which removes the "Adj Close" column that the
# total-return calculations in this notebook read.
# NOTE: yfinance treats `end` as exclusive, so the last row returned is the
# trading day before `end`.
data = yf.download(symbols, start, end, actions = True, auto_adjust = False)
data

In [None]:
data

In [None]:
data.index = pd.to_datetime(data.index)

In [None]:
data.info()

## Dividend paying Stocks

In [None]:
data

Dividend paying stocks: paying __any__ Dividends in the most recent year (since "2021-12-01") 

In [None]:
dividends = data.Dividends.loc["2021-12-01":].sum()
dividends

In [None]:
dividends[dividends > 0]

In [None]:
symbols = dividends[dividends > 0].index
symbols

In [None]:
close = data.Close[symbols].copy()
close

## Creating a price-weighted Total Return Index

In [None]:
close

In [None]:
weights_PWI = close.div(close.sum(axis = 1), axis = "rows")
weights_PWI

In [None]:
weights_PWI.sum(axis = 1)

In [None]:
total_returns = data["Adj Close"][symbols].pct_change() # Adj Close Prices!
total_returns

In [None]:
returns_index = total_returns.mul(weights_PWI.shift()).sum(axis = "columns")
returns_index

In [None]:
index = returns_index.add(1).cumprod().mul(100)
index

In [None]:
index.name = "Index"

In [None]:
index.plot()
plt.show()

## Index Tracking

In [None]:
const = symbols.to_list()
const

In [None]:
n = len(const)
n

In [None]:
i = 40
i

In [None]:
sims = 10000 

In [None]:
# Random search: draw `sims` random subsets of `i` constituents, build a
# price-weighted portfolio from each subset and keep the subset with the
# lowest annualized tracking error against `returns_index`.
np.random.seed(123) # fixed seed -> the same subsets are drawn on every run
min_te = np.inf # infinity guarantees that the first valid simulation becomes the initial best
tstocks = None
tportfolio = None
for sim in range(sims):
    tracking_stocks = np.random.choice(a = const, size = i, replace = False)
    tracking_close = close[tracking_stocks] # slice once, used for numerator and denominator
    weights_pwi = tracking_close.div(tracking_close.sum(axis = 1), axis = "rows")
    # weights are lagged by one row so each return is weighted with the previous row's prices
    tracking_returns = total_returns[tracking_stocks].mul(weights_pwi.shift()).sum(axis = "columns")
    active_returns = tracking_returns - returns_index
    tracking_error = active_returns.std() * np.sqrt(252)
    if tracking_error < min_te:
        min_te = tracking_error
        tstocks = tracking_stocks
        # the cumulative performance is only needed for the best candidate so far
        tportfolio = tracking_returns.add(1).cumprod()

In [None]:
min_te

In [None]:
tstocks

In [None]:
tportfolio

In [None]:
tportfolio.name = "Tracking_Portfolio"

In [None]:
index / index.iloc[0]

In [None]:
tportfolio.plot(figsize = (12, 8))
(index/index.iloc[0]).plot()
plt.legend()
plt.show()

## Trading with Interactive Brokers

In [None]:
tstocks

In [None]:
shares = 1

In [None]:
close[tstocks].iloc[-1].sum() * shares

In [None]:
target = pd.DataFrame(data = {"symbol": tstocks})
target

In [None]:
target["position"] = shares
target

In [None]:
from ib_insync import *
util.startLoop()

In [None]:
ib = IB()

In [None]:
ib.connect()

In [None]:
pos = ib.positions()
pos

In [None]:
df = util.df(pos)
df

In [None]:
# util.df(pos) may have produced None; in that case fall back to an empty
# table that still has the two columns the merge further below selects.
if df is None:
    df = pd.DataFrame(columns = ["symbol", "position"])
else:
    # pull ticker symbol and contract id out of each contract object
    df["symbol"] = df["contract"].map(lambda contract: contract.symbol)
    df["conID"] = df["contract"].map(lambda contract: contract.conId)

In [None]:
df

In [None]:
# Outer join on the symbol: keeps symbols that appear only in the target list
# as well as symbols that appear only in the account.
# "position_t" = target position, "position_a" = actual position.
actual_positions = df[["symbol", "position"]]
trades = target.merge(actual_positions, how = "outer", on = "symbol", suffixes = ["_t", "_a"])
trades

In [None]:
trades.fillna(0, inplace = True)
trades

In [None]:
trades["trades"] = trades.position_t - trades.position_a 

In [None]:
trades

In [None]:
# Keep only the rows that actually require an order and index them by symbol.
needs_order = trades["trades"].ne(0)
trades = trades.loc[needs_order].set_index("symbol").copy()
trades

In [None]:
# Work through the trade list: look up the contract for each symbol, send a
# market order for the required quantity and print the resulting order status.
for symbol in trades.index:
    to_trade = trades.loc[symbol, "trades"]
    if to_trade > 0:
        side = "BUY"
    elif to_trade < 0:
        side = "SELL"
    else:
        # Neither positive nor negative (0 or NaN): nothing to trade. Without
        # this guard `side` would be undefined for the first symbol or
        # silently reused from the previous symbol.
        continue

    cds = ib.reqContractDetails(Stock(symbol, "SMART", "USD"))
    if not cds:
        print("No Contract for {} found.".format(symbol))
        continue
    if len(cds) > 1:
        # ambiguous symbol: report it and trade the first contract returned
        print("Multiple Contracts for {} found.".format(symbol))
    contract = cds[0].contract

    order = MarketOrder(side, abs(to_trade))
    trade = ib.placeOrder(contract, order)
    # Fixed pause instead of blocking until trade.isDone(); the status printed
    # below may therefore still be a non-final one.
    ib.sleep(2)
    if trade.orderStatus.status == "Filled":
        print("{} {} @ {}".format(side, symbol, trade.orderStatus.avgFillPrice))
    else:
        print("{} {} {}".format(side, symbol, trade.orderStatus.status))

# Re-read the account positions after a further pause and rebuild the
# symbol / conID helper columns.
ib.sleep(30)
pos = ib.positions()
df = util.df(pos)
if df is not None:
    df["symbol"] = df.contract.apply(lambda x: x.symbol)
    df["conID"] = df.contract.apply(lambda x: x.conId)
else: 
    # no positions table available -> empty frame with the expected columns
    df = pd.DataFrame(columns = ["symbol", "position"])
df

In [None]:
ib.openOrders()

In [None]:
ib.disconnect()

### What's next?

You should __monitor and measure the performance__ of your Portfolio and
- check the __tracking quality__
- __compare__ the performance of your strategy with an __appropriate Benchmark__ (e.g. S&P 500)

On a regular basis (e.g. annually) you should __reconstitute your portfolio__ and 
- remove stocks that __don't fit the strategy__ anymore (e.g. they stopped paying dividends)
- add new stocks that __fit the strategy__ then
- add/remove stocks to __improve tracking quality__
- to manage/minimize trading costs, you can __limit the number of stocks__ to be added/removed

## Backtesting and the Look-Ahead Bias

In [None]:
index

__Is this the historical performance of our strategy (Backtest)? No__

- Stocks were selected today based on whether they __fit the strategy today__
- To backtest the strategy (e.g. for the last year) we have to make the selection __before the backtesting period__ (one year ago) 
- otherwise: __Look-Ahead Bias__ (assuming we can make decisions based on future data that is not available yet).

__Realistic Assumption__: Country and Sector haven't changed in the last year

In [None]:
data

- Plan: Backtest from "2021-12-01" to "2022-11-30" 
- Dividend-paying Stocks: Paying any Dividends in the year before (from "2020-12-01" to "2021-11-30")

In [None]:
dividends = data.Dividends.loc["2020-12-01":"2021-11-30"].sum()
dividends

In [None]:
symbols = dividends[dividends > 0].index
symbols

In [None]:
len(symbols)

In [None]:
close = data.Close.loc["2021-12-01":"2022-11-30", symbols].copy()
close

In [None]:
weights_PWI = close.div(close.sum(axis = 1), axis = "rows")
weights_PWI

In [None]:
total_returns = data["Adj Close"].loc["2021-12-01":"2022-11-30", symbols].pct_change() # Adj Close Prices!
total_returns

In [None]:
returns_index = total_returns.mul(weights_PWI.shift()).sum(axis = "columns")
returns_index

In [None]:
index = returns_index.add(1).cumprod().mul(100)
index

## Backtesting and the Survivorship Bias

Still something missing? Yes.
- The backtest does __not include failed/delisted Stocks__
- It only includes Stocks that survived until today (__Survivorship Bias__). 
- Backtesting with Survivorship Bias typically __overstates__ true performance.
- Solution: include failed/delisted stocks (more advanced data source needed!)

## Benchmarking and the Information Ratio

In [None]:
bench = index.to_frame()
bench

In [None]:
bench.columns = ["strategy"]
bench

In [None]:
# Benchmark prices for the backtest window.
# auto_adjust = False is passed explicitly because recent yfinance releases
# default to auto_adjust = True, which drops the "Adj Close" column that the
# benchmark calculation reads.
# NOTE: yfinance treats the end date as exclusive.
SP500 = yf.download("^SP500TR", "2021-12-01", "2022-11-30", auto_adjust = False)
SP500

In [None]:
# Rebase the benchmark to 100 at its first observation.
# .iloc[0] selects the first row by position; a plain [0] on a date-indexed
# Series relies on the deprecated positional fallback of Series.__getitem__.
bench["benchmark"] = SP500["Adj Close"].div(SP500["Adj Close"].iloc[0]).mul(100)
bench

In [None]:
bench.plot(figsize = (12, 8))
plt.show()

In [None]:
returns = bench.pct_change()
returns

In [None]:
def ann_risk_return(returns_df): # assumes simple returns as input
    """Summarize annualized risk and compound annual growth rate per column.

    Both figures are annualized with a factor of 252 periods per year.

    Parameters
    ----------
    returns_df : DataFrame of simple returns, one column per instrument.

    Returns
    -------
    DataFrame indexed by the columns of `returns_df` with the columns
    "ann. Risk" (standard deviation scaled by sqrt(252)) and
    "CAGR" (mean log return scaled by 252, converted back to a simple return).
    """
    periods_per_year = 252
    volatility = returns_df.std() * np.sqrt(periods_per_year)
    # average the log returns, scale to one year, then convert back to a simple return
    growth = np.exp(np.log(returns_df + 1).mean() * periods_per_year) - 1

    summary = pd.DataFrame(index = returns_df.columns)
    summary["ann. Risk"] = volatility
    summary["CAGR"] = growth
    return summary

In [None]:
ann_risk_return(returns)

__-> Strategy underperformed in market downturn (no surprise for tech stocks)__

In [None]:
def tracking(returns_df, index):
    """Measure how each return column deviates from a reference column.

    Both figures are annualized with a factor of 252 periods per year.

    Parameters
    ----------
    returns_df : DataFrame of simple returns, one column per instrument.
    index : label of the column in `returns_df` used as the reference.

    Returns
    -------
    DataFrame indexed by the columns of `returns_df` with the columns
    "TrackingError" (standard deviation of the active returns scaled by
    sqrt(252)) and "ActiveReturn" (mean log active return scaled by 252,
    converted back to a simple return).
    """
    reference = returns_df[index]
    # active return = return of each column minus the reference column, row by row
    active = returns_df.sub(reference, axis = "rows")
    te = active.std() * np.sqrt(252)
    annual_active = np.exp(np.log(active + 1).mean() * 252) - 1

    summary = pd.DataFrame(index = returns_df.columns)
    summary["TrackingError"] = te
    summary["ActiveReturn"] = annual_active
    return summary

In [None]:
summary = tracking(returns, "benchmark")
summary

In [None]:
# Information ratio = active return per unit of tracking error.
summary["InformationRatio"] = summary["ActiveReturn"].div(summary["TrackingError"])
summary

The __Information Ratio (IR)__ measures and compares the active return of an investment (e.g., a security or portfolio) compared to a benchmark index relative to the volatility of the active return (also known as active risk or benchmark tracking risk). It is defined as the active return (the difference between the returns of the investment and the returns of the benchmark) divided by the tracking error (the standard deviation of the active return, i.e., the additional risk). It represents the __additional amount of return that an investor receives per unit of increase in risk__. (source: Wikipedia)

- The IR __measures the success__ of active/semi-active strategies relative to the benchmark
- positive IR: __"beating the benchmark"__
- __The higher the better__ (semi-active investing)