In [108]:
#These are the libraries you can use.  You may add any libraries directy related to threading if this is a direction
#you wish to go (this is not from the course, so it's entirely on you if you wish to use threading).  Any
#further libraries you wish to use you must email me, james@uwaterloo.ca, for permission.

from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
import random
from datetime import datetime

## Group Assignment
### Team Number: 10
### Team Member Names: David, Tanvi, Johan
### Team Strategy Chosen: Market Meet

## Contribution Declaration

The following team members made a meaningful contribution to this assignment:

Insert Names Here.

-------------------------------------------------------------------------------------------------------------------------------------------
## Competition Goal: Market Meet

Goal: Try to have the 25-stock portfolio’s returns match the average of the TSX composite + S&P 500’s return (~0.370%) as accurately as possible, over the 5-day period (Nov. 24 to Nov. 28).

- **S&P/TSX Composite Index** (`^GSPTSE`)
- **S&P 500 Index** (`^GSPC`)

| Index | YTD Return | Daily Return (÷ 252)| 5-Day Estimated Return |
|:------|:-----------:|:--------------------:|:----------------------:|
| TSX Composite | 21.74% | 0.0863% | 0.43% |
| S&P 500 | 15.55% | 0.0617% | 0.31% |
| **Average (50/50)** | — | — | **≈ 0.370%** |

Use historical data (daily returns) from `yfinance` to estimate:
  - Mean returns
  - Volatility (standard deviation)
  - Sharpe ratio (risk-adjusted return)
  - Beta and alpha risk
  - Correlation with the benchmark
  - Idiosyncratic (residual) risk

Select 10–25 stocks (we aim for 25 if possible) that:
- Have **beta ≈ 1** and high correlation with the benchmark
- Are liquid (average daily volume ≥ 5,000 shares)
- Have sector diversification (no sector >40% of total value)
- Include at least one large-cap (> 10B CAD) and one small-cap (< $2B CAD)

Weights are between (100 / (2n))% and 15% (for 25 stocks, between 2% and 15%)
- Spend approximately **$1,000,000 CAD**, net of trading fees:
- Fees = min(2.15 USD, 0.001 USD * shares) per trade, applied to all purchases
-------------------------------------------------------------------------------------------------------------------------------------------

In [109]:
## === Temporary Test Ticker List (delete this when TA gives real Tickers.csv) ===

tickers_list = [
    "AAPL",      # US, tech
    "MSFT",      # US, tech
    "GOOGL",     # US, communications
    "AMZN",      # US, consumer cyclical
    "TSLA",      # Large-cap
    "NVDA",      # Large-cap, tech
    "JNJ",       # Large-cap, healthcare
    "WMT",       # Large-cap, consumer defensive
    "TD.TO",     # Large-cap, Canadian
    "SHOP.TO",   # Canadian tech
    "ENB.TO",    # Canadian energy
    "SU.TO",     # Canadian oil sands
    "L.TO",      # Consumer defensive Canada
    "TLRY",      # **Small-cap** Canadian (Tilray Brands)
    "AC.TO",     # Air Canada
    "BB.TO",     # BlackBerry
    "F",         # Ford
    "GM",        # General Motors
    "NKE",       # Nike
    "KO",        # Coca-Cola
]

In [117]:
## === 3. Metrics for S&P 500/TSX + Stocks, and Assignment Filters ===

# Requires: list from Tickers.csv already loaded earlier

sp500 = "^GSPC"
tsx   = "^GSPTSE"
start = "2024-10-01"
end   = "2025-09-30"

min_vol  = 5000
corr_min = 0.20
small_cap  = 2_000_000_000
large_cap  = 10_000_000_000
earn_low = datetime(2025,11,24).date()
earn_high = datetime(2025,11,28).date()

# Download price + volume data
tickers_all = tickers_list + [sp500, tsx]
data = yf.download(tickers_all, start=start, end=end, auto_adjust=False, progress=False)
close = data["Close"]
vol   = data["Volume"]

# Benchmark = average of S&P 500 + TSX Composite
bench = ((close[sp500] + close[tsx]) / 2).dropna()
bench_ret = bench.pct_change().dropna()

# Align stock prices to benchmark dates
stocks = close[tickers_list].loc[bench_ret.index].dropna(how="all", axis=1)
rets = stocks.pct_change().dropna()

# --- Helper functions ---

def avg_volume(series):
    """Average daily volume after removing months with <18 trading days."""
    s = series.dropna()
    if s.empty: return np.nan
    month = s.index.to_period("M")
    valid = month.value_counts()[lambda x: x >= 18].index
    return s[month.isin(valid)].mean()

def weekly_vol(r):
    """Weekly volatility computed from daily returns."""
    w = (1+r).resample("W-FRI").prod() - 1
    w = w.dropna()
    return w.std() if not w.empty else np.nan

# --- Pull metadata (sector, industry, market cap, earnings) ---

# FX rate for USD→CAD conversion
fx_raw = yf.Ticker("CADUSD=X").history(period="1d")["Close"]
usd_to_cad = fx_raw.iloc[-1] if len(fx_raw) > 0 else 0.73  # fallback rate

meta = {}
for t in stocks.columns:
    info = yf.Ticker(t).info
    sector   = info.get("sector")
    industry = info.get("industry")
    mc_raw   = info.get("marketCap", np.nan)

    if isinstance(mc_raw, (int, float)) and not pd.isna(mc_raw):
        if t.endswith(".TO"):       # Canadian stock → already CAD
            mc = mc_raw
        else:                      # US stock → convert to CAD
            mc = mc_raw / usd_to_cad
    else:
        mc = np.nan

    small = mc < small_cap if not pd.isna(mc) else False
    large = mc > large_cap if not pd.isna(mc) else False

    try:
        e = yf.Ticker(t).get_earnings_dates(limit=1)
        earn = e.index[0].datetime() if len(e) > 0 else None
    except:
        earn = None

    meta[t] = [sector, industry, mc, small, large, earn]

meta = pd.DataFrame.from_dict(
    meta, orient="index",
    columns=["Sector","Industry","MarketCap","Small-cap","Large-cap","Earnings Date"]
)

# --- Compute stock metrics ---

metrics = pd.DataFrame(index=stocks.columns,
                       columns=["AvgVol","StdDev (%)","Covariance","Beta","Correlation","WeeklyVol"],
                       dtype=float)

for t in stocks.columns:
    r = rets[t].dropna()
    b = bench_ret.reindex(r.index).dropna()

    # Align
    idx = r.index.intersection(b.index)
    r = r.loc[idx]
    b = b.loc[idx]

    # Calculations
    av = avg_volume(vol[t].loc[start:end])
    sd = r.std() * 100
    cv = r.cov(b) * (100**2)
    beta = cv / (b.var() * (100**2)) if b.var() > 0 else np.nan
    corr = r.corr(b)
    wv = weekly_vol(r) * 100

    metrics.loc[t] = [av, sd, cv, beta, corr, wv]

# --- Apply assignment filters & requirements ---

keep_vol   = metrics["AvgVol"] >= min_vol
keep_corr  = metrics["Correlation"]   >= corr_min

keep_earn  = pd.Series(True, index=metrics.index)
for t in metrics.index:
    e = meta.loc[t,"Earnings Date"]
    if meta.loc[t,"Small-cap"] and isinstance(e, datetime):
        if earn_low <= e <= earn_high:
            keep_earn[t] = False

mask = keep_vol & keep_corr & keep_earn

filtered = metrics[mask].join(meta, how="left")
filtered_tickers = list(filtered.index)

print("Original:", len(tickers_list))
print("After Filters:", len(filtered_tickers))
display(filtered.head(25))

# --- Benchmark reference data ---

bench_std   = bench_ret.std() * 100
bench_week  = weekly_vol(bench_ret) * 100

print("Benchmark Std Dev:", bench_std)
print("Benchmark Weekly Volatility:", bench_week)

Original: 20
After Filters: 17


Unnamed: 0_level_0,AvgVol,StdDev (%),Covariance,Beta,Correlation,WeeklyVol,Sector,Industry,MarketCap,Small-cap,Large-cap,Earnings Date
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
AAPL,53844300.0,2.085207,1.260429,1.542102,0.6686,4.464012,Technology,Consumer Electronics,5654151000000.0,False,True,
MSFT,21715260.0,1.569207,0.849338,1.039143,0.598684,3.101843,Technology,Software - Infrastructure,5315795000000.0,False,True,
GOOGL,34183350.0,2.058194,0.922382,1.128511,0.495703,4.239133,Communication Services,Internet Content & Information,4716662000000.0,False,True,
AMZN,42469640.0,2.154998,1.262883,1.545104,0.648207,4.072332,Consumer Cyclical,Internet Retail,3560520000000.0,False,True,
TSLA,100236700.0,4.517973,2.149786,2.630207,0.526319,8.780767,Consumer Cyclical,Auto Manufacturers,1885268000000.0,False,True,
NVDA,230285500.0,3.139629,1.797425,2.199103,0.633242,6.419857,Technology,Semiconductors,6490894000000.0,False,True,
WMT,17950630.0,1.527472,0.696616,0.852292,0.504449,3.43486,Consumer Defensive,Discount Stores,1146104000000.0,False,True,
TD.TO,7545756.0,1.173627,0.438209,0.536137,0.412999,2.549827,Financial Services,Banks - Diversified,197847900000.0,False,True,
SHOP.TO,2102246.0,3.935635,2.372328,2.902481,0.666741,8.699801,Technology,Software - Application,285741500000.0,False,True,
ENB.TO,8470133.0,1.044762,0.398558,0.487625,0.42196,1.952059,Energy,Oil & Gas Midstream,148872100000.0,False,True,


Benchmark Std Dev: 0.902289793686018
Benchmark Weekly Volatility: 1.650726036520851


In [None]:
## === 4. Variable Stock Returns vs TSX/S&P 500 Benchmark ===

horizons = [5, 21, 63, 252]

# Prices aligned with benchmark dates
px = stocks[filtered_tickers].loc[stocks.index.intersection(bench.index)].dropna(how="all", axis=1)

def trailing_ret(series, d):
    s = series.dropna()
    if len(s) <= 1: return np.nan
    if len(s) <= d: return (s.iloc[-1] / s.iloc[0] - 1) * 100
    return (s.iloc[-1] / s.iloc[-(d+1)] - 1) * 100

# Stock returns table
stock_returns = pd.DataFrame(index=px.columns,
                             columns=[f"{d}d" for d in horizons])

for t in px.columns:
    for d in horizons:
        stock_returns.loc[t, f"{d}d"] = trailing_ret(px[t], d)

# Benchmark returns table
bench_returns = pd.DataFrame(index=[f"{d}d" for d in horizons],
                             columns=["TSX/S&P Benchmark (%)"])

for d in horizons:
    bench_returns.loc[f"{d}d"] = trailing_ret(bench, d)

stock_returns = stock_returns.round(2)
bench_returns = bench_returns.round(2)

display(stock_returns)
display(bench_returns)