In [None]:
#These are the libraries you can use.  You may add any libraries directly related to threading if this is a direction
#you wish to go (this is not from the course, so it's entirely on you if you wish to use threading).  Any
#further libraries you wish to use you must email me, james@uwaterloo.ca, for permission.

from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
import random
from datetime import datetime

## Group Assignment
### Team Number: 10
### Team Member Names: David, Tanvi, Johan
### Team Strategy Chosen: Market Meet

## Contribution Declaration

The following team members made a meaningful contribution to this assignment:

Insert Names Here.

-------------------------------------------------------------------------------------------------------------------------------------------
## Competition Goal: Market Meet

Goal: Have the 25-stock portfolio’s return match the average of the S&P/TSX Composite and S&P 500 returns (~0.370%) as closely as possible over the 5-day period (Nov. 24 to Nov. 28).

- **S&P/TSX Composite Index** (`^GSPTSE`)
- **S&P 500 Index** (`^GSPC`)

| Index | YTD Return | Daily Return (÷ 252)| 5-Day Estimated Return |
|:------|:-----------:|:--------------------:|:----------------------:|
| TSX Composite | 21.74% | 0.0863% | 0.43% |
| S&P 500 | 15.55% | 0.0617% | 0.31% |
| **Average (50/50)** | — | — | **≈ 0.370%** |

Use historical data (daily returns) from `yfinance` to estimate:
  - Mean returns
  - Volatility (standard deviation)
  - Sharpe ratio (risk-adjusted return)
  - Beta and alpha risk
  - Correlation with the benchmark
  - Idiosyncratic (residual) risk

Select 10–25 stocks (we aim for 25 if possible) that:
- Have **beta ≈ 1** and high correlation with the benchmark
- Are liquid (average daily volume ≥ 5,000 shares)
- Have sector diversification (no sector >40% of total value)
- Include at least one large-cap (> 10B CAD) and one small-cap (< 2B CAD)

Weights are between (100 / (2n))% and 15% (for 25 stocks, between 2% and 15%)
- Spend approximately **$1,000,000 CAD**, net of trading fees:
- Fees = min(2.15 USD, 0.001 USD * shares) per trade, applied to all purchases
-------------------------------------------------------------------------------------------------------------------------------------------

In [None]:
# Notebook-wide configuration: every tunable value lives in this cell.

# Plot appearance: stay on matplotlib's stock style.
plt.style.use("default")

# Benchmark index symbols (S&P/TSX Composite and S&P 500).
TSX_Index, SP500_Index = "^GSPTSE", "^GSPC"

# Capital to deploy, expressed in CAD.
CAD_budget = 1_000_000

# Portfolio size: allowed range, plus the count we target when the
# ticker universe is large enough.
min_stocks, max_stocks = 10, 25
desired_num_stocks = 25

# Length of the holding window in trading days (fixed by the assignment).
holding_days = 5

# Daily risk-free rate; approximated as zero because it is negligible
# over such a short horizon.
risk_free_daily = 0.0

In [None]:
# NOTE(review): this option is global, so every float displayed by any later
# cell is also shown with 0 decimals (returns, betas, weights included).
# Kept as-is for compatibility; consider scoping it with pd.option_context.
pd.set_option("display.float_format", "{:,.0f}".format)


# Load the ticker universe: the CSV has no header row, one symbol per line.
tickers_df = pd.read_csv("Tickers_Example.csv", header=None, names=["Ticker"])

# Clean the raw column before use:
#   * non-string cells (e.g. NaN from an empty field) are dropped,
#   * surrounding whitespace is stripped and empty symbols are dropped,
#   * duplicates are removed so no ticker is fetched or listed twice.
# dict.fromkeys keeps the first occurrence of each symbol, preserving file order.
stripped_symbols = (
    symbol.strip() for symbol in tickers_df["Ticker"].tolist() if isinstance(symbol, str)
)
tickers_list = list(dict.fromkeys(symbol for symbol in stripped_symbols if symbol))

print("Total tickers loaded:", len(tickers_list))



def is_us_or_canadian(info):
    """Return True when a ticker's metadata places it on a US or Canadian exchange.

    Parameters
    ----------
    info : dict or None
        Metadata mapping for a single ticker. Only the ``"exchange"`` key is read.

    Returns
    -------
    bool
        True if ``info["exchange"]`` is one of the accepted exchange codes.
        False otherwise, including when ``info`` is None or empty, or when the
        exchange code is missing.
    """
    # A failed metadata lookup can hand us None or {}: treat it as "not eligible"
    # instead of raising AttributeError on .get().
    if not info:
        return False

    # NOTE(review): codes as reported in the Yahoo Finance "exchange" field.
    # NCM and ASE are presumed to be Nasdaq Capital Market and NYSE American;
    # they were added so those US listings are not rejected. Confirm the full
    # list against the assignment's definition of eligible markets.
    valid_exchanges = (
        "NMS", "NGM", "NCM",    # Nasdaq
        "NYSE", "NYQ", "ASE",   # NYSE
        "TOR", "TSX",           # Toronto
    )

    return info.get("exchange", "") in valid_exchanges


# Collect industry, sector, daily prices, and average daily volume per ticker.
start_ytd = "2025-01-01"         # first date of the price history stored per ticker
volume_start = "2024-10-01"      # liquidity window (both ends inclusive)
volume_end   = "2025-09-30"

# Months with fewer trading days than this are excluded from the volume average.
min_trading_days_per_month = 18

volume_column = "Avg Daily Volume (Oct24–Sep25)"

all_data = []
daily_data_dict = {}     # stores the daily Close/Volume dataframe (from start_ytd on) for each ticker

# The liquidity window opens before start_ytd, so the download must begin at
# the earlier of the two dates; otherwise the "Oct24–Sep25" average would
# silently cover only the 2025 part of the window.
# ISO "YYYY-MM-DD" strings sort chronologically, so min() on strings is safe.
fetch_start = min(start_ytd, volume_start)
fetch_end = datetime.today().strftime("%Y-%m-%d")


for ticker in tickers_list:
    print("Fetching:", ticker)

    # One bad symbol (invalid, delisted, network hiccup) must not abort the
    # whole run: report it and move on to the next ticker.
    try:
        t = yf.Ticker(ticker)
        info = t.info or {}
    except Exception as exc:
        print(f" → Skipped (could not load ticker info: {exc})\n")
        continue

    # Skip non-US/CA tickers
    if not is_us_or_canadian(info):
        print(" → Skipped (not US/CA market)\n")
        continue

    industry = info.get("industry")
    sector = info.get("sector")

    # Daily history covering both the liquidity window and the YTD period
    try:
        hist = t.history(start=fetch_start, end=fetch_end, interval="1d")
    except Exception as exc:
        print(f" → Skipped (could not load price data: {exc})\n")
        continue

    if hist.empty:
        print(" → Skipped (no price data)\n")
        continue

    hist = hist[["Close", "Volume"]].copy()
    hist.index = hist.index.strftime("%Y-%m-%d")  # remove time; index becomes date strings

    # Only the YTD part is stored, so later cells see the same date range
    # as before the download window was widened.
    ytd_hist = hist.loc[start_ytd:].copy()
    if ytd_hist.empty:
        print(" → Skipped (no price data)\n")
        continue

    daily_data_dict[ticker] = ytd_hist

    # Average daily volume between volume_start and volume_end,
    # ignoring months with fewer than min_trading_days_per_month trading days.
    subset = hist.loc[volume_start:volume_end].copy()

    # Month label "YYYY-MM" taken from the date string
    subset["Month"] = subset.index.str.slice(0, 7)

    # Count trading days per month
    days_per_month = subset.groupby("Month")["Volume"].count()

    # Keep months with at least min_trading_days_per_month trading days
    valid_months = days_per_month[days_per_month >= min_trading_days_per_month].index.tolist()

    filtered = subset[subset["Month"].isin(valid_months)]

    # NaN when no month qualifies (e.g. a very recent listing)
    avg_daily_volume = filtered["Volume"].mean()

    all_data.append({
        "Ticker": ticker,
        "Industry": industry,
        "Sector": sector,
        volume_column: avg_daily_volume
    })

    print(" fetched \n")


# Convert to DataFrame; explicit columns keep the schema even if no ticker survived.
meta_df = pd.DataFrame(
    all_data,
    columns=["Ticker", "Industry", "Sector", volume_column],
)

print("\nFinal Extracted Metadata:")
display(meta_df)

# Optional check that the daily data was stored properly (uncomment to run).
# Kept as comments: a bare string literal at the end of a cell is echoed as
# the cell's output.
# for t in daily_data_dict:
#     print(f"\n Daily Data for {t}")
#     display(daily_data_dict[t].head())