In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime, timedelta
import warnings
import math

In [7]:
# CFM 101 - Group Assignment 2025
# Robo-Advising Challenge

warnings.filterwarnings('ignore')

# Configuration
INITIAL_CAPITAL = 1000000
USD_TO_CAD = 1.41
VOLUME_CHECK_START = '2023-10-01'
VOLUME_CHECK_END = '2024-09-30'
MIN_STOCKS = 10
MAX_STOCKS = 25
MAX_WEIGHT = 0.15
MAX_SECTOR_WEIGHT = 0.40

TRAINING_START = "2022-01-01"
TRAINING_END = "2024-01-02"
YEAR_AFTER_TRAINING = "2025-01-03"
RISK_FREE_RATE = 2/12
WEEKS_BACK = 52
N_STOCKS = 15

us_ticker_ex = yf.Ticker("AAPL")
cad_ticker_ex = yf.Ticker("Shop.to")

def softplus(n):
    return math.log(1 + math.e**n)

def weights(ticker_score):
    scores = [score for _, score in ticker_score]
    n = min(len(ticker_score), 15)
    min_weight_per_stock = 1.0/(2*n)
    excess_weight = 1 - n*min_weight_per_stock
    
    mean = np.mean(scores)
    std = np.std(scores)
    
    ticker_z_score = {}
    for ticker, score in ticker_score:
        ticker_z_score[ticker] = (score - mean) / std
    
    selected_tickers = []
    sectors = {}
    
    for ticker, _ in ticker_score:
        ticker_sector = get_sector(ticker)
        left_to_choose = n - len(selected_tickers)
        
        if ticker_sector not in sectors:
            sectors[ticker_sector] = []
        elif (len(sectors[ticker_sector])+1)*min_weight_per_stock > MAX_SECTOR_WEIGHT:
            continue
        
        max_weight = 0
        for sector in sectors:
            length = len(sectors[sector])
            if sector == ticker_sector:
                length += 1
            max_weight_sector = min(MAX_SECTOR_WEIGHT, 0.15*length)
            max_weight += max_weight_sector
        
        max_weight += (left_to_choose-1)*MAX_WEIGHT
        
        if max_weight < 1:
            continue
        
        sectors[ticker_sector].append(ticker)
        selected_tickers.append(ticker)
        
        if len(selected_tickers) == n:
            break
    
    ticker_softplus_score = {}
    sum_softplus = 0
    for ticker in selected_tickers:
        score = softplus(ticker_z_score[ticker])
        ticker_softplus_score[ticker] = score
        sum_softplus += score
    
    stock_weights = {ticker: min_weight_per_stock for ticker in ticker_softplus_score.keys()}
    
    add_sector_weights = {}
    sector_sum_scores = {}
    
    for sector in sectors.keys():
        add_sector_weights[sector] = 0
        sector_sum_scores[sector] = sum(ticker_softplus_score[t] for t in sectors[sector])
    
    while excess_weight > 1e-6:
        sector_sum = sum(sector_sum_scores[s] for s in sectors.keys() 
                        if abs(len(sectors[s])*min_weight_per_stock + add_sector_weights[s] - MAX_SECTOR_WEIGHT) >= 1e-6)
        
        for sector in sectors.keys():
            current_weight = len(sectors[sector])*min_weight_per_stock + add_sector_weights[sector]
            if abs(current_weight - MAX_SECTOR_WEIGHT) < 1e-6:
                continue
            toAdd = min(MAX_SECTOR_WEIGHT - current_weight, sector_sum_scores[sector]/sector_sum*excess_weight)
            add_sector_weights[sector] += toAdd
            excess_weight -= toAdd
    
    for sector in sectors.keys():
        while add_sector_weights[sector] > 1e-6:
            ticker_sum = sum(ticker_softplus_score[t] for t in sectors[sector] 
                           if abs(stock_weights[t] - MAX_WEIGHT) >= 1e-6)
            
            for ticker in sectors[sector]:
                score = ticker_softplus_score[ticker]
                toAdd = min(MAX_WEIGHT - stock_weights[ticker], score/ticker_sum*add_sector_weights[sector])
                stock_weights[ticker] += toAdd
                add_sector_weights[sector] -= toAdd
    
    return stock_weights

def first_trading_days_of_each_month(start_date, end_date):
    us_hist = us_ticker_ex.history(start=start_date, end=end_date)
    us_hist.index = us_hist.index.date
    cad_hist = cad_ticker_ex.history(start=start_date, end=end_date)
    cad_hist.index = cad_hist.index.date
    
    valid_dates = np.intersect1d(us_hist.index, cad_hist.index)
    
    first_days = []
    prev_month = 0
    for date in valid_dates:
        if date.month != prev_month:
            first_days.append(date)
            prev_month = date.month
    
    return first_days

def create_df(stocks, dates, distribution):
    start_date = dates[0]
    end_date = dates[-1] + pd.Timedelta(days=1)
    
    stocks_df = []
    
    for ticker in stocks:
        temp_df = ticker.history(start=start_date, end=end_date)
        temp_df.index = temp_df.index.date
        count = 0
        
        closes = []
        for i in range(len(temp_df)):
            if temp_df.index[i] == dates[count]:
                closes.append(temp_df['Close'].iloc[i])
                count += 1
        
        stock_df = pd.DataFrame({'Date': dates, 'Close': closes})
        stock_df = stock_df.set_index("Date")
        stocks_df.append(stock_df)
    
    money_per_stock = [dist * INITIAL_CAPITAL for dist in distribution]
    shares_in_stocks = [shares(stocks[i], stocks_df[i]['Close'].iloc[0], money_per_stock[i]) 
                       for i in range(len(stocks))]
    
    portfolio_values = []
    for i in range(len(dates)):
        portfolio_value = 0
        for j in range(len(stocks)):
            close = stocks_df[j]['Close'].iloc[i]
            if stocks[j].info['country'] != 'Canada':
                close *= USD_TO_CAD
            portfolio_value += close * shares_in_stocks[j]
        portfolio_values.append(portfolio_value)
    
    stock_portfolio = pd.DataFrame({'Date': dates, 'Portfolio Value': portfolio_values})
    stock_portfolio = stock_portfolio.set_index("Date")
    stock_portfolio['Percentage Returns'] = stock_portfolio['Portfolio Value'].pct_change() * 100
    stock_portfolio = stock_portfolio.iloc[1:]
    
    return stock_portfolio

def shares(ticker, price, money):
    cad_transaction_fee_per_stock = 0.01 * USD_TO_CAD
    cad_max_transaction_fee = 2.15 * USD_TO_CAD
    cad_price = price
    
    if ticker.info['country'] != 'Canada':
        cad_price = price * USD_TO_CAD
    
    return max(money/(cad_price + cad_transaction_fee_per_stock),
               (money - cad_max_transaction_fee)/cad_price)

def calculate_fee(shares):
    per_share_fee = shares * 0.001
    flat_fee = 2.15
    return min(per_share_fee, flat_fee)

def get_stock_data(ticker, start, end):
    data = ticker.history(start=start, end=end)
    return data if not data.empty else None

def calculate_technical_features(df):
    features = {}
    features['return_200d'] = df['Close'].pct_change(200).iloc[-1]
    features['volatility_200'] = -1 * df['Close'].pct_change().rolling(200).std().iloc[-1]
    features['sma_200'] = df['Close'].rolling(200).mean().iloc[-1]
    features['price_to_sma200'] = df['Close'].iloc[-1] / features['sma_200']
    features['rsi'] = calculate_rsi(df['Close'], 14)
    features['momentum'] = df['Close'].iloc[-1] / df['Close'].iloc[-200] - 1
    return features

def calculate_rsi(prices, period=30):
    delta = prices.diff()
    gain = delta.where(delta > 0, 0).rolling(window=period).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean()
    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))
    return rsi.iloc[-1] if not rsi.empty else 50

def check_volume_requirement(ticker, start, end):
    data = ticker.history(start=start, end=end)
    if data.empty or len(data) < 50:
        return False
    return float(data['Volume'].mean()) >= 5000

def get_market_cap(ticker):
    info = ticker.info
    market_cap = info.get('marketCap', 0)
    currency = info.get('currency', 'USD')
    if currency == 'USD':
        market_cap *= USD_TO_CAD
    return market_cap / 1e9

def get_sector(ticker):
    return ticker.info.get('sector', 'Unknown')

def norm(col):
    return (col - col.mean()) / col.std()

# Load tickers
tickers_df = pd.read_csv('Tickers_Example.csv')

if 'Tickers' in tickers_df.columns:
    tickers_list = tickers_df['Tickers'].tolist()
elif 'Ticker' in tickers_df.columns:
    tickers_list = tickers_df['Ticker'].tolist()
else:
    tickers_list = tickers_df.iloc[:, 0].tolist()

tickers_list = [yf.Ticker(t) for t in tickers_list]

# Filter by volume
valid_tickers = [t for t in tickers_list if check_volume_requirement(t, VOLUME_CHECK_START, VOLUME_CHECK_END)]

# Calculate features for training period
col_names = ['ticker', 'momentum', 'price_to_sma200', 'volatility_200', 'rsi']
stock_features_dict = {name: [] for name in col_names}

for ticker in valid_tickers:
    data = get_stock_data(ticker, TRAINING_START, TRAINING_END)
    
    if data is None or len(data) < 200:
        continue
    
    features = calculate_technical_features(data)
    features['ticker'] = ticker
    
    for name in col_names:
        stock_features_dict[name].append(features[name])

stock_features_df = pd.DataFrame(stock_features_dict)
stock_features_df.set_index('ticker', inplace=True)
stock_features_df['momentum_norm'] = norm(stock_features_df['momentum'])
stock_features_df['price_to_sma200_norm'] = norm(stock_features_df['price_to_sma200'])
stock_features_df['volatility_200_norm'] = norm(stock_features_df['volatility_200'])
stock_features_df['rsi_norm'] = norm(stock_features_df['rsi'])

# Find optimal weights
TRIALS = 100
variables = ['momentum_norm', 'price_to_sma200_norm', 'volatility_200_norm', 'rsi_norm']
n = len(variables)

optimal_weights = []
max_sharpe_ratio = -100

for trial in range(TRIALS):
    var_weights = np.random.rand(n)
    var_weights = var_weights / var_weights.sum()
    
    ticker_score = []
    for i in range(len(stock_features_df)):
        score = sum(var_weights[j] * stock_features_df[variables[j]].iloc[i] for j in range(n))
        ticker_score.append([stock_features_df.index[i], score])
    
    ticker_score = sorted(ticker_score, key=lambda x: x[1], reverse=True)
    stock_weights = weights(ticker_score)
    
    tickers = list(stock_weights.keys())
    distribution = list(stock_weights.values())
    dates = first_trading_days_of_each_month(TRAINING_END, YEAR_AFTER_TRAINING)
    
    df = create_df(tickers, dates, distribution)
    sharpe = (df['Percentage Returns'].mean() - RISK_FREE_RATE) / df['Percentage Returns'].std()
    
    if sharpe > max_sharpe_ratio:
        max_sharpe_ratio = sharpe
        optimal_weights = var_weights

# Build final portfolio
stock_features_dict = {name: [] for name in col_names}

for ticker in valid_tickers:
    data = get_stock_data(ticker, datetime.now()-timedelta(weeks=WEEKS_BACK+52), datetime.now()-timedelta(weeks=WEEKS_BACK))
    
    if data is None or len(data) < 200:
        continue
    
    features = calculate_technical_features(data)
    features['ticker'] = ticker
    for name in col_names:
        stock_features_dict[name].append(features[name])

stock_features_df = pd.DataFrame(stock_features_dict)
stock_features_df.set_index('ticker', inplace=True)
stock_features_df['momentum_norm'] = norm(stock_features_df['momentum'])
stock_features_df['price_to_sma200_norm'] = norm(stock_features_df['price_to_sma200'])
stock_features_df['volatility_200_norm'] = norm(stock_features_df['volatility_200'])
stock_features_df['rsi_norm'] = norm(stock_features_df['rsi'])

ticker_score = []
for i in range(len(stock_features_df)):
    score = sum(optimal_weights[j] * stock_features_df[variables[j]].iloc[i] for j in range(n))
    ticker_score.append([stock_features_df.index[i], score])

ticker_score = sorted(ticker_score, key=lambda x: x[1], reverse=True)
distribution = weights(ticker_score)

# Select stocks
selected_stocks = []

for ticker in distribution.keys():
    market_cap = get_market_cap(ticker)
    sector = get_sector(ticker)
    
    if market_cap == 0:
        continue
    
    selected_stocks.append({
        'Ticker': ticker,
        'market_cap': market_cap,
        'sector': sector
    })

portfolio_df = pd.DataFrame(selected_stocks)
portfolio_df['Weight'] = distribution.values()

# Adjust for sector constraint
sector_weights = portfolio_df.groupby('sector')['Weight'].sum()

for sector in sector_weights[sector_weights > MAX_SECTOR_WEIGHT].index:
    sector_mask = portfolio_df['sector'] == sector
    scale_factor = MAX_SECTOR_WEIGHT / sector_weights[sector]
    portfolio_df.loc[sector_mask, 'Weight'] *= scale_factor

portfolio_df['Weight'] = portfolio_df['Weight'] / portfolio_df['Weight'].sum()

# Get current prices
portfolio_df['Price_USD'] = 0.0
portfolio_df['Currency'] = ''

for idx, row in portfolio_df.iterrows():
    ticker = row['Ticker']
    current_data = get_stock_data(ticker, (datetime.now() - timedelta(weeks=WEEKS_BACK)).strftime('%Y-%m-%d'), 
                                  datetime.now().strftime('%Y-%m-%d'))
    
    if current_data is not None:
        portfolio_df.at[idx, 'Initial_Price'] = current_data['Close'].iloc[0]
        portfolio_df.at[idx, 'Final_Price'] = current_data['Close'].iloc[-1]
        portfolio_df.at[idx, 'Currency'] = ticker.info.get('currency', 'USD')

# Calculate shares and fees
total_fees = 0
portfolio_df['Shares'] = 0.0
portfolio_df['Fee_CAD'] = 0.0

for idx, row in portfolio_df.iterrows():
    weight = row['Weight']
    price = row['Initial_Price']
    currency = row['Currency']
    
    allocation_cad = INITIAL_CAPITAL * weight
    
    if currency == 'CAD':
        allocation_purchase = allocation_cad
        price_purchase = price
    else:
        allocation_purchase = allocation_cad / USD_TO_CAD
        price_purchase = price
    
    shares = allocation_purchase / price_purchase
    fee_usd = calculate_fee(shares)
    fee_cad = fee_usd * USD_TO_CAD
    
    allocation_after_fee = allocation_purchase - fee_usd
    shares_final = allocation_after_fee / price_purchase
    
    portfolio_df.at[idx, 'Shares'] = shares_final
    portfolio_df.at[idx, 'Fee_CAD'] = fee_cad
    total_fees += fee_cad

# Calculate final values
portfolio_df['Value_CAD'] = 0.0
portfolio_df['Price'] = 0.0

for idx, row in portfolio_df.iterrows():
    shares = row['Shares']
    currency = row['Currency']
    final = row['Final_Price']
    
    if currency == 'CAD':
        value_cad = shares * final
        portfolio_df.at[idx, 'Price'] = final
    else:
        value_cad = shares * final * USD_TO_CAD
        portfolio_df.at[idx, 'Price'] = final * USD_TO_CAD
    
    portfolio_df.at[idx, 'Value_CAD'] = value_cad

total_portfolio_value = portfolio_df['Value_CAD'].sum()
portfolio_df['Weight'] = portfolio_df['Value_CAD'] / total_portfolio_value

# Output
print("\n" + "="*80)
print("FINAL PORTFOLIO")
print("="*80)

final_portfolio = portfolio_df[['Ticker', 'Price', 'Currency', 'Shares', 'Value_CAD', 'Weight']].copy()
final_portfolio.columns = ['Ticker', 'Price', 'Currency', 'Shares', 'Value', 'Weight']
final_portfolio.index = range(1, len(final_portfolio) + 1)
final_portfolio['Weight'] = (final_portfolio['Weight'] * 100).round(2)
final_portfolio['Value'] = final_portfolio['Value'].round(2)
final_portfolio['Shares'] = final_portfolio['Shares'].round(4)
final_portfolio['Price'] = final_portfolio['Price'].round(2)

print(final_portfolio)
print("\n" + "-"*80)
print(f"Total Portfolio Value: ${total_portfolio_value:,.2f} CAD")
print(f"Total Fees Paid: ${total_fees:,.2f} CAD")

$AGN: possibly delisted; no timezone found
$CELG: possibly delisted; no timezone found
$CELG: possibly delisted; no timezone found
$MON: possibly delisted; no timezone found
$MON: possibly delisted; no timezone found
$RTN: possibly delisted; no timezone found
$RTN: possibly delisted; no timezone found



FINAL PORTFOLIO
                            Ticker    Price Currency     Shares      Value  \
1      yfinance.Ticker object <BK>   150.07      USD   593.6203   89082.41   
2      yfinance.Ticker object <MO>    82.05      USD  2011.1723  165012.46   
3      yfinance.Ticker object <PM>   218.89      USD   695.8771  152319.44   
4   yfinance.Ticker object <RY.TO>   211.38      CAD   344.0504   72725.37   
5     yfinance.Ticker object <BAC>    72.70      USD   790.4065   57462.24   
6     yfinance.Ticker object <AXP>   497.57      USD   116.0091   57723.21   
7     yfinance.Ticker object <BLK>  1430.76      USD    33.0732   47319.62   
8    yfinance.Ticker object <PYPL>    85.40      USD   363.3539   31031.77   
9     yfinance.Ticker object <USB>    67.37      USD   587.5032   39579.97   
10    yfinance.Ticker object <LMT>   649.70      USD   129.0413   83838.12   
11      yfinance.Ticker object <C>   139.17      USD   412.6018   57420.56   
12     yfinance.Ticker object <PG>   212.80    