Import the libraries and define the stock-selection criteria (metrics used: market capitalisation, P/E ratio and sector).

In [None]:
import requests
import pandas as pd
import numpy as np
import yfinance as yf
import scipy.optimize as sco
import matplotlib.pyplot as plt

# Market-capitalisation thresholds in USD used to bucket stocks.
LARGE_CAP_MIN = 50_000_000_000   # at or above: Large-Cap
MID_CAP_MIN = 10_000_000_000     # at or above (and below large): Mid-Cap
SMALL_CAP_MIN = 2_000_000_000    # at or above (and below mid): Small-Cap; anything lower is excluded

PE_MAX = 25  # Max P/E Ratio
# Sector names must match the "GICS Sector" column of the Wikipedia S&P 500 table,
# because filter_stocks() compares against it with isin(). Yahoo-style names such as
# "Technology", "Healthcare" or "Consumer Defensive" never match and would silently
# drop those sectors from the universe.
SECTORS = ["Information Technology", "Health Care", "Consumer Staples", "Energy", "Financials", "Industrials"]

Download the S&P 500 company list from Wikipedia.

Columns kept: Symbol (ticker), Security (company name), GICS Sector (industry sector).

In [None]:
#Scraping data from wikipedia
def get_sp500_tickers():
    """Scrape the S&P 500 constituents table from Wikipedia.

    Returns:
        DataFrame with the columns ``symbol``, ``name`` and ``sector``
        (taken from the page's "Symbol", "Security" and "GICS Sector" columns).
        Symbols are converted to Yahoo Finance notation (``BRK.B`` -> ``BRK-B``)
        so they can be passed straight to ``yf.download``.

    Raises:
        requests.HTTPError: if Wikipedia answers with an error status.
    """
    from io import StringIO

    url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
    # Fetch the page ourselves instead of handing the URL to pandas: Wikipedia may
    # reject clients that do not send a descriptive User-Agent, and an explicit
    # timeout keeps the cell from hanging indefinitely.
    response = requests.get(
        url,
        headers={"User-Agent": "Mozilla/5.0 (sp500-portfolio-notebook)"},
        timeout=30,
    )
    response.raise_for_status()

    tables = pd.read_html(StringIO(response.text))
    constituents = tables[0]  # first table on the page is the constituents list

    #relevant columns
    column_names = {"Symbol": "symbol", "Security": "name", "GICS Sector": "sector"}
    df = constituents[list(column_names)].rename(columns=column_names)

    # Wikipedia writes share classes with a dot, Yahoo Finance with a dash.
    return df.assign(symbol=df["symbol"].str.replace(".", "-", regex=False))

This function assigns mock values for market cap, P/E ratio and last dividend. Mock values are useful for testing and development when real financial data is not available or API calls are restricted.

In [None]:
#Get Stock Financial Data
def get_stock_data(df):
    """Attach mock fundamentals (market cap, P/E ratio, last dividend) to each stock.

    The input frame is left untouched; a new frame with the extra columns
    ``mktCap``, ``pe`` and ``lastDiv`` is returned, so re-running the cell
    always starts from the same input.

    Args:
        df: DataFrame with a ``symbol`` column of non-empty strings.

    Returns:
        A copy of ``df`` with the three mock columns appended.
    """
    def _mock_market_cap(symbol):
        # Mock rule: bucket purely by the ticker's first letter.
        first_letter = symbol[0]
        if first_letter < "I":
            return 50_000_000_000
        if first_letter < "R":
            return 15_000_000_000
        return 5_000_000_000

    #assigning mock values for market cap,P/E ratio and last dividend
    return df.assign(
        mktCap=df["symbol"].map(_mock_market_cap),
        pe=20,
        lastDiv=1.5,
    )

Apply Stock Filtering Criteria

In [None]:
def filter_stocks(df):
    """Keep the stocks that pass the screening criteria and label their size bucket.

    A stock is kept when its market cap is at least ``SMALL_CAP_MIN``, its P/E
    ratio is at most ``PE_MAX``, it paid a dividend (``lastDiv > 0``) and its
    sector is listed in ``SECTORS``.

    Args:
        df: DataFrame with ``mktCap``, ``pe``, ``lastDiv`` and ``sector`` columns.

    Returns:
        A new DataFrame containing the surviving rows plus a
        ``Market_Cap_Category`` column ("Large-Cap", "Mid-Cap" or "Small-Cap").
    """
    passes_screen = (
        (df["mktCap"] >= SMALL_CAP_MIN)   # Exclude micro-cap stocks
        & (df["pe"] <= PE_MAX)
        & (df["lastDiv"] > 0)
        & (df["sector"].isin(SECTORS))
    )
    # Copy explicitly: the new column is written to an independent frame rather
    # than to a slice of the input, which avoids SettingWithCopyWarning.
    filtered = df.loc[passes_screen].copy()

    # Assign market cap category
    filtered["Market_Cap_Category"] = filtered["mktCap"].apply(
        lambda x: "Large-Cap" if x >= LARGE_CAP_MIN else ("Mid-Cap" if x >= MID_CAP_MIN else "Small-Cap")
    )

    return filtered

Select 10 Stocks (At Least 3 Per Market Cap & Ensure Sector Diversification)

In [None]:
def select_diversified_stocks(df, n_total=10, per_category=3, max_per_sector=3):
    """Pick a portfolio universe that is spread over size buckets and sectors.

    First pass: for every ``Market_Cap_Category`` (iterated in the sorted order
    produced by ``groupby``), take the largest stocks by ``mktCap`` until
    ``per_category`` are chosen, skipping any stock whose sector already has
    ``max_per_sector`` picks. Second pass: if fewer than ``n_total`` were
    chosen, top up with the remaining stocks in frame order (no sector cap is
    applied in this pass).

    Args:
        df: DataFrame with ``symbol``, ``sector``, ``mktCap`` and
            ``Market_Cap_Category`` columns.
        n_total: maximum number of tickers to return.
        per_category: picks per market-cap category in the first pass.
        max_per_sector: sector cap applied during the first pass.

    Returns:
        List of at most ``n_total`` ticker symbols.
    """
    selected_stocks = []
    sector_count = {}  # To track sector representation

    for _, group in df.groupby("Market_Cap_Category"):
        # Stable sort: stocks with equal market cap keep their frame order, so
        # the selection is reproducible even when many caps are tied.
        ranked = group.sort_values(by="mktCap", ascending=False, kind="stable")
        picked = 0

        for symbol, sector in zip(ranked["symbol"], ranked["sector"]):
            if picked >= per_category:
                break  # This category has its quota

            # Ensure sector diversification
            if sector_count.get(sector, 0) < max_per_sector:
                selected_stocks.append(symbol)
                sector_count[sector] = sector_count.get(sector, 0) + 1
                picked += 1

    # If the first pass came up short, fill the remaining slots in frame order
    shortfall = n_total - len(selected_stocks)
    if shortfall > 0:
        leftovers = df.loc[~df["symbol"].isin(selected_stocks), "symbol"]
        selected_stocks.extend(leftovers.head(shortfall))

    return selected_stocks[:n_total]

In [None]:
# Run the selection pipeline end to end:
# scrape constituents -> attach (mock) fundamentals -> screen -> pick the tickers.
sp500_df = get_sp500_tickers()
stock_data_df = get_stock_data(sp500_df)
filtered_stocks = filter_stocks(stock_data_df)
selected_tickers = select_diversified_stocks(filtered_stocks)

print("Selected Stocks for Portfolio:", selected_tickers)

Code for selecting the portfolio optimisation and weight allocation strategy.

In [None]:
# `tickers` is the name the backtest cells further down use for the selected universe.
tickers = selected_tickers  
benchmark_ticker = 'SPY'
# One download for the selected tickers plus the benchmark; keep only closing prices.
data = yf.download(selected_tickers + [benchmark_ticker], start='2022-01-01')['Close']
# Daily simple returns; dropna() removes every date on which any column is NaN.
returns = data.pct_change().dropna()

In [None]:
# Split the benchmark column off, then restrict `returns` to the selected tickers
# (in `selected_tickers` order).
# NOTE(review): this cell overwrites `returns`, so running it a second time raises a
# KeyError because the benchmark column is gone. `benchmark_returns` is not used by
# any later cell shown here, and `mean_returns` / `cov_matrix` / `risk_free_rate`
# are recomputed from fresh data in the next cell.
benchmark_returns = returns[benchmark_ticker]
returns = returns[selected_tickers]

#mean and cov matrix with only selected_tickers
mean_returns = returns.mean()
cov_matrix = returns.cov()

tickers = selected_tickers  
risk_free_rate = 0.02 

Finally, we consider all three parameters (return, volatility and Sharpe ratio) and compute a final score for each of the five strategies: MVO, EWP, IVP, GMVP and MDP. We use live data to find which strategy performs best.

In [None]:
# Last year of closing prices. The columns are explicitly put in `selected_tickers`
# order: the optimisers below return plain positional arrays that are later labelled
# with `selected_tickers`, and the download is not guaranteed to preserve the
# requested order (the earlier cell reorders for the same reason).
data = yf.download(selected_tickers, period='1y')['Close'][selected_tickers]
returns = data.pct_change().dropna()

# Daily statistics shared by every strategy in this cell.
mean_returns = returns.mean()
cov_matrix = returns.cov()
volatilities = returns.std()
risk_free_rate = 0.02  # annual rate
# 'ME' (month end) replaces the deprecated 'M' alias and matches the backtest cell.
rebalance_dates = returns.resample('ME').first().index
weights_over_time = []

#portfolio statistics shared by all strategies
def get_portfolio_stats(weights, mu=None, sigma=None, rf=None, periods_per_year=252):
    """Annualised return, volatility and Sharpe ratio of a weight vector.

    The per-period (daily) mean and covariance are scaled to a yearly horizon
    before the risk-free rate is subtracted, so that the return and the
    annual risk-free rate are expressed in the same units.

    Args:
        weights: portfolio weights, positional, one per asset.
        mu: per-period mean returns; defaults to the notebook-level ``mean_returns``.
        sigma: per-period covariance matrix; defaults to the notebook-level ``cov_matrix``.
        rf: annual risk-free rate; defaults to the notebook-level ``risk_free_rate``.
        periods_per_year: number of return periods in a year (252 trading days).

    Returns:
        Tuple ``(annual_return, annual_volatility, sharpe_ratio)``.
    """
    mu = mean_returns if mu is None else mu
    sigma = cov_matrix if sigma is None else sigma
    rf = risk_free_rate if rf is None else rf

    weights = np.asarray(weights, dtype=float)
    port_return = np.dot(weights, mu) * periods_per_year
    # Variance scales linearly with the horizon, volatility with its square root.
    port_vol = np.sqrt(np.dot(weights.T, np.dot(sigma, weights)) * periods_per_year)
    port_sharpe = (port_return - rf) / port_vol
    return port_return, port_vol, port_sharpe

# Shared optimiser set-up: start from equal weights, stay long-only (each weight
# in [0, 1]) and fully invested (weights sum to 1).
init_guess = np.full(len(selected_tickers), 1 / len(selected_tickers))
bounds = tuple([(0, 1)] * len(selected_tickers))
constraints = {'type': 'eq', 'fun': lambda x: np.sum(x) - 1}

# Equal Weight Portfolio (EWP): the starting guess is already the answer.
ewp_weights = init_guess
ewp_ret, ewp_vol, ewp_sharpe = get_portfolio_stats(ewp_weights)

#mean variance optimisation(MVO): maximise the Sharpe ratio
def negative_sharpe(weights):
    """Objective for the minimiser: the Sharpe ratio with its sign flipped."""
    return -get_portfolio_stats(weights)[2]

opt = sco.minimize(negative_sharpe, init_guess, method='SLSQP', bounds=bounds, constraints=constraints)
# Same safeguard as the backtest cell: if SLSQP does not converge, opt.x may not be
# a valid portfolio, so fall back to equal weights.
mvo_weights = opt.x if opt.success else init_guess
mvo_ret, mvo_vol, mvo_sharpe = get_portfolio_stats(mvo_weights)

#GMVP(global minimum variance portfolio)
def portfolio_volatility(weights):
    """Portfolio standard deviation implied by the notebook-level ``cov_matrix``."""
    return np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))

opt = sco.minimize(portfolio_volatility, init_guess, method='SLSQP', bounds=bounds, constraints=constraints)
# Same safeguard as the backtest cell: fall back to equal weights if SLSQP fails.
gmvp_weights = opt.x if opt.success else init_guess
gmvp_ret, gmvp_vol, gmvp_sharpe = get_portfolio_stats(gmvp_weights)

#Inverse Volatility Portfolio (IVP): weight each asset by 1/volatility, normalised to sum to 1
ivp_weights = (1 / volatilities).pipe(lambda inverse_vol: inverse_vol / inverse_vol.sum())
ivp_ret, ivp_vol, ivp_sharpe = get_portfolio_stats(ivp_weights)

#Most Diversified Portfolio (MDP)
def neg_diversification_ratio(weights):
    """Negated diversification ratio: weighted average volatility / portfolio volatility.

    Uses the notebook-level ``cov_matrix`` and ``volatilities``. The sign is
    flipped so that minimising this function maximises the ratio.
    """
    portfolio_vol = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))
    weighted_vol = np.dot(weights, volatilities)
    return -weighted_vol / portfolio_vol

opt = sco.minimize(neg_diversification_ratio, init_guess, method='SLSQP', bounds=bounds, constraints=constraints)
# Same safeguard as the backtest cell: fall back to equal weights if SLSQP fails.
mdp_weights = opt.x if opt.success else init_guess
mdp_ret, mdp_vol, mdp_sharpe = get_portfolio_stats(mdp_weights)

#Compare all Strategies using the three metrics
results = pd.DataFrame({
    'Strategy': ['EWP', 'MVO', 'GMVP', 'IVP', 'MDP'],
    'Return': [ewp_ret, mvo_ret, gmvp_ret, ivp_ret, mdp_ret],
    'Volatility': [ewp_vol, mvo_vol, gmvp_vol, ivp_vol, mdp_vol],
    'Sharpe Ratio': [ewp_sharpe,mvo_sharpe,gmvp_sharpe,ivp_sharpe,mdp_sharpe]
})

#composite score
def _min_max_score(values, higher_is_better=True):
    """Rescale a metric to [0, 1], where 1 is always the best strategy.

    Dividing by the maximum (the previous approach) reverses the ranking as soon
    as the best value is negative, e.g. when every strategy lost money. Min-max
    scaling keeps the ordering intact for any sign.
    """
    spread = values.max() - values.min()
    if spread == 0:
        # All strategies tie on this metric, so none should be penalised.
        return pd.Series(1.0, index=values.index)
    scaled = (values - values.min()) / spread
    return scaled if higher_is_better else 1 - scaled

results["Return Score"] = _min_max_score(results["Return"])
results["Sharpe Score"] = _min_max_score(results["Sharpe Ratio"])
results["Volatility Score"] = _min_max_score(results["Volatility"], higher_is_better=False)

# Relative importance of return, Sharpe ratio and (low) volatility in the final score.
metric_weights = [0.4, 0.4, 0.2]
results["Final Score"] = (
    metric_weights[0] * results["Return Score"] +
    metric_weights[1] * results["Sharpe Score"] +
    metric_weights[2] * results["Volatility Score"]
)

# Keep the winning row and the strategy label under separate names, so that
# `best_strategy` is always a string.
best_row = results.loc[results["Final Score"].idxmax()]
best_strategy = best_row["Strategy"]
print("Best Strategy Based on Live Market Conditions:", best_strategy)
print(results[['Strategy', 'Return', 'Volatility', 'Sharpe Ratio', 'Final Score']])

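Considering all the metrics (return, volatility, Sharpe ratio and the final composite score), GMVP came out as the best strategy in our run. Because the scores are computed from live data, the winner can change between runs; the next cell always uses whichever strategy scored highest.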

In [None]:
# Look up the weight vector that belongs to the winning strategy.
weights_by_strategy = {
    'EWP': ewp_weights,
    'MVO': mvo_weights,
    'GMVP': gmvp_weights,
    'IVP': ivp_weights,
    'MDP': mdp_weights,
}
if best_strategy not in weights_by_strategy:
    # Fail loudly instead of silently reusing a stale `selected_weights`.
    raise ValueError(f"Unsupported strategy selected: {best_strategy!r}")
selected_weights = weights_by_strategy[best_strategy]

# Every weight vector is positional and follows the column order of `returns`
# (the frame the means and covariances were computed from). Label it with those
# columns first, then present it in `selected_tickers` order, so that each weight
# stays attached to the right ticker whatever order the download returned.
final_weights = pd.Series(
    np.asarray(selected_weights, dtype=float), index=returns.columns
).reindex(selected_tickers)
print("Optimal Weights for", best_strategy)
print(final_weights)

Plot the cumulative returns, portfolio value, drawdown and the change in portfolio weights over time.

In [None]:
# Backtest window and starting capital.
start_date = "2023-01-01"
end_date = "2025-01-01"
initial_capital = 10000

# Put the price columns in `tickers` order: the backtest labels its positional
# weight vectors with `tickers`, and the download is not guaranteed to preserve
# the requested order.
data = yf.download(tickers, start=start_date, end=end_date)['Close'][tickers]
returns = data.pct_change().dropna()
# Month-end timestamps on which the portfolio is re-optimised.
rebalance_dates = returns.resample('ME').first().index

# Filled by the backtest loop in the next cell.
weights_over_time = []
port_returns = []

In [None]:
strategy=best_strategy

TRADING_DAYS_PER_YEAR = 252
LOOKBACK_DAYS = 60       # length of the estimation window
MIN_LOOKBACK_DAYS = 20   # skip rebalances with less history than this


def _optimise_weights(objective, n_assets):
    """Minimise ``objective`` over long-only, fully-invested weights (equal weights if SLSQP fails)."""
    equal_weights = np.full(n_assets, 1 / n_assets)
    opt = sco.minimize(objective, equal_weights, method='SLSQP',
                       bounds=tuple((0, 1) for _ in range(n_assets)),
                       constraints={'type': 'eq', 'fun': lambda x: np.sum(x) - 1})
    return opt.x if opt.success else equal_weights


def _strategy_weights(strategy, window, risk_free_rate=0.02):
    """Return the weights (in ``window`` column order) that ``strategy`` assigns given the window's daily returns."""
    mu = window.mean().to_numpy()
    sigma = window.cov().to_numpy()
    vol = window.std().to_numpy()
    n_assets = window.shape[1]

    def portfolio_vol(w):
        return np.sqrt(w @ sigma @ w)

    if strategy == "EWP":
        return np.full(n_assets, 1 / n_assets)
    if strategy == "IVP":
        inv_vol = 1 / vol
        return inv_vol / inv_vol.sum()
    if strategy == "GMVP":
        return _optimise_weights(portfolio_vol, n_assets)
    if strategy == "MDP":
        # Maximise the diversification ratio, the same objective used when the
        # strategies were compared (previously this branch solved a risk-parity
        # problem, i.e. a different portfolio than the one that was scored).
        return _optimise_weights(lambda w: -(w @ vol) / portfolio_vol(w), n_assets)
    if strategy == "MVO":
        # The window holds daily returns, so the annual rate is converted to a daily one.
        daily_rf = risk_free_rate / TRADING_DAYS_PER_YEAR
        return _optimise_weights(lambda w: -(w @ mu - daily_rf) / portfolio_vol(w), n_assets)
    raise ValueError("Unsupported strategy selected.")


# Start from empty lists so that re-running this cell does not append duplicates.
weights_over_time = []
port_returns = []

for i, date in enumerate(rebalance_dates):
    # Estimation window: the most recent trading days up to and including `date`.
    window = returns.loc[:date].tail(LOOKBACK_DAYS)
    if window.shape[0] < MIN_LOOKBACK_DAYS:
        continue

    # Holding period: strictly after `date` (no overlap with the estimation window)
    # up to and including the next rebalance date, so that consecutive periods
    # neither overlap nor leave gaps.
    holding = returns.loc[returns.index > date]
    if i + 1 < len(rebalance_dates):
        holding = holding.loc[:rebalance_dates[i + 1]]
    if holding.empty:
        continue  # nothing left to trade after the final rebalance date

    # Label the weights with the window's own columns so they cannot be misaligned.
    weights_series = pd.Series(_strategy_weights(strategy, window),
                               index=window.columns, name=date)
    weights_over_time.append(weights_series)

    # Daily portfolio returns at the target weights, compounded over the period
    # (summing daily returns would only approximate the period return).
    daily_port_returns = holding @ weights_series
    port_return = (1 + daily_port_returns).prod() - 1
    port_returns.append([date, port_return])

In [None]:
# One row of weights per rebalance date, and one realised return per holding period.
weights_df = pd.DataFrame(weights_over_time)
returns_df = pd.DataFrame(port_returns, columns=["Date", "Portfolio_Return"]).set_index("Date")

# Growth of 1 unit of capital, and the same curve scaled to the starting capital.
cumulative_returns = (1 + returns_df["Portfolio_Return"]).cumprod()
portfolio_value = initial_capital * cumulative_returns
# The running peak includes the starting value of 1.0; otherwise a loss in the very
# first period would be reported as zero drawdown.
rolling_max = cumulative_returns.cummax().clip(lower=1.0)
drawdown = (cumulative_returns - rolling_max) / rolling_max

In [None]:
fig, axes = plt.subplots(4, 1, figsize=(14, 18), sharex=True)

# Panel 0 is a stacked area chart of the weights; panels 1-3 each show one line series
# (drawdown, cumulative return, portfolio value).
weights_df.plot.area(ax=axes[0], colormap='tab20')
for panel, series, colour in zip(axes[1:],
                                 (drawdown, cumulative_returns, portfolio_value),
                                 ('red', 'blue', 'green')):
    series.plot(ax=panel, color=colour, lw=2)

# Title and y-axis label for each panel, top to bottom.
panel_text = (
    (f"Portfolio Weights Over Time ({strategy})", "Weight"),
    (f"Drawdown Over Time ({strategy})", "Drawdown"),
    (f"Cumulative Return Over Time ({strategy})", "Cumulative Return"),
    (f"Portfolio Value Over Time (Initial ${initial_capital:,})", "Portfolio Value ($)"),
)
for panel, (title, ylabel) in zip(axes, panel_text):
    panel.set_title(title)
    panel.set_ylabel(ylabel)
    panel.grid(True)

# Only the bottom panel carries the shared x-axis label.
axes[3].set_xlabel("Date")

plt.tight_layout()
plt.show()

In [None]:
def portfolio_returns_from_weights(prices: pd.DataFrame, weights, initial_capital=10000):
    """Evaluate a fixed-weight portfolio on a price history.

    Args:
        prices: price table with one column per asset.
        weights: a Series of weights (aligned to the price columns by label), or
            any other sequence, which is labelled with ``prices.columns`` positionally.
        initial_capital: starting portfolio value.

    Returns:
        Dict with the per-period portfolio returns, the portfolio value series,
        the absolute gain (``total_return``) and the gain in percent
        (``return_percentage``).
    """
    if isinstance(weights, pd.Series):
        weight_vector = weights
    else:
        weight_vector = pd.Series(weights, index=prices.columns)

    asset_returns = prices.pct_change().dropna()

    # Weight each asset's return, then add across assets for every date.
    period_returns = asset_returns.mul(weight_vector).sum(axis=1)
    equity_curve = period_returns.add(1).cumprod() * initial_capital

    ending_value = equity_curve.iloc[-1]
    return dict(
        portfolio_returns=period_returns,
        portfolio_value=equity_curve,
        total_return=ending_value - initial_capital,
        return_percentage=(ending_value / initial_capital - 1) * 100,
    )

In [None]:
# Evaluate the chosen weights as a fixed-weight portfolio over 2023-01-01 .. 2025-01-01.
# NOTE(review): `final_weights` was estimated from the most recent year of data
# (period='1y'), so this window may overlap with, or precede, the estimation data;
# treat the figures as illustrative rather than as an out-of-sample result.
prices = yf.download(tickers, start="2023-01-01", end="2025-01-01")['Close']
weights = final_weights
result = portfolio_returns_from_weights(prices, weights)

print("Total Return: $", round(result['total_return'], 2))
print("Return Percentage:", round(result['return_percentage'], 2), "%")