In [None]:
!pip install yfinance fpdf matplotlib numpy pandas scipy
!pip install cvxpy
!pip install reportlab
%matplotlib inline

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from scipy.optimize import minimize
from fpdf import FPDF
import os

# --- CONFIGURATION ---
# Look-back window: 5 * 365 days ending at the moment this module is loaded.
START_DATE = datetime.now() - timedelta(days=5 * 365)
END_DATE = datetime.now()

MIN_AVG_VOLUME = 1_000_000   # lowest acceptable mean daily share volume
MIN_5Y_RETURN = 0.5          # lowest acceptable total return over the window (0.5 == 50%)
RISK_FREE_RATE = 0.04        # annual risk-free rate subtracted in Sharpe-ratio calculations
# Max number of stocks in the optimized portfolio.
# NOTE(review): not referenced anywhere else in the visible code - confirm it is enforced.
MAX_STOCKS = 10
# 'risk-on' picks the first sector tilt below; any other value picks the second.
MARKET_REGIME = 'risk-on'

# --- PRIORITY SECTORS BASED ON REGIME ---
# Each tilt is (sectors to keep, sectors to drop).
_RISK_ON_TILT = (
    ['Technology', 'Industrials', 'Utilities'],
    ['Consumer Staples', 'Insurance', 'Materials'],
)
_RISK_OFF_TILT = (
    ['Healthcare', 'Consumer Staples', 'Utilities'],
    ['Technology', 'Aerospace'],
)
PRIORITY_SECTORS, EXCLUDE_SECTORS = (
    _RISK_ON_TILT if MARKET_REGIME == 'risk-on' else _RISK_OFF_TILT
)

# --- INPUT: Get S&P 500 tickers dynamically ---
def get_sp500_tickers():
    """Scrape the S&P 500 constituent symbols and return them in Yahoo notation.

    Reads the first HTML table of the Wikipedia constituents page with
    ``pd.read_html`` and takes its ``Symbol`` column.

    Returns:
        list[str]: Ticker symbols in table order, with surrounding whitespace
        stripped and share-class dots replaced by dashes
        (e.g. ``BRK.B`` -> ``BRK-B``).
    """
    url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    tables = pd.read_html(url)
    sp500_table = tables[0]
    symbols = sp500_table['Symbol'].astype(str).str.strip()
    # Wikipedia writes share classes with a dot (BRK.B, BF.B) while Yahoo
    # Finance - the source queried through yfinance later on - uses a dash.
    # Without this conversion those tickers come back with no data.
    symbols = symbols.str.replace('.', '-', regex=False)
    return symbols.tolist()

# --- DATA DOWNLOAD FUNCTIONS ---
def download_price_data(tickers):
    """Download price and volume history for *tickers* through yfinance.

    The request covers START_DATE to END_DATE (both formatted as
    ``YYYY-MM-DD``), with ``auto_adjust=True`` and the progress bar disabled.

    Args:
        tickers: Symbols passed straight to ``yf.download``.

    Returns:
        tuple: ``(close, volume)`` - the ``'Close'`` and ``'Volume'``
        selections of the downloaded frame.
    """
    print("Downloading price and volume data...")
    date_format = '%Y-%m-%d'
    history = yf.download(
        tickers,
        start=START_DATE.strftime(date_format),
        end=END_DATE.strftime(date_format),
        auto_adjust=True,
        progress=False,
    )
    return history['Close'], history['Volume']

def download_fundamentals(tickers):
    """Collect a small table of fundamentals for every symbol in *tickers*.

    For each symbol the ``info`` mapping of ``yf.Ticker`` is read and reduced
    to six fields: PE Ratio, Dividend Yield, Sector, EPS Growth,
    Operating Margin and FCF Margin (free cash flow divided by total revenue).

    Any exception raised while handling a symbol is reported with a warning
    and the symbol gets a placeholder row (NaN metrics, 0.0 dividend yield,
    sector ``'Unknown'``), so one bad ticker never aborts the whole run.

    Args:
        tickers: Iterable of ticker symbols.

    Returns:
        pandas.DataFrame: One row per symbol, indexed by symbol.
    """
    print("Downloading fundamentals (PE, Dividend Yield, Sector, EPS Growth, Operating Margin, FCF Margin)...")
    rows = {}
    for symbol in tickers:
        try:
            info = yf.Ticker(symbol).info

            free_cash_flow = info.get('freeCashflow', np.nan)
            total_revenue = info.get('totalRevenue', np.nan)
            # Only divide when both figures are truthy; this skips None and
            # zero values (and therefore division by zero).
            if free_cash_flow and total_revenue:
                fcf_margin = free_cash_flow / total_revenue
            else:
                fcf_margin = np.nan

            record = {
                'PE Ratio': info.get('trailingPE', np.nan),
                # A missing or falsy yield is stored as 0.0.
                'Dividend Yield': info.get('dividendYield') or 0.0,
                'Sector': info.get('sector', 'Unknown'),
                'EPS Growth': info.get('earningsQuarterlyGrowth', np.nan),
                'Operating Margin': info.get('operatingMargins', np.nan),
                'FCF Margin': fcf_margin,
            }
        except Exception as e:
            print(f"Warning: Could not fetch fundamentals for {symbol}: {e}")
            record = {
                'PE Ratio': np.nan,
                'Dividend Yield': 0.0,
                'Sector': 'Unknown',
                'EPS Growth': np.nan,
                'Operating Margin': np.nan,
                'FCF Margin': np.nan,
            }
        rows[symbol] = record
    return pd.DataFrame.from_dict(rows, orient='index')

# --- FILTERING FUNCTION ---
def calc_5y_returns(close):
    """Return the total return between the first and last rows of *close*.

    Computed as ``last / first - 1``. Only the two boundary rows are used,
    so a column that is NaN in either row yields NaN.

    Args:
        close: Price history; the first and last positions are read via
            ``iloc``.

    Returns:
        The element-wise total return (a Series when *close* is a DataFrame).
    """
    first_row = close.iloc[0]
    last_row = close.iloc[-1]
    return (last_row / first_row) - 1

def filter_stocks(close, volume, fundamentals):
    """Select tickers that pass the liquidity, return, quality and sector screens.

    A ticker is kept when all of the following hold:
      * mean volume is above MIN_AVG_VOLUME and total return (from
        ``calc_5y_returns``) is above MIN_5Y_RETURN;
      * EPS Growth is present and at least 0.15;
      * Operating Margin is present and positive;
      * FCF Margin is present and above 0.10;
      * its sector is in PRIORITY_SECTORS and not in EXCLUDE_SECTORS.

    Tickers that raise ``KeyError`` during the fundamentals lookup are skipped.

    Args:
        close: Price history, one column per ticker.
        volume: Volume history, one column per ticker.
        fundamentals: Table indexed by ticker with the columns used above.

    Returns:
        tuple: ``(tickers, avg_volume, total_return)`` where the two Series
        are restricted to the selected tickers.
    """
    print("Filtering stocks based on volume, returns, fundamentals and sector preferences...")
    total_return = calc_5y_returns(close)
    avg_volume = volume.mean()

    # Volume/return screen first; only survivors get the fundamentals checks.
    passes_base = (avg_volume > MIN_AVG_VOLUME) & (total_return > MIN_5Y_RETURN)
    candidates = passes_base[passes_base].index

    filtered_tickers = []
    for symbol in candidates:
        try:
            row = fundamentals.loc[symbol]

            eps_growth = row['EPS Growth']
            if pd.isna(eps_growth) or not (eps_growth >= 0.15):
                continue

            operating_margin = row['Operating Margin']
            if pd.isna(operating_margin) or not (operating_margin > 0):
                continue

            fcf_margin = row['FCF Margin']
            if pd.isna(fcf_margin) or not (fcf_margin > 0.10):
                continue

            if row['Sector'] not in PRIORITY_SECTORS or row['Sector'] in EXCLUDE_SECTORS:
                continue

            filtered_tickers.append(symbol)
        except KeyError:
            continue

    print(f"Filtered down to {len(filtered_tickers)} quality tickers.")
    return filtered_tickers, avg_volume[filtered_tickers], total_return[filtered_tickers]

# --- PORTFOLIO OPTIMIZATION ---
def calculate_portfolio_weights(returns):
    """Find long-only weights that maximise the annualised Sharpe ratio.

    Mean returns and the covariance matrix are annualised by multiplying by
    252. SLSQP then minimises the negative Sharpe ratio subject to weights
    summing to one and each weight lying in [0, 1], starting from equal
    weights.

    If the optimizer reports failure, or returns non-finite weights, a
    warning is printed and the equal-weight starting point is used instead
    of the unreliable optimizer output.

    Args:
        returns: DataFrame of periodic returns, one column per asset.

    Returns:
        tuple: ``(weights, ret, vol, sharpe)`` - a Series of weights indexed
        like the columns of *returns*, followed by the portfolio's annualised
        return, volatility and Sharpe ratio.

    Raises:
        ValueError: If *returns* has no asset columns.
    """
    print("Optimizing portfolio weights to maximize Sharpe ratio...")
    mean_returns = returns.mean() * 252  # annualized
    cov_matrix = returns.cov() * 252     # annualized covariance
    num_assets = len(mean_returns)
    if num_assets == 0:
        raise ValueError("Cannot optimize a portfolio with no assets.")

    def portfolio_stats(weights):
        # Single definition of return/volatility/Sharpe, used both as the
        # optimizer objective and for the final report figures.
        ret = np.dot(weights, mean_returns)
        vol = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))
        sharpe = (ret - RISK_FREE_RATE) / vol
        return ret, vol, sharpe

    def neg_sharpe(weights):
        return -portfolio_stats(weights)[2]

    constraints = {'type': 'eq', 'fun': lambda x: np.sum(x) - 1}
    bounds = tuple((0, 1) for _ in range(num_assets))
    init_guess = np.full(num_assets, 1. / num_assets)

    opt = minimize(neg_sharpe, init_guess, method='SLSQP', bounds=bounds, constraints=constraints)
    if opt.success and np.all(np.isfinite(opt.x)):
        best_weights = opt.x
    else:
        # Do not silently report weights from a failed optimization.
        print(f"Warning: Sharpe optimization failed ({opt.message}); falling back to equal weights.")
        best_weights = init_guess
    weights = pd.Series(best_weights, index=mean_returns.index)

    ret, vol, sharpe = portfolio_stats(weights.to_numpy())

    print(f"Optimal portfolio expected annual return: {ret:.2%}")
    print(f"Optimal portfolio annual volatility: {vol:.2%}")
    print(f"Optimal portfolio Sharpe ratio: {sharpe:.2f}")
    return weights, ret, vol, sharpe

# --- PLOTTING FUNCTIONS ---
def plot_price_charts(close, portfolio_tickers):
    """Save price-history charts for the portfolio, up to eight tickers per image.

    Each image is a two-column grid of subplots with a shared x axis; unused
    grid cells are removed. Images are written to ``charts/price_charts``
    (created if missing) as ``price_charts_part_<k>.png`` with k starting at 1.

    Args:
        close: Price history indexable by ticker.
        portfolio_tickers: Tickers to plot, in order.

    Returns:
        list[str]: Paths of the saved images (empty when there are no tickers).
    """
    print("Generating price charts...")
    max_per_image = 8
    os.makedirs('charts/price_charts', exist_ok=True)
    image_files = []

    chunk_starts = range(0, len(portfolio_tickers), max_per_image)
    for part, start in enumerate(chunk_starts, start=1):
        subset = portfolio_tickers[start:start + max_per_image]
        nrows = (len(subset) + 1) // 2
        fig, axs = plt.subplots(nrows, 2, figsize=(12, 4 * nrows), sharex=True)
        axs = axs.flatten()

        for ax, ticker in zip(axs, subset):
            ax.plot(close[ticker], label=ticker)
            ax.set_title(f"{ticker} Price History (5 Years)")
            ax.set_ylabel('Price (USD)')
            ax.legend()

        # An odd ticker count leaves one trailing grid cell empty; drop it.
        for spare in axs[len(subset):]:
            fig.delaxes(spare)

        plt.tight_layout()
        image_file = f'charts/price_charts/price_charts_part_{part}.png'
        plt.savefig(image_file)
        image_files.append(image_file)
        plt.close()

    print(f"Saved {len(image_files)} price chart image(s).")
    return image_files

def plot_portfolio_allocation(weights):
    """Save a pie chart of the portfolio weights to ``charts/portfolio_allocation.png``.

    Only weights strictly greater than 0.01 are drawn; slices are labelled
    with the index of *weights* and their percentage share.

    Args:
        weights: Series of portfolio weights indexed by ticker.
    """
    print("Plotting portfolio allocation pie chart...")
    plt.figure(figsize=(8,6))
    visible = weights[weights > 0.01]
    plt.pie(
        visible,
        labels=visible.index,
        autopct='%1.1f%%',
        startangle=140,
    )
    plt.title('Portfolio Allocation')
    plt.tight_layout()
    # Make sure the output directory exists before writing the file.
    os.makedirs('charts', exist_ok=True)
    plt.savefig('charts/portfolio_allocation.png')
    plt.close()

def plot_efficient_frontier(returns):
    """Scatter 10,000 random long-only portfolios and save the risk/return plot.

    Every portfolio gets random weights normalised to sum to one. Its
    annualised return, volatility and Sharpe ratio (relative to
    RISK_FREE_RATE) are computed from *returns* scaled by 252. The sampled
    portfolio with the highest Sharpe ratio is marked with a red star. The
    figure is written to ``charts/efficient_frontier.png``.

    The random generator is not seeded here, so the point cloud differs
    between runs.

    Args:
        returns: DataFrame of periodic returns, one column per asset.
    """
    print("Plotting efficient frontier...")
    num_portfolios = 10000
    annual_mean = returns.mean() * 252
    annual_cov = returns.cov() * 252
    n_assets = len(annual_mean)

    sim_return = np.zeros(num_portfolios)
    sim_vol = np.zeros(num_portfolios)
    sim_sharpe = np.zeros(num_portfolios)

    for k in range(num_portfolios):
        w = np.random.random(n_assets)
        w /= np.sum(w)
        sim_return[k] = np.dot(w, annual_mean)
        sim_vol[k] = np.sqrt(np.dot(w.T, np.dot(annual_cov, w)))
        sim_sharpe[k] = (sim_return[k] - RISK_FREE_RATE) / sim_vol[k]

    # Best sampled portfolio (not the analytic optimum).
    best = np.argmax(sim_sharpe)
    max_sharpe_return = sim_return[best]
    max_sharpe_volatility = sim_vol[best]

    plt.figure(figsize=(10,7))
    plt.scatter(sim_vol, sim_return, c=sim_sharpe, cmap='YlGnBu', marker='o', s=10, alpha=0.3)
    plt.title('Efficient Frontier with Random Portfolios')
    plt.xlabel('Volatility (Risk)')
    plt.ylabel('Return')
    plt.colorbar(label='Sharpe Ratio')
    plt.scatter(
        max_sharpe_volatility,
        max_sharpe_return,
        marker='*',
        color='r',
        s=200,
        label='Maximum Sharpe Ratio Portfolio',
    )
    plt.legend(loc='upper left')
    plt.grid(True)
    os.makedirs('charts', exist_ok=True)
    plt.savefig('charts/efficient_frontier.png')
    plt.close()
    print("Efficient frontier plot saved.")

# --- PDF REPORT GENERATION ---
def _add_image_page(pdf, title, image_path):
    """Append a page with a bold 14pt *title* line and *image_path* below it."""
    pdf.add_page()
    pdf.set_font("Arial", "B", 14)
    pdf.cell(0, 10, title, 0, 1)
    pdf.ln(5)
    # Fixed placement: x=10, y=20, scaled to a width of 190.
    pdf.image(image_path, x=10, y=20, w=190)


def create_pdf_report(weights, avg_volume, total_return, fundamentals, ret, vol, sharpe, image_files):
    """Write the portfolio report to ``portfolio_report.pdf``.

    Page one holds the title, an executive summary and an allocation table
    with one row per holding whose weight exceeds 0.01. It is followed by one
    page per price-chart image, then the allocation pie chart and the
    efficient-frontier plot, which are read from
    ``charts/portfolio_allocation.png`` and ``charts/efficient_frontier.png``.

    Args:
        weights: Series of portfolio weights indexed by ticker.
        avg_volume: Average volume per ticker, indexable by ticker.
        total_return: Accepted for interface compatibility; not used here.
        fundamentals: Table indexed by ticker with a 'Dividend Yield' column.
        ret: Portfolio expected annual return (fraction).
        vol: Portfolio annual volatility (fraction).
        sharpe: Portfolio Sharpe ratio.
        image_files: Paths of the price-chart images to embed.
    """
    print("Generating multi-page PDF report...")
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)

    held = weights[weights > 0.01]

    pdf.add_page()
    pdf.set_font("Arial", "B", 16)
    pdf.cell(0, 10, "Investment Portfolio Report", 0, 1, 'C')
    pdf.ln(10)

    pdf.set_font("Arial", "B", 14)
    pdf.cell(0, 10, "Executive Summary", 0, 1)
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 8,
                   f"This report presents an optimised portfolio based on stocks filtered by minimum average volume "
                   f"of {MIN_AVG_VOLUME:,} shares and a minimum 5-year total return of {MIN_5Y_RETURN*100:.0f}%.\n"
                   f"Stocks are also filtered by fundamental growth, profitability metrics, and sector tilts based on a "
                   f"'{MARKET_REGIME}' market regime.\n\n"
                   f"Portfolio expected annual return: {ret:.2%}\n"
                   f"Portfolio annual volatility: {vol:.2%}\n"
                   f"Portfolio Sharpe ratio: {sharpe:.2f}\n\n"
                   f"The portfolio consists of {len(held)} stocks with the following allocation:")

    pdf.ln(5)
    headers = ("Ticker", "Allocation (%)", "Avg Volume", "Dividend Yield (%)")
    # Split the printable width evenly. With FPDF's default A4 portrait page
    # (210 wide, 10 side margins) the former fixed 4 x 50 columns added up to
    # 200 and ran past the right margin.
    col_width = (pdf.w - pdf.l_margin - pdf.r_margin) / len(headers)

    pdf.set_font("Arial", "B", 12)
    for header in headers:
        pdf.cell(col_width, 8, header, 1)
    pdf.ln()

    pdf.set_font("Arial", size=12)
    for t, w in held.items():
        # NOTE(review): the yield is printed exactly as stored in
        # `fundamentals`; confirm whether the source reports a fraction or a
        # percentage, since the column header says "(%)".
        dividend_yield = fundamentals.loc[t, 'Dividend Yield']
        row = (t, f"{w*100:.2f}", f"{avg_volume[t]:,.0f}", f"{dividend_yield:.2f}")
        for value in row:
            pdf.cell(col_width, 8, value, 1)
        pdf.ln()

    # Price charts, each on its own page.
    for img_file in image_files:
        _add_image_page(pdf, "Price Chart", img_file)

    _add_image_page(pdf, "Portfolio Allocation", "charts/portfolio_allocation.png")
    _add_image_page(pdf, "Efficient Frontier", "charts/efficient_frontier.png")

    output_path = "portfolio_report.pdf"
    pdf.output(output_path)
    print(f"PDF report generated: {output_path}")

# --- MAIN EXECUTION ---
def main():
    """Run the full pipeline: download, screen, optimise, plot and report.

    Stops early with a message when no ticker survives the screens.
    Holdings are the tickers whose optimised weight exceeds 0.01.

    NOTE(review): MAX_STOCKS is defined in the configuration but is not
    applied anywhere in this pipeline - confirm whether a cap is intended.
    """
    universe = get_sp500_tickers()
    close, volume = download_price_data(universe)
    fundamentals = download_fundamentals(universe)

    selected, avg_vol, total_ret = filter_stocks(close, volume, fundamentals)
    if not selected:
        print("No stocks passed the filtering criteria. Exiting.")
        return

    # Period-over-period returns; rows containing any NaN are dropped.
    daily_returns = close[selected].pct_change().dropna()
    weights, exp_ret, exp_vol, sharpe = calculate_portfolio_weights(daily_returns)

    holdings = weights[weights > 0.01].index.tolist()

    image_files = plot_price_charts(close, holdings)
    plot_portfolio_allocation(weights)
    plot_efficient_frontier(daily_returns)
    create_pdf_report(
        weights,
        avg_vol,
        total_ret,
        fundamentals,
        exp_ret,
        exp_vol,
        sharpe,
        image_files,
    )


if __name__ == "__main__":
    main()
