In [1]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import pprint
import inspect  # <--- ADD THIS LINE
from IPython.display import display, Markdown

# --- 1. PANDAS & IPYTHON OPTIONS ---
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 3000)
pd.set_option('display.float_format', '{:.6f}'.format)
%load_ext autoreload
%autoreload 2

# --- 2. PROJECT PATH CONFIGURATION ---
NOTEBOOK_DIR = Path.cwd()
PARENT_DIR = NOTEBOOK_DIR.parent
ROOT_DIR = NOTEBOOK_DIR.parent.parent  # Adjust if your notebook is in a 'notebooks' subdirectory
DATA_DIR = ROOT_DIR / 'data'
SRC_DIR = ROOT_DIR / 'src'

# Add 'src' to the Python path to import custom modules
if str(SRC_DIR) not in sys.path:
    sys.path.append(str(SRC_DIR))

# --- 3. IMPORT CUSTOM MODULES ---
import utils

# --- 4. PORTFOLIO VALUE ---
PORTFOLIO_VALUE = 1000000

# --- 5. VERIFICATION ---
print("--- Path Configuration ---")
print(f"✅ Project Root: {ROOT_DIR}")
print(f"✅ Parent Dir:   {PARENT_DIR}")
print(f"✅ Notebook Dir: {NOTEBOOK_DIR}")
print(f"✅ Data Dir:     {DATA_DIR}")
print(f"✅ Source Dir:   {SRC_DIR}")
assert all([ROOT_DIR.exists(), DATA_DIR.exists(), SRC_DIR.exists()]), "A key directory was not found!"

print("\n--- Module Verification ---")
print(f"✅ Successfully imported 'utils' and 'plotting_utils'.")

--- Path Configuration ---
✅ Project Root: c:\Users\ping\Files_win10\python\py311\stocks
✅ Parent Dir:   c:\Users\ping\Files_win10\python\py311\stocks\notebooks_PyPortfOpt
✅ Notebook Dir: c:\Users\ping\Files_win10\python\py311\stocks\notebooks_PyPortfOpt\_working
✅ Data Dir:     c:\Users\ping\Files_win10\python\py311\stocks\data
✅ Source Dir:   c:\Users\ping\Files_win10\python\py311\stocks\src

--- Module Verification ---
✅ Successfully imported 'utils' and 'plotting_utils'.


# FinRL Example of Mean Variance Optimization
* https://github.com/AI4Finance-Foundation/FinRL/blob/master/examples/Stock_NeurIPS2018_3_Backtest.ipynb

In [None]:
train = pd.read_csv(PARENT_DIR / 'train_data.csv')
trade = pd.read_csv(PARENT_DIR / 'trade_data.csv')

# If you are not using the data generated from part 1 of this tutorial, make sure
# it has the columns and index in the form that could be make into the environment.
# Then you can comment and skip the following lines.
train = train.set_index(train.columns[0])
train.index.names = ['']
trade = trade.set_index(trade.columns[0])
trade.index.names = ['']

print(f'train:\n{train}')
print(f'\ntrade:\n{trade}')

In [None]:
def process_df_for_mvo(df):
  return df.pivot(index="date", columns="tic", values="close")

In [None]:
# Codes in this section partially refer to Dr G A Vijayalakshmi Pai
# https://www.kaggle.com/code/vijipai/lesson-5-mean-variance-optimization-of-portfolios/notebook

def StockReturnsComputing(StockPrice, Rows, Columns):
  import numpy as np
  StockReturn = np.zeros([Rows-1, Columns])
  for j in range(Columns):        # j: Assets
    for i in range(Rows-1):     # i: Daily Prices
      StockReturn[i,j]=((StockPrice[i+1, j]-StockPrice[i,j])/StockPrice[i,j])* 100

  return StockReturn

In [None]:
StockData = process_df_for_mvo(train)
TradeData = process_df_for_mvo(trade)

TradeData.to_numpy()

In [None]:
#compute asset returns
arStockPrices = np.asarray(StockData)
[Rows, Cols]=arStockPrices.shape
arReturns = StockReturnsComputing(arStockPrices, Rows, Cols)

#compute mean returns and variance covariance matrix of returns
meanReturns = np.mean(arReturns, axis = 0)
covReturns = np.cov(arReturns, rowvar=False)

#set precision for printing results
np.set_printoptions(precision=3, suppress = True)

#display mean returns and variance-covariance matrix of returns
print(f'arReturns in k-portfolio 1\n',arReturns)
print(f'\nMean returns of assets in k-portfolio 1\n', meanReturns)
print(f'\nVariance-Covariance matrix of returns\n', covReturns)

### Summary of the Workflow
1. Setup: Provide the model with historical asset returns (meanReturns) and their risk/correlation profile (covReturns), and set rules (like no short-selling and a 50% max allocation per asset).
2. Optimize: Solve for the portfolio that gives the best risk-adjusted return (maximum Sharpe Ratio).
3. Clean: Tidy up the resulting percentage allocations to make them practical.
4. Execute: Convert these optimal percentages into actual dollar amounts based on a total portfolio size of $1,000,000.

In [None]:
from pypfopt.efficient_frontier import EfficientFrontier

ef_mean = EfficientFrontier(meanReturns, covReturns, weight_bounds=(0, 0.5))
raw_weights_mean = ef_mean.max_sharpe()
cleaned_weights_mean = ef_mean.clean_weights()
mvo_weights = np.array([PORTFOLIO_VALUE * cleaned_weights_mean[i] for i in range(len(cleaned_weights_mean))])
mvo_weights  # Dollar amount allocation

In [None]:
cleaned_weights_mean

In [None]:
LastPrice = np.array([1/p for p in StockData.tail(1).to_numpy()[0]])
Initial_Portfolio = np.multiply(mvo_weights, LastPrice)
Initial_Portfolio  # number of shares

In [None]:
Portfolio_Assets = TradeData @ Initial_Portfolio
MVO_result = pd.DataFrame(Portfolio_Assets, columns=["Mean Var"])
print(f'MVO_result:\n{MVO_result}')

##########################

# Replicate FinRL Mean Variance Optimization with PyPortfOpt

In [None]:
df = StockData.copy()
print(f'df:\n{df}')

In [None]:
import pandas as pd
import numpy as np
from pypfopt.expected_returns import mean_historical_return
from pypfopt.risk_models import sample_cov

# Ensure your 'StockData' is a pandas DataFrame.
# If StockData is a numpy array like in your first example, convert it:
# StockData_df = pd.DataFrame(StockData, columns=[...list of tickers...])
# Replace StockData below with your DataFrame.

# 1. Calculate mean returns to match your first script
#    - compounding=False gets the simple arithmetic mean.
#    - frequency=1 prevents annualization.
mu_simple = mean_historical_return(StockData, 
                                   returns_data=False, 
                                   compounding=False, 
                                   frequency=1)

# 2. Calculate sample covariance to match your first script
#    - Use sample_cov to get the standard covariance matrix, equivalent to np.cov().
#    - frequency=1 prevents annualization.
S_simple = sample_cov(StockData, 
                      returns_data=False, 
                      frequency=1)

# 3. Scale the outputs to match your first script's format.
#    Your first script appears to work with percentage returns (e.g., 1.5 for 1.5%),
#    while PyPortfolioOpt uses decimal returns (e.g., 0.015). We must scale the results.

# Scale mean returns by 100
mu_scaled = mu_simple * 100

# Scale covariance matrix by 100*100 = 10,000
S_scaled = S_simple * 10000

# Set precision for printing results
np.set_printoptions(precision=3, suppress = True)

# Display the results
print("Modified PyPortfolioOpt Mean Returns (scaled to match):")
print(mu_scaled)
print("\nModified PyPortfolioOpt Covariance Matrix (scaled to match):")
print(S_scaled)

# PyPortfOpt Example of Mean Variance Optimization
* https://pyportfolioopt.readthedocs.io/en/latest/UserGuide.html

In [None]:
from pypfopt.expected_returns import mean_historical_return
from pypfopt.risk_models import CovarianceShrinkage

mu = mean_historical_return(StockData, returns_data=False, compounding=True, frequency=252, log_returns=False)  # default setting
S = CovarianceShrinkage(StockData).ledoit_wolf()

print(f'mu.head():\n{mu.head()}')
print(f'\nS:\n{S}')

In [None]:
trading_days = 252

# The formula works on the entire series seamlessly
daily_returns = (1 + mu)**(1 / trading_days) - 1

print("--- Annualized Returns mu ---")
print(mu.head())
print("\n--- Converted to Daily Returns ---")
print(daily_returns.head())

In [None]:
from pypfopt.efficient_frontier import EfficientFrontier

ef = EfficientFrontier(mu, S, weight_bounds=(0, 0.5))
weights = ef.max_sharpe()

In [None]:
cleaned_weights = ef.clean_weights()
ef.save_weights_to_file("weights.txt")  # saves to file
print(cleaned_weights)

In [None]:
# Create a DataFrame from the tickers and dollar amounts
allocation = pd.DataFrame({
    'Ticker': list(cleaned_weights.keys()),
    'Weight': list(cleaned_weights.values())
})

# Show filtered out rows
filtered = allocation[allocation['Weight'] > 0.01]

print(f'allocation.head():\n{allocation.head()}')
print("\nFiltered rows (weights > 1%):")
print(filtered)

In [None]:
import pandas as pd
import numpy as np
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt.expected_returns import mean_historical_return
from pypfopt.risk_models import CovarianceShrinkage

# --- Step 1: Calculate Inputs using your preferred PyPortfolioOpt methods ---
# These are the annualized, compounded mean returns and the shrunk covariance matrix.
# We assume 'StockData' is your pandas DataFrame of prices.
mu = mean_historical_return(StockData, compounding=True, frequency=252)
S = CovarianceShrinkage(StockData).ledoit_wolf()


# --- Step 2: Perform Mean-Variance Optimization ---
# We use the new mu and S. We keep the weight bounds the same for a fair comparison.
# The risk-free rate for max_sharpe defaults to 2%, which is standard.
ef_pyopt = EfficientFrontier(mu, S, weight_bounds=(0, 0.5))

# Find the portfolio weights that maximize the Sharpe ratio
raw_weights_pyopt = ef_pyopt.max_sharpe()

# Clean the weights (rounding and clipping)
cleaned_weights_pyopt = ef_pyopt.clean_weights()
print("Optimal Weights from PyPortfolioOpt (mu, S):")
# The ef.portfolio_performance() is a great way to see the expected results
# ef_pyopt.portfolio_performance(verbose=True)
print(cleaned_weights_pyopt)


# --- Step 3: Convert Weights to Dollar Amounts ---
# Using the same total portfolio value of $1,000,000 as your original code.
portfolio_value = 1000000

# We need to get the weights in the same order as your stock columns.
# cleaned_weights_pyopt is a dictionary, so we map it to the column order.
ordered_weights = np.array([cleaned_weights_pyopt[ticker] for ticker in StockData.columns])
mvo_weights_pyopt = portfolio_value * ordered_weights

print("\nDollar Allocation:")
print(mvo_weights_pyopt)


# --- Step 4: Convert Dollar Amounts to Number of Shares ---
# This logic is identical to your original code: shares = dollar_amount / last_price
last_prices = StockData.iloc[-1].to_numpy()
initial_portfolio_pyopt = mvo_weights_pyopt / last_prices

print("\nNumber of Shares to Purchase:")
print(initial_portfolio_pyopt)


# --- Step 5: Perform the Final Matrix Multiplication ---
# This is the final step you wanted to replicate.
# It uses your 'TradeData' matrix with the new 'initial_portfolio_pyopt' vector.
portfolio_assets_pyopt = TradeData @ initial_portfolio_pyopt

# Store and display the final result in a DataFrame for clarity
pyopt_mvo_result = pd.DataFrame(portfolio_assets_pyopt, columns=["PyOpt_MVO_Result"])

print(f'\nFinal Result (TradeData @ Initial_Portfolio):\n{pyopt_mvo_result}')

In [None]:
# You can use this method instead of the simple loop for a more structured output.

# Convert the weights dictionary to a pandas Series
weights_series = pd.Series(cleaned_weights_pyopt)

# Filter for non-zero weights and sort them to see the largest allocations first
sorted_weights = weights_series[weights_series > 0].sort_values(ascending=False)

# Convert to a DataFrame for pretty printing
weights_df = sorted_weights.to_frame("Weight")

# Format the 'Weight' column to display as a percentage
weights_df["Weight"] = weights_df["Weight"].map('{:.2%}'.format)

print("\n--- Optimal Portfolio Weights (Sorted) ---")
print(weights_df)
print("-" * 35)
ef_pyopt.portfolio_performance(verbose=True)

In [None]:
##############

In [2]:
import pandas as pd

pd.set_option('display.float_format', '{:.2f}'.format)

# Use pd.read_parquet() to load a parquet file into a DataFrame
# The correct engine name is 'pyarrow'
df = pd.read_parquet(DATA_DIR / 'df_adj_close.parquet', engine='pyarrow')

In [3]:
print(f'df:\n{df}')

df:
Ticker          A    AA   AAL  AAON   AAPL   ABBV  ABEV   ABNB    ABT  ACGL  ACHR   ACI    ACM    ACN   ACWI  ACWX   ADBE   ADC    ADI   ADM    ADP   ADSK  ADT    AEE  AEG    AEM    AEP    AER   AES    AFG    AFL  AFRM   AGCO   AGG   AGI  AGNC   AIG   AIQ  AIRR    AIT    AIZ    AJG  AKAM    AL    ALB   ALC  ALGM   ALGN   ALK    ALL   ALLE  ALLY   ALNY  ALSN    ALV    AM   AMAT  AMCR    AMD    AME   AMGN   AMH  AMLP    AMP    AMT   AMX   AMZN     AN   ANET    AON   AOS   APA    APD   APG    APH    APO    APP   APPF  APTV    AR  ARCC    ARE   ARES   ARGX  ARKK  ARMK    ARW   ASML   ASND    ASR  ASTS   ASX   ATI    ATO    ATR    AU  AUR   AVAV    AVB  AVDE  AVDV  AVEM   AVGO  AVLV  AVTR   AVUS  AVUV    AVY    AWI    AWK   AXON    AXP   AXS  AXTA    AYI   AZN     AZO     B     BA   BABA   BAC    BAH  BALL   BAM    BAP   BAX  BBAX  BBCA  BBD  BBEU  BBIN  BBIO  BBJP   BBUS  BBVA  BBWI   BBY   BCE   BCH   BCS    BDX    BE  BEKE   BEN   BEP  BEPC  BF-A  BF-B   BFAM    BG   BHP   BIDU   BII

In [4]:
from pypfopt.expected_returns import mean_historical_return
from pypfopt.risk_models import CovarianceShrinkage

mu = mean_historical_return(df, returns_data=False, compounding=True, frequency=252, log_returns=False)  # default setting
S = CovarianceShrinkage(df).ledoit_wolf()

pd.set_option('display.float_format', '{:.6f}'.format)

print(f'mu.head():\n{mu.head()}')
print(f'\nS:\n{S}')

mu.head():
Ticker
A      -0.076897
AA     -0.124526
AAL    -0.039457
AAON    0.228968
AAPL    0.241093
dtype: float64

S:
Ticker        A       AA      AAL     AAON     AAPL     ABBV     ABEV     ABNB      ABT     ACGL     ACHR      ACI      ACM      ACN     ACWI     ACWX     ADBE      ADC      ADI      ADM      ADP     ADSK      ADT       AEE      AEG       AEM       AEP      AER      AES      AFG      AFL     AFRM     AGCO      AGG      AGI     AGNC      AIG      AIQ     AIRR      AIT      AIZ      AJG     AKAM       AL      ALB      ALC     ALGM     ALGN      ALK      ALL     ALLE     ALLY     ALNY     ALSN      ALV       AM     AMAT     AMCR      AMD      AME     AMGN      AMH     AMLP      AMP      AMT      AMX     AMZN       AN     ANET      AON      AOS      APA      APD      APG      APH      APO      APP     APPF     APTV       AR     ARCC      ARE     ARES     ARGX     ARKK     ARMK      ARW     ASML     ASND      ASR     ASTS      ASX      ATI      ATO      ATR       AU     

In [5]:
trading_days = 252

# The formula works on the entire series seamlessly
daily_returns = (1 + mu)**(1 / trading_days) - 1

print("--- Annualized Returns mu ---")
print(mu.head())
print("\n--- Converted to Daily Returns ---")
print(daily_returns.head())

--- Annualized Returns mu ---
Ticker
A      -0.076897
AA     -0.124526
AAL    -0.039457
AAON    0.228968
AAPL    0.241093
dtype: float64

--- Converted to Daily Returns ---
Ticker
A      -0.000317
AA     -0.000528
AAL    -0.000160
AAON    0.000818
AAPL    0.000857
dtype: float64


In [6]:
# Check if all mu values are less than the default risk-free rate of 2%
all_returns_are_low = (mu < 0.02).all()
print(f"Are all expected returns below 2%? {all_returns_are_low}")

Are all expected returns below 2%? False


In [17]:
len(sorted_weights)

1518

# ##################################

#############################################  
#############################################  
#############################################  

#############################################  
#############################################  
#############################################  

In [None]:
import numpy as np
import pandas as pd
from pypfopt import risk_models, expected_returns
from pypfopt.efficient_frontier import EfficientFrontier
import sys

# ===================================================================
# PART 1: DEFINE ALL TOOLS AND FUNCTIONS
# ===================================================================

def prune_by_correlation(mu, S, threshold=0.85, verbose=False):
    """Iteratively prunes a universe by removing the lower-return asset from any
    pair with correlation > threshold, until no such pairs remain."""
    print(f"Pruning asset universe with correlation > {threshold:.0%}...")
    mu_pruned, S_pruned = mu.copy(), S.copy()
    
    while True:
        corr_matrix = risk_models.cov_to_corr(S_pruned)
        corr_unstacked = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool)).stack()
        if corr_unstacked.empty or corr_unstacked.max() < threshold: break
        
        ticker1, ticker2 = corr_unstacked.idxmax()
        ticker_to_drop = ticker1 if mu_pruned[ticker1] < mu_pruned[ticker2] else ticker2
        if verbose:
            print(f"  - High correlation: ({ticker1}, {ticker2}) = {corr_unstacked.max():.2f}. Dropping '{ticker_to_drop}'.")
        mu_pruned = mu_pruned.drop(ticker_to_drop)
        S_pruned = S_pruned.drop(index=ticker_to_drop, columns=ticker_to_drop)
        
    print(f"Finished pruning. {len(mu_pruned)} tickers remain.")
    return mu_pruned, S_pruned

def filter_by_returns(mu, S, quantile_to_keep=0.50):
    """Filters a universe by keeping only the top performers by expected return."""
    print(f"\nFiltering for top {quantile_to_keep:.0%} of assets by expected return...")
    return_cutoff = mu.quantile(1 - quantile_to_keep)
    tickers_to_keep = mu[mu >= return_cutoff].index
    mu_filtered, S_filtered = mu.loc[tickers_to_keep], S.loc[tickers_to_keep, tickers_to_keep]
    print(f"Finished filtering. {len(mu_filtered)} tickers remain.")
    return mu_filtered, S_filtered

def verify_correlation(S, threshold):
    """Checks if any asset pairs have a correlation higher than the threshold."""
    print(f"\nVerifying final asset list for correlations > {threshold:.1%}...")
    sys.stdout.flush()
    corr_matrix = risk_models.cov_to_corr(S)
    corr_unstacked = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool)).stack()
    if corr_unstacked.empty or corr_unstacked.max() < threshold:
        print(f"✅ VERIFICATION SUCCESS: No remaining pairs are above the {threshold:.1%} correlation threshold.")
    else:
        max_corr_pair = corr_unstacked.idxmax()
        print(f"❌ VERIFICATION FAILED: Highest correlation is {corr_unstacked.max():.2f} > {threshold:.1%}.")
    sys.stdout.flush()

def verify_returns(final_mu, reference_mu, quantile_to_keep):
    """Checks if all assets in the final list meet the return quantile criteria."""
    print(f"\nVerifying final asset list against the top {quantile_to_keep:.1%} return quantile...")
    sys.stdout.flush()
    return_cutoff = reference_mu.quantile(1 - quantile_to_keep)
    min_return_in_final_list = final_mu.min()
    if min_return_in_final_list >= return_cutoff:
        print(f"✅ VERIFICATION SUCCESS: All assets meet the return cutoff for the top {quantile_to_keep:.1%}.")
        print(f"   (Min return in list: {min_return_in_final_list:.2%}, Cutoff was: {return_cutoff:.2%})")
    else:
        print(f"❌ VERIFICATION FAILED: At least one asset is below the return cutoff ({return_cutoff:.2%}).")
    sys.stdout.flush()

try:
    # ===================================================================
    # PART 2: PRE-OPTIMIZATION FILTERING & PRIMARY PORTFOLIO GENERATION
    # ===================================================================
    
    # --- Initial Data and Parameter Setup ---
    # Assume 'df' is your full DataFrame of historical prices for ALL candidate tickers.
    # df = pd.read_csv("tests/resources/stock_prices.csv", parse_dates=True, index_col="date")
    
    print(f"Starting with an initial universe of {len(df.columns)} tickers.")
    
    full_mu = expected_returns.mean_historical_return(df)
    full_S = risk_models.risk_matrix(df, method='ledoit_wolf')
    
    CORRELATION_THRESHOLD = 0.80
    RETURN_QUANTILE_TO_KEEP = 0.10
    
    mu_after_pruning, S_after_pruning = prune_by_correlation(full_mu, full_S, threshold=CORRELATION_THRESHOLD, verbose=False)
    mu_final, S_final = filter_by_returns(mu_after_pruning, S_after_pruning, quantile_to_keep=RETURN_QUANTILE_TO_KEEP)
    
    print("\n" + "="*20 + " VERIFICATION " + "="*20)
    verify_correlation(S_final, CORRELATION_THRESHOLD)
    verify_returns(mu_final, mu_after_pruning, RETURN_QUANTILE_TO_KEEP)
    print("="*54)

    # --- Run the Main Optimization ---
    print(f"\nRunning main optimizer on the final curated universe of {len(mu_final)} tickers...")
    risk_free_rate = 0.04
    ef = EfficientFrontier(mu_final, S_final, weight_bounds=(0, 0.5), solver='SCS')
    ef.risk_free_rate = risk_free_rate
    
    # ===================== THE FIX IS HERE =====================
    # 1. Run the optimization to actually compute the weights
    raw_weights = ef.max_sharpe() 
    
    # 2. NOW that weights have been computed, we can clean them.
    main_weights = ef.clean_weights()
    # ===========================================================
    
    print("SUCCESS! Main optimization complete.")
    
    print("\n--- Primary Optimized Portfolio (from curated universe) ---")
    ef.portfolio_performance(verbose=True)
    print(f"Number of tickers with non-zero weight: {len(main_weights)}")

    # ===================================================================
    # PART 3: POST-OPTIMIZATION SENSITIVITY ANALYSIS
    # ===================================================================
    
    summary_data = []
    print("\n\n" + "="*50)
    print("      POST-OPTIMIZATION SENSITIVITY ANALYSIS")
    print("="*50)
    
    threshold_range = np.arange(0, 0.051, 0.005)

    for threshold in threshold_range:
        filtered_weights = {ticker: weight for ticker, weight in main_weights.items() if weight >= threshold}
        if not filtered_weights: continue
        total_filtered_weight = sum(filtered_weights.values())
        rebalanced_weights = {ticker: weight / total_filtered_weight for ticker, weight in filtered_weights.items()}
        rebalanced_weights_series = pd.Series(rebalanced_weights).reindex(mu_final.index).fillna(0)
        p_return = np.dot(rebalanced_weights_series, mu_final)
        p_volatility = np.sqrt(np.dot(rebalanced_weights_series.T, np.dot(S_final, rebalanced_weights_series)))
        p_sharpe = (p_return - risk_free_rate) / p_volatility
        summary_data.append({
            "Min Weight": f"{threshold:.1%}", "Num Tickers": len(rebalanced_weights),
            "Return": f"{p_return:.2%}", "Volatility": f"{p_volatility:.2%}",
            "Sharpe Ratio": f"{p_sharpe:.2f}"
        })

    summary_df = pd.DataFrame(summary_data)
    print(summary_df.to_string(index=False))
    print("="*50)

except Exception as e:
    print(f"\n\nFATAL ERROR during processing: {e}")

Starting with an initial universe of 1518 tickers.
Pruning asset universe with correlation > 80%...
Finished pruning. 1422 tickers remain.

Filtering for top 10% of assets by expected return...
Finished filtering. 143 tickers remain.


Verifying final asset list for correlations > 80.0%...
✅ VERIFICATION SUCCESS: No remaining pairs are above the 80.0% correlation threshold.

Verifying final asset list against the top 10.0% return quantile...
✅ VERIFICATION SUCCESS: All assets meet the return cutoff for the top 10.0%.
   (Min return in list: 46.51%, Cutoff was: 46.51%)

Running main optimizer on the final curated universe of 143 tickers...
SUCCESS! Main optimization complete.

--- Primary Optimized Portfolio (from curated universe) ---
Expected annual return: 153.2%
Annual volatility: 24.8%
Sharpe Ratio: 6.17
Number of tickers with non-zero weight: 143


      POST-OPTIMIZATION SENSITIVITY ANALYSIS
Min Weight  Num Tickers  Return Volatility Sharpe Ratio
      0.0%          143 153.21%  

  warn(
  warn(


In [38]:
# ===================================================================
# PART 2: RECALL AND DISPLAY A SPECIFIC PORTFOLIO'S WEIGHTS
# ===================================================================

def display_portfolio_for_threshold(portfolios_data, target_threshold):
    """
    Retrieves and prints the tickers and weights for a specific threshold
    from the stored portfolio data.

    Args:
        portfolios_data (dict): The dictionary containing all portfolio weights,
                                with thresholds as keys.
        target_threshold (float): The specific threshold to look up (e.g., 0.015 for 1.5%).
    """
    print(f"\n--- Recalling Portfolio for Threshold: {target_threshold:.1%} ---")

    # Check if the requested threshold exists in our stored data
    if target_threshold in portfolios_data:
        # Retrieve the specific portfolio dictionary
        portfolio = portfolios_data[target_threshold]
        
        print(f"Number of tickers: {len(portfolio)}")
        
        # Sort the tickers by weight for clear presentation
        sorted_portfolio = sorted(portfolio.items(), key=lambda item: item[1], reverse=True)
        
        print("Asset Allocation:")
        for ticker, weight in sorted_portfolio:
            print(f"  - {ticker}: {weight:.2%}")
    else:
        # Handle cases where the threshold was not valid or resulted in an empty portfolio
        print(f"ERROR: No data found for threshold {target_threshold:.1%}.")
        # To help the user, show them which thresholds are available
        available_keys = [f"{key:.1%}" for key in portfolios_data.keys()]
        print(f"Available thresholds are: {', '.join(available_keys)}")


# --- DEMONSTRATION OF HOW TO USE THE FUNCTION ---
# This part would run after the main analysis is complete.
# We check if 'all_portfolios' was successfully created before trying to use it.
if 'all_portfolios' in locals():
    # Example 1: Recall the portfolio for the 1.5% threshold
    display_portfolio_for_threshold(all_portfolios, 0.045)
    
    # Example 2: Recall the portfolio for the 4.0% threshold
    display_portfolio_for_threshold(all_portfolios, 0.05)
    
    # # Example 3: Try to recall a threshold that doesn't exist to show error handling
    # display_portfolio_for_threshold(all_portfolios, 0.033)


--- Recalling Portfolio for Threshold: 4.5% ---
Number of tickers: 12
Asset Allocation:
  - APP: 12.31%
  - GBTC: 12.25%
  - TIGO: 9.76%
  - SFM: 9.07%
  - CVNA: 8.61%
  - ESLT: 8.42%
  - EXEL: 7.74%
  - SPOT: 7.45%
  - COKE: 7.28%
  - PLTR: 6.05%
  - CLS: 5.79%
  - KTOS: 5.27%

--- Recalling Portfolio for Threshold: 5.0% ---
Number of tickers: 10
Asset Allocation:
  - APP: 13.85%
  - GBTC: 13.77%
  - TIGO: 10.98%
  - SFM: 10.20%
  - CVNA: 9.68%
  - ESLT: 9.47%
  - EXEL: 8.70%
  - SPOT: 8.38%
  - COKE: 8.18%
  - PLTR: 6.81%


In [24]:
import numpy as np
import pandas as pd
from pypfopt.efficient_frontier import EfficientFrontier

# Assume 'mu' and 'S' are already defined, e.g.,
# from pypfopt import risk_models, expected_returns
# df = pd.read_csv("tests/resources/stock_prices.csv", parse_dates=True, index_col="date")
# mu = expected_returns.mean_historical_return(df)
# S = risk_models.sample_cov(df)


# --- 1. Initial Optimization (Done only once) ---

print("Constructing the optimizer and running max_sharpe...")

risk_free_rate = 0.04
ef = EfficientFrontier(mu, S, weight_bounds=(0, 0.5), solver='SCS')
ef.risk_free_rate = risk_free_rate

try:
    weights = ef.max_sharpe()
    cleaned_weights = ef.clean_weights()
    print("SUCCESS! Initial optimization complete.\n")

    print("--- Original Portfolio (No Filtering) ---")
    ef.portfolio_performance(verbose=True)
    print(f"Number of tickers: {len(cleaned_weights)}")

    # --- 2. Iteration, Analysis, and Storage ---

    summary_data = []
    
    # === MODIFICATION 1: Initialize storage for all portfolios ===
    # This dictionary will hold the portfolio weights for each threshold.
    # The key will be the threshold (e.g., 0.015), and the value will be the
    # dictionary of tickers and their rebalanced weights.
    all_portfolios = {}

    threshold_range = np.arange(0, 0.051, 0.005)

    print("\n--- Running Analysis for Different Weight Thresholds ---")

    for threshold in threshold_range:
        filtered_weights = {
            ticker: weight
            for ticker, weight in cleaned_weights.items()
            if weight >= threshold
        }
        
        if not filtered_weights:
            continue

        total_filtered_weight = sum(filtered_weights.values())
        rebalanced_weights = {
            ticker: weight / total_filtered_weight
            for ticker, weight in filtered_weights.items()
        }

        # === MODIFICATION 2: Store the rebalanced portfolio ===
        # We use the threshold as the key for easy retrieval later.
        all_portfolios[threshold] = rebalanced_weights

        # --- The rest of the performance calculation continues as before ---
        rebalanced_weights_series = pd.Series(rebalanced_weights).reindex(mu.index).fillna(0)
        p_return = np.dot(rebalanced_weights_series, mu)
        p_volatility = np.sqrt(np.dot(rebalanced_weights_series.T, np.dot(S, rebalanced_weights_series)))
        p_sharpe = (p_return - risk_free_rate) / p_volatility

        summary_data.append({
            "Threshold": f"{threshold:.1%}",
            "Num Tickers": len(rebalanced_weights),
            "Return": f"{p_return:.2%}",
            "Volatility": f"{p_volatility:.2%}",
            "Sharpe Ratio": f"{p_sharpe:.2f}"
        })

    # --- 3. Display Final Summary Table ---
    
    summary_df = pd.DataFrame(summary_data)
    
    print("\n\n" + "="*50)
    print("      PORTFOLIO FILTERING SENSITIVITY ANALYSIS")
    print("="*50)
    print(summary_df.to_string(index=False))
    print("="*50)
    
    # At this point, the `all_portfolios` dictionary is populated and ready for use.
    # We will pass it to the code in Part 2.

except Exception as e:
    print(f"\nERROR: Optimization or calculation failed. Error: {e}")

Constructing the optimizer and running max_sharpe...


  warn(
  warn(


SUCCESS! Initial optimization complete.

--- Original Portfolio (No Filtering) ---
Expected annual return: 32.7%
Annual volatility: 4.6%
Sharpe Ratio: 7.05
Number of tickers: 1518

--- Running Analysis for Different Weight Thresholds ---


      PORTFOLIO FILTERING SENSITIVITY ANALYSIS
Threshold  Num Tickers Return Volatility Sharpe Ratio
     0.0%         1518 32.70%      4.64%         6.19
     0.5%           51 31.98%      4.58%         6.10
     1.0%           36 28.21%      4.20%         5.77
     1.5%           26 18.43%      3.40%         4.24
     2.0%           19  8.45%      3.09%         1.44
     2.5%           18  8.60%      3.17%         1.45
     3.0%           17  8.75%      3.28%         1.45
     3.5%            3 16.27%     10.03%         1.22
     4.0%            1 30.79%     21.98%         1.22
     4.5%            1 30.79%     21.98%         1.22
     5.0%            1 30.79%     21.98%         1.22


In [25]:
# ===================================================================
# PART 2: RECALL AND DISPLAY A SPECIFIC PORTFOLIO'S WEIGHTS
# ===================================================================

def display_portfolio_for_threshold(portfolios_data, target_threshold):
    """
    Retrieves and prints the tickers and weights for a specific threshold
    from the stored portfolio data.

    Args:
        portfolios_data (dict): The dictionary containing all portfolio weights,
                                with thresholds as keys.
        target_threshold (float): The specific threshold to look up (e.g., 0.015 for 1.5%).
    """
    print(f"\n--- Recalling Portfolio for Threshold: {target_threshold:.1%} ---")

    # Check if the requested threshold exists in our stored data
    if target_threshold in portfolios_data:
        # Retrieve the specific portfolio dictionary
        portfolio = portfolios_data[target_threshold]
        
        print(f"Number of tickers: {len(portfolio)}")
        
        # Sort the tickers by weight for clear presentation
        sorted_portfolio = sorted(portfolio.items(), key=lambda item: item[1], reverse=True)
        
        print("Asset Allocation:")
        for ticker, weight in sorted_portfolio:
            print(f"  - {ticker}: {weight:.2%}")
    else:
        # Handle cases where the threshold was not valid or resulted in an empty portfolio
        print(f"ERROR: No data found for threshold {target_threshold:.1%}.")
        # To help the user, show them which thresholds are available
        available_keys = [f"{key:.1%}" for key in portfolios_data.keys()]
        print(f"Available thresholds are: {', '.join(available_keys)}")


# --- DEMONSTRATION OF HOW TO USE THE FUNCTION ---
# This part would run after the main analysis is complete.
# We check if 'all_portfolios' was successfully created before trying to use it.
if 'all_portfolios' in locals():
    # Example 1: Recall the portfolio for the 1.5% threshold
    display_portfolio_for_threshold(all_portfolios, 0.005)
    
    # Example 2: Recall the portfolio for the 4.0% threshold
    display_portfolio_for_threshold(all_portfolios, 0.010)
    
    # # Example 3: Try to recall a threshold that doesn't exist to show error handling
    # display_portfolio_for_threshold(all_portfolios, 0.033)


--- Recalling Portfolio for Threshold: 0.5% ---
Number of tickers: 51
Asset Allocation:
  - CBOE: 5.29%
  - PULS: 3.73%
  - MINT: 3.67%
  - TFLO: 3.53%
  - JAAA: 3.53%
  - ICSH: 3.43%
  - SGOV: 3.37%
  - USFR: 3.33%
  - BIL: 3.32%
  - BOXX: 3.28%
  - TBIL: 3.26%
  - BILS: 3.25%
  - COR: 3.23%
  - JPST: 3.23%
  - SHV: 3.22%
  - FTSM: 3.17%
  - GBIL: 3.16%
  - VUSB: 2.68%
  - SPTS: 2.20%
  - VGSH: 1.86%
  - JMST: 1.85%
  - FLOT: 1.84%
  - SCHO: 1.82%
  - APP: 1.79%
  - LMBS: 1.72%
  - GBTC: 1.71%
  - KR: 1.49%
  - SPSB: 1.34%
  - CVNA: 1.28%
  - CLS: 1.27%
  - VTIP: 1.25%
  - SHY: 1.22%
  - CME: 1.22%
  - NVDA: 1.20%
  - DFSD: 1.14%
  - STIP: 1.10%
  - PLTR: 0.93%
  - TIGO: 0.93%
  - GLDM: 0.89%
  - MCK: 0.88%
  - SPOT: 0.81%
  - EXEL: 0.81%
  - K: 0.79%
  - BSV: 0.79%
  - SGOL: 0.74%
  - SUB: 0.72%
  - SFM: 0.61%
  - COKE: 0.56%
  - ESLT: 0.53%
  - PHYS: 0.52%
  - COOP: 0.52%

--- Recalling Portfolio for Threshold: 1.0% ---
Number of tickers: 36
Asset Allocation:
  - CBOE: 5.94%
  - PU

In [23]:
import numpy as np
import pandas as pd
from pypfopt.efficient_frontier import EfficientFrontier

# Assume 'mu' and 'S' are already defined, e.g.,
# from pypfopt import risk_models, expected_returns
# df = pd.read_csv("tests/resources/stock_prices.csv", parse_dates=True, index_col="date")
# mu = expected_returns.mean_historical_return(df)
# S = risk_models.sample_cov(df)


# --- 1. Initial Optimization (Done only once) ---

print("Constructing the optimizer and running max_sharpe...")

risk_free_rate = 0.02
ef = EfficientFrontier(mu, S, weight_bounds=(0, 0.5), solver='SCS')
ef.risk_free_rate = risk_free_rate

try:
    # Get the raw weights from the optimizer
    weights = ef.max_sharpe()
    # Get the weights cleaned of near-zero values
    cleaned_weights = ef.clean_weights()
    print("SUCCESS! Initial optimization complete.\n")

    print("--- Original Portfolio (No Filtering) ---")
    ef.portfolio_performance(verbose=True)
    print(f"Number of tickers: {len(cleaned_weights)}")


    # --- 2. Iteration and Sensitivity Analysis ---

    # A list to store the results from each iteration
    summary_data = []

    # Define the range of thresholds to test: 0% to 5% with a 0.5% step.
    # np.arange is used because it works well with float steps.
    # We use 0.051 to ensure 0.05 is included in the range.
    threshold_range = np.arange(0, 0.051, 0.005)

    print("\n--- Running Analysis for Different Weight Thresholds ---")

    for threshold in threshold_range:
        # Filter the dictionary to keep only tickers meeting the threshold
        filtered_weights = {
            ticker: weight
            for ticker, weight in cleaned_weights.items()
            if weight >= threshold
        }
        
        # If the threshold is so high that no assets remain, skip to the next iteration
        if not filtered_weights:
            print(f"Threshold {threshold:.1%} resulted in an empty portfolio. Skipping.")
            continue

        # Rebalance the filtered weights to sum to 100%
        total_filtered_weight = sum(filtered_weights.values())
        rebalanced_weights = {
            ticker: weight / total_filtered_weight
            for ticker, weight in filtered_weights.items()
        }

        # Create an aligned pandas Series for calculation
        rebalanced_weights_series = pd.Series(rebalanced_weights).reindex(mu.index).fillna(0)

        # Calculate new portfolio performance metrics
        p_return = np.dot(rebalanced_weights_series, mu)
        p_volatility = np.sqrt(np.dot(rebalanced_weights_series.T, np.dot(S, rebalanced_weights_series)))
        p_sharpe = (p_return - risk_free_rate) / p_volatility

        # Store the results in our list
        summary_data.append({
            "Threshold": f"{threshold:.1%}",
            "Num Tickers": len(rebalanced_weights),
            "Return": f"{p_return:.2%}",
            "Volatility": f"{p_volatility:.2%}",
            "Sharpe Ratio": f"{p_sharpe:.2f}"
        })

    # --- 3. Display Final Summary Table ---
    
    # Convert the list of dictionaries into a pandas DataFrame for a clean table format
    summary_df = pd.DataFrame(summary_data)
    
    print("\n\n" + "="*50)
    print("      PORTFOLIO FILTERING SENSITIVITY ANALYSIS")
    print("="*50)
    print(summary_df.to_string(index=False))
    print("="*50)


except Exception as e:
    print(f"\nERROR: Optimization or calculation failed. Error: {e}")

Constructing the optimizer and running max_sharpe...
SUCCESS! Initial optimization complete.

--- Original Portfolio (No Filtering) ---
Expected annual return: 32.7%
Annual volatility: 4.6%
Sharpe Ratio: 7.05
Number of tickers: 1518

--- Running Analysis for Different Weight Thresholds ---


      PORTFOLIO FILTERING SENSITIVITY ANALYSIS
Threshold  Num Tickers Return Volatility Sharpe Ratio
     0.0%         1518 32.70%      4.64%         6.62
     0.5%           51 31.98%      4.58%         6.54
     1.0%           36 28.21%      4.20%         6.24
     1.5%           26 18.43%      3.40%         4.83
     2.0%           19  8.45%      3.09%         2.09
     2.5%           18  8.60%      3.17%         2.08
     3.0%           17  8.75%      3.28%         2.06
     3.5%            3 16.27%     10.03%         1.42
     4.0%            1 30.79%     21.98%         1.31
     4.5%            1 30.79%     21.98%         1.31
     5.0%            1 30.79%     21.98%         1.31


In [22]:
import numpy as np
import pandas as pd
from pypfopt.efficient_frontier import EfficientFrontier

# Assume 'mu' and 'S' are already defined, e.g.,
# from pypfopt import risk_models, expected_returns
# df = pd.read_csv("tests/resources/stock_prices.csv", parse_dates=True, index_col="date")
# mu = expected_returns.mean_historical_return(df)
# S = risk_models.sample_cov(df)

print("Constructing the optimizer with the 'SCS' solver from the start...")

# We need the risk-free rate later, so let's set it here if not default
risk_free_rate = 0.02
ef = EfficientFrontier(mu, S, weight_bounds=(0, 0.5), solver='SCS')
# Set the risk_free_rate on the ef instance
ef.risk_free_rate = risk_free_rate


try:
    weights = ef.max_sharpe()
    print("\nSUCCESS! Optimization completed with 'SCS'.\n")

    print("--- Original Portfolio ---")
    ef.portfolio_performance(verbose=True)

    # Get the initial weights cleaned of near-zero values
    cleaned_weights = ef.clean_weights()

    # --- FILTERING AND REBALANCING LOGIC ---

    # 1. Define the minimum weight threshold
    weight_threshold = 0.01

    # 2. Filter the dictionary
    filtered_weights = {
        ticker: weight
        for ticker, weight in cleaned_weights.items()
        if weight >= weight_threshold
    }

    # 3. Rebalance the filtered weights to sum to 100%
    total_filtered_weight = sum(filtered_weights.values())
    rebalanced_weights = {
        ticker: weight / total_filtered_weight
        for ticker, weight in filtered_weights.items()
    }
    
    print("\n--- Filtered & Rebalanced Portfolio ---")
    
    num_tickers = len(rebalanced_weights)
    print(f"Found and rebalanced {num_tickers} tickers with original weight >= {weight_threshold:.0%}")
    
    # Sort and print the final assets for clarity
    sorted_weights = sorted(rebalanced_weights.items(), key=lambda item: item[1], reverse=True)
    print("\nFinal portfolio allocation:")
    for ticker, weight in sorted_weights:
        print(f"  - {ticker}: {weight:.2%}")
    

    # === MODIFICATION START: CALCULATE NEW PERFORMANCE ===

    # To calculate performance, we need the weights as a pandas Series
    # aligned with the original `mu` and `S` data.
    # We create a Series from our rebalanced dictionary and reindex it, filling
    # missing tickers (those we filtered out) with a weight of 0.
    rebalanced_weights_series = pd.Series(rebalanced_weights).reindex(mu.index).fillna(0)

    # Calculate new portfolio performance metrics
    expected_annual_return = np.dot(rebalanced_weights_series, mu)
    
    # Portfolio variance = w^T * S * w
    # Annual volatility (std dev) is the sqrt of variance
    annual_volatility = np.sqrt(np.dot(rebalanced_weights_series.T, np.dot(S, rebalanced_weights_series)))
    
    # Sharpe Ratio
    sharpe_ratio = (expected_annual_return - risk_free_rate) / annual_volatility

    print("\nFiltered & Rebalanced Portfolio Performance:")
    print(f"Expected annual return: {expected_annual_return:.1%}")
    print(f"Annual volatility: {annual_volatility:.1%}")
    print(f"Sharpe Ratio: {sharpe_ratio:.2f}")

    # === MODIFICATION END ===

except Exception as e:
    print(f"\nERROR: Optimization or calculation failed. Error: {e}")

Constructing the optimizer with the 'SCS' solver from the start...

SUCCESS! Optimization completed with 'SCS'.

--- Original Portfolio ---
Expected annual return: 32.7%
Annual volatility: 4.6%
Sharpe Ratio: 7.05

--- Filtered & Rebalanced Portfolio ---
Found and rebalanced 36 tickers with original weight >= 1%

Final portfolio allocation:
  - CBOE: 5.94%
  - PULS: 4.19%
  - MINT: 4.13%
  - TFLO: 3.97%
  - JAAA: 3.97%
  - ICSH: 3.86%
  - SGOV: 3.78%
  - USFR: 3.75%
  - BIL: 3.73%
  - BOXX: 3.68%
  - TBIL: 3.66%
  - BILS: 3.65%
  - COR: 3.63%
  - JPST: 3.63%
  - SHV: 3.62%
  - FTSM: 3.56%
  - GBIL: 3.55%
  - VUSB: 3.02%
  - SPTS: 2.47%
  - VGSH: 2.09%
  - JMST: 2.08%
  - FLOT: 2.07%
  - SCHO: 2.05%
  - APP: 2.01%
  - LMBS: 1.93%
  - GBTC: 1.92%
  - KR: 1.67%
  - SPSB: 1.51%
  - CVNA: 1.44%
  - CLS: 1.42%
  - VTIP: 1.41%
  - SHY: 1.37%
  - CME: 1.37%
  - NVDA: 1.35%
  - DFSD: 1.28%
  - STIP: 1.23%

Filtered & Rebalanced Portfolio Performance:
Expected annual return: 28.2%
Annual volatili

In [20]:
from pypfopt.efficient_frontier import EfficientFrontier

# Assume 'mu' and 'S' are already defined, e.g.,
# from pypfopt import risk_models, expected_returns
# import pandas as pd
# df = pd.read_csv("tests/resources/stock_prices.csv", parse_dates=True, index_col="date")
# mu = expected_returns.mean_historical_return(df)
# S = risk_models.sample_cov(df)

print("Constructing the optimizer with the 'SCS' solver from the start...")

ef = EfficientFrontier(mu, S, weight_bounds=(0, 0.5), solver='SCS')

try:
    weights = ef.max_sharpe()
    print("\nSUCCESS! Optimization completed with 'SCS'.\n")

    # Get the initial weights cleaned of near-zero values
    cleaned_weights = ef.clean_weights()

    # === MODIFICATION START ===

    # 1. Define the minimum weight threshold (5% = 0.05)
    weight_threshold = 0.01

    # 2. Filter the dictionary to keep only tickers meeting the threshold
    # This is a dictionary comprehension. It creates a new dictionary by iterating
    # over the items of cleaned_weights and including them only if the weight >= threshold.
    filtered_weights = {
        ticker: weight
        for ticker, weight in cleaned_weights.items()
        if weight >= weight_threshold
    }

    # 3. Print the count of tickers after applying the 5% filter
    num_tickers_in_portfolio = len(filtered_weights)
    print(f"Found {num_tickers_in_portfolio} tickers with weight >= {weight_threshold:.0%}")
    
    # -- Optional but Recommended: Rebalance the filtered weights to sum to 100% --
    # After filtering, the sum of weights will be less than 1. To make it a valid
    # portfolio again, we can rebalance them.
    total_filtered_weight = sum(filtered_weights.values())
    rebalanced_weights = {
        ticker: weight / total_filtered_weight
        for ticker, weight in filtered_weights.items()
    }

    print(f"\nOriginal sum of filtered weights: {total_filtered_weight:.2%}")
    print(f"Rebalanced sum: {sum(rebalanced_weights.values()):.2%}")
    
    # Sort the final, rebalanced tickers by weight for clear presentation
    sorted_weights = sorted(rebalanced_weights.items(), key=lambda item: item[1], reverse=True)
    
    print("\nFinal assets in portfolio (filtered and rebalanced):")
    # Print each ticker and its rebalanced weight
    for ticker, weight in sorted_weights:
        print(f"  - {ticker}: {weight:.2%}")

    # === MODIFICATION END ===

    print("\nOriginal Portfolio Performance (before filtering):")
    ef.portfolio_performance(verbose=True)
    # Note: The portfolio performance is for the *original* set of weights,
    # not the newly filtered and rebalanced one. You would need to recalculate
    # performance metrics using your `rebalanced_weights` dictionary if needed.

except Exception as e:
    print(f"\nERROR: Optimization failed again, which is highly unexpected. Error: {e}")

Constructing the optimizer with the 'SCS' solver from the start...

SUCCESS! Optimization completed with 'SCS'.

Found 36 tickers with weight >= 1%

Original sum of filtered weights: 85.85%
Rebalanced sum: 100.00%

Final assets in portfolio (filtered and rebalanced):
  - CBOE: 5.94%
  - PULS: 4.19%
  - MINT: 4.13%
  - TFLO: 3.97%
  - JAAA: 3.97%
  - ICSH: 3.86%
  - SGOV: 3.78%
  - USFR: 3.75%
  - BIL: 3.73%
  - BOXX: 3.68%
  - TBIL: 3.66%
  - BILS: 3.65%
  - COR: 3.63%
  - JPST: 3.63%
  - SHV: 3.62%
  - FTSM: 3.56%
  - GBIL: 3.55%
  - VUSB: 3.02%
  - SPTS: 2.47%
  - VGSH: 2.09%
  - JMST: 2.08%
  - FLOT: 2.07%
  - SCHO: 2.05%
  - APP: 2.01%
  - LMBS: 1.93%
  - GBTC: 1.92%
  - KR: 1.67%
  - SPSB: 1.51%
  - CVNA: 1.44%
  - CLS: 1.42%
  - VTIP: 1.41%
  - SHY: 1.37%
  - CME: 1.37%
  - NVDA: 1.35%
  - DFSD: 1.28%
  - STIP: 1.23%

Original Portfolio Performance (before filtering):
Expected annual return: 32.7%
Annual volatility: 4.6%
Sharpe Ratio: 7.05


In [18]:
from pypfopt.efficient_frontier import EfficientFrontier

# Assume 'mu' and 'S' are already defined, e.g.,
# from pypfopt import risk_models, expected_returns
# import pandas as pd
# df = pd.read_csv("tests/resources/stock_prices.csv", parse_dates=True, index_col="date")
# mu = expected_returns.mean_historical_return(df)
# S = risk_models.sample_cov(df)


print("Constructing the optimizer with the 'SCS' solver from the start...")

# Create the EfficientFrontier instance, specifying the solver AT CREATION TIME.
ef = EfficientFrontier(mu, S, weight_bounds=(0, 0.5), solver='SCS')

try:
    # Now, the max_sharpe call will correctly use the SCS solver.
    weights = ef.max_sharpe()
    
    print("\nSUCCESS! Optimization completed with 'SCS'.\n")
    
    # === MODIFICATION START ===

    # 1. Filter out tickers with 0 weight.
    # The clean_weights() method automatically removes assets with weights below a certain cutoff (effectively zero).
    # The returned dictionary `cleaned_weights` therefore already contains only the assets in the portfolio.
    cleaned_weights = ef.clean_weights()
    
    # 2. Print the count of tickers after the filter.
    num_tickers_in_portfolio = len(cleaned_weights)
    print(f"Portfolio construction complete. Number of tickers with weight > 0: {num_tickers_in_portfolio}")
    
    # Sort the tickers by weight in descending order for clear presentation
    sorted_weights = sorted(cleaned_weights.items(), key=lambda item: item[1], reverse=True)
    
    print("\nAssets sorted by weight (descending):")
    # Print each ticker and its weight, formatted as a percentage
    for ticker, weight in sorted_weights:
        print(f"  - {ticker}: {weight:.2%}")

    # === MODIFICATION END ===

    print("\nPortfolio Performance:")
    ef.portfolio_performance(verbose=True)

except Exception as e:
    print(f"\nERROR: Optimization failed again, which is highly unexpected. Error: {e}")

Constructing the optimizer with the 'SCS' solver from the start...


  warn(
  warn(



SUCCESS! Optimization completed with 'SCS'.

Portfolio construction complete. Number of tickers with weight > 0: 1518

Assets sorted by weight (descending):
  - CBOE: 5.10%
  - PULS: 3.60%
  - MINT: 3.54%
  - TFLO: 3.41%
  - JAAA: 3.40%
  - ICSH: 3.31%
  - SGOV: 3.25%
  - USFR: 3.22%
  - BIL: 3.20%
  - BOXX: 3.16%
  - TBIL: 3.14%
  - BILS: 3.14%
  - COR: 3.12%
  - JPST: 3.12%
  - SHV: 3.11%
  - FTSM: 3.06%
  - GBIL: 3.05%
  - VUSB: 2.59%
  - SPTS: 2.12%
  - VGSH: 1.79%
  - JMST: 1.78%
  - FLOT: 1.78%
  - SCHO: 1.76%
  - APP: 1.73%
  - LMBS: 1.66%
  - GBTC: 1.65%
  - KR: 1.44%
  - SPSB: 1.30%
  - CVNA: 1.24%
  - CLS: 1.22%
  - VTIP: 1.21%
  - SHY: 1.18%
  - CME: 1.18%
  - NVDA: 1.16%
  - DFSD: 1.10%
  - STIP: 1.06%
  - PLTR: 0.90%
  - TIGO: 0.89%
  - GLDM: 0.86%
  - MCK: 0.85%
  - SPOT: 0.79%
  - EXEL: 0.78%
  - K: 0.77%
  - BSV: 0.76%
  - SGOL: 0.72%
  - SUB: 0.69%
  - SFM: 0.59%
  - COKE: 0.54%
  - ESLT: 0.52%
  - PHYS: 0.50%
  - COOP: 0.50%
  - KTOS: 0.47%
  - BSCQ: 0.45%
  - IAU: 0

In [15]:
from pypfopt.efficient_frontier import EfficientFrontier

# Assume 'mu' and 'S' are already defined, e.g.,
# from pypfopt import risk_models, expected_returns
# from pypfopt.discrete_allocation import DiscreteAllocation, get_latest_prices
# import pandas as pd
# df = pd.read_csv("tests/resources/stock_prices.csv", parse_dates=True, index_col="date")
# mu = expected_returns.mean_historical_return(df)
# S = risk_models.sample_cov(df)


print("Constructing the optimizer with the 'SCS' solver from the start...")

# Create the EfficientFrontier instance, specifying the solver AT CREATION TIME.
# We don't need verbose=True anymore unless this also fails.
ef = EfficientFrontier(mu, S, weight_bounds=(0, 0.5), solver='SCS')

try:
    # Now, the max_sharpe call will correctly use the SCS solver.
    weights = ef.max_sharpe()
    
    print("\nSUCCESS! Optimization completed with 'SCS'.\n")
    
    # Print the results
    cleaned_weights = ef.clean_weights()
    print("Raw Cleaned Weights (Dictionary):")
    print(cleaned_weights)

    # === MODIFICATION START ===

    # Get the number of tickers that have a non-zero weight
    num_tickers = len(cleaned_weights)
    print(f"\nPortfolio contains {num_tickers} tickers with non-zero weights.")
    
    # Sort the dictionary of weights in descending order
    # .items() converts the dictionary to a list of (key, value) tuples
    # The `key` argument in sorted() tells it to use the second element of each tuple (the weight) for sorting
    # `reverse=True` sorts from highest to lowest
    sorted_weights = sorted(cleaned_weights.items(), key=lambda item: item[1], reverse=True)

    print("\nTickers sorted by weight (descending):")
    for ticker, weight in sorted_weights:
        # Print each ticker and its weight, formatted as a percentage
        print(f"  {ticker}: {weight:.2%}")

    # === MODIFICATION END ===
    
    print("\nPortfolio Performance:")
    ef.portfolio_performance(verbose=True)

except Exception as e:
    print(f"\nERROR: Optimization failed again, which is highly unexpected. Error: {e}")

Constructing the optimizer with the 'SCS' solver from the start...

SUCCESS! Optimization completed with 'SCS'.

Raw Cleaned Weights (Dictionary):
OrderedDict([('A', 0.0), ('AA', 0.0), ('AAL', 0.0), ('AAON', 0.0), ('AAPL', 0.0), ('ABBV', 0.0), ('ABEV', 0.0), ('ABNB', 0.0), ('ABT', 0.0), ('ACGL', 0.0), ('ACHR', 0.0), ('ACI', 0.0), ('ACM', 0.0), ('ACN', 0.0), ('ACWI', 0.0), ('ACWX', 0.0), ('ADBE', 0.0), ('ADC', 0.0), ('ADI', 0.0), ('ADM', 0.0), ('ADP', 0.0), ('ADSK', 0.0), ('ADT', 0.0), ('AEE', 0.0), ('AEG', 0.0), ('AEM', 0.0), ('AEP', 0.0), ('AER', 0.0), ('AES', 0.0), ('AFG', 0.0), ('AFL', 0.0), ('AFRM', 0.0), ('AGCO', 0.0), ('AGG', 0.0), ('AGI', 0.0), ('AGNC', 0.0), ('AIG', 0.0), ('AIQ', 0.0), ('AIRR', 0.0), ('AIT', 0.0), ('AIZ', 0.0), ('AJG', 0.0), ('AKAM', 0.0), ('AL', 0.0), ('ALB', 0.0), ('ALC', 0.0), ('ALGM', 0.0), ('ALGN', 0.0), ('ALK', 0.0), ('ALL', 0.0), ('ALLE', 0.0), ('ALLY', 0.0), ('ALNY', 0.0), ('ALSN', 0.0), ('ALV', 0.0), ('AM', 0.0), ('AMAT', 0.0), ('AMCR', 0.0), ('AMD', 0

In [7]:
from pypfopt.efficient_frontier import EfficientFrontier

print("Constructing the optimizer with the 'SCS' solver from the start...")

# Create the EfficientFrontier instance, specifying the solver AT CREATION TIME.
# We don't need verbose=True anymore unless this also fails.
ef = EfficientFrontier(mu, S, weight_bounds=(0, 0.5), solver='SCS')

try:
    # Now, the max_sharpe call will correctly use the SCS solver.
    weights = ef.max_sharpe()
    
    print("\nSUCCESS! Optimization completed with 'SCS'.\n")
    
    # Print the results
    cleaned_weights = ef.clean_weights()
    print("Cleaned Weights:")
    print(cleaned_weights)
    
    print("\nPortfolio Performance:")
    ef.portfolio_performance(verbose=True)

except Exception as e:
    print(f"\nERROR: Optimization failed again, which is highly unexpected. Error: {e}")

Constructing the optimizer with the 'SCS' solver from the start...


  warn(
  warn(



SUCCESS! Optimization completed with 'SCS'.

Cleaned Weights:
OrderedDict([('A', 0.0), ('AA', 0.0), ('AAL', 0.0), ('AAON', 0.0), ('AAPL', 0.0), ('ABBV', 0.0), ('ABEV', 0.0), ('ABNB', 0.0), ('ABT', 0.0), ('ACGL', 0.0), ('ACHR', 0.0), ('ACI', 0.0), ('ACM', 0.0), ('ACN', 0.0), ('ACWI', 0.0), ('ACWX', 0.0), ('ADBE', 0.0), ('ADC', 0.0), ('ADI', 0.0), ('ADM', 0.0), ('ADP', 0.0), ('ADSK', 0.0), ('ADT', 0.0), ('AEE', 0.0), ('AEG', 0.0), ('AEM', 0.0), ('AEP', 0.0), ('AER', 0.0), ('AES', 0.0), ('AFG', 0.0), ('AFL', 0.0), ('AFRM', 0.0), ('AGCO', 0.0), ('AGG', 0.0), ('AGI', 0.0), ('AGNC', 0.0), ('AIG', 0.0), ('AIQ', 0.0), ('AIRR', 0.0), ('AIT', 0.0), ('AIZ', 0.0), ('AJG', 0.0), ('AKAM', 0.0), ('AL', 0.0), ('ALB', 0.0), ('ALC', 0.0), ('ALGM', 0.0), ('ALGN', 0.0), ('ALK', 0.0), ('ALL', 0.0), ('ALLE', 0.0), ('ALLY', 0.0), ('ALNY', 0.0), ('ALSN', 0.0), ('ALV', 0.0), ('AM', 0.0), ('AMAT', 0.0), ('AMCR', 0.0), ('AMD', 0.0), ('AME', 0.0), ('AMGN', 0.0), ('AMH', 0.0), ('AMLP', 0.0), ('AMP', 0.0), ('AMT',

In [9]:
# Assume cleaned_weights, StockData, and PORTFOLIO_VALUE are defined

# --- Step 3 & 4: Calculate Full Portfolio (for matrix multiplication) ---

# Get weights in the correct order for vectorized operations
ordered_weights = np.array([cleaned_weights.get(ticker, 0) for ticker in df.columns])
mvo_weights = PORTFOLIO_VALUE * ordered_weights

# --- THIS IS THE KEY FIX ---
# Get last prices as a PANDAS SERIES for lookups by ticker name
last_prices_series = df.iloc[-1]
# Get last prices as a NUMPY ARRAY for fast division
last_prices_np = last_prices_series.to_numpy()
# -------------------------

# Calculate the full array of shares using the numpy array
initial_portfolio = mvo_weights / last_prices_np


# --- NEW, CORRECTED: Code to print a clear, filtered summary ---

print("\n" + "="*50)
print("          Detailed Portfolio Allocation          ")
print("="*50)

portfolio_details = []

# Iterate through the cleaned_weights dictionary (ticker-weight pairs)
for ticker, weight in cleaned_weights.items():
    if weight > 0:
        dollar_value = PORTFOLIO_VALUE * weight
        
        # Look up the price from the PANDAS SERIES using the ticker string
        last_price = last_prices_series[ticker]
        
        shares_to_buy = dollar_value / last_price
        
        portfolio_details.append({
            "Ticker": ticker,
            "Weight": weight,
            "Dollar Value": dollar_value,
            "Shares to Purchase": shares_to_buy
        })

# Convert the list of details into a pandas DataFrame for printing
if portfolio_details:
    results_df = pd.DataFrame(portfolio_details).set_index("Ticker")

    # Format the DataFrame for nice printing
    display_df = results_df.copy()
    display_df["Weight"] = display_df["Weight"].map('{:.2%}'.format)
    display_df["Dollar Value"] = display_df["Dollar Value"].map('${:,.2f}'.format)
    display_df["Shares to Purchase"] = display_df["Shares to Purchase"].map('{:.4f}'.format)
    
    print(display_df)
else:
    print("The optimization resulted in zero allocation to all assets.")

print("="*50)

# The 'initial_portfolio' numpy array is still correct and ready for your next step
print("\nFull 'initial_portfolio' array (for matrix multiplication):")
print(initial_portfolio)


          Detailed Portfolio Allocation          
       Weight Dollar Value Shares to Purchase
Ticker                                       
APP     1.73%   $17,260.00            47.3994
BIL     3.20%   $32,040.00           349.3621
BILS    3.14%   $31,370.00           315.6888
BOXX    3.16%   $31,610.00           279.7840
BSCQ    0.45%    $4,480.00           229.6847
BSV     0.76%    $7,620.00            97.1939
CBOE    5.10%   $51,010.00           206.9287
CLS     1.22%   $12,220.00            71.7894
CME     1.18%   $11,750.00            42.0318
COKE    0.54%    $5,380.00            45.2862
COOP    0.50%    $5,030.00            32.0117
COR     3.12%   $31,180.00           107.3839
CVNA    1.24%   $12,400.00            37.3359
DFSD    1.10%   $10,980.00           229.1797
ERJ     0.34%    $3,430.00            71.6973
ESLT    0.52%    $5,160.00            11.3953
EXEL    0.78%    $7,820.00           171.4912
FER     0.24%    $2,420.00            45.3268
FLOT    1.78%   $17,800.00   

In [13]:
type(initial_portfolio)
len(display_df)

65

In [None]:
# --- NEW: Code to print a clear, filtered summary ---

print("\n" + "="*50)
print("          Detailed Portfolio Allocation          ")
print("="*50)

# Get the last prices as a pandas Series for easy lookup by ticker
last_prices = df.iloc[-1].to_numpy()

# Create a list to hold the details of our portfolio
portfolio_details = []

# Iterate through the cleaned_weights dictionary
for ticker, weight in cleaned_weights.items():
    # We only care about assets with a non-zero weight
    if weight > 0:
        # Calculate the dollar value for this specific asset
        dollar_value = PORTFOLIO_VALUE * weight
        
        # Look up the last price for this specific asset
        last_price = last_prices_series[ticker]
        
        # Calculate the number of shares to purchase
        shares_to_buy = dollar_value / last_price
        
        # Add the details to our list
        portfolio_details.append({
            "Ticker": ticker,
            "Weight": weight,
            "Dollar Value": dollar_value,
            "Shares to Purchase": shares_to_buy
        })

# Convert the list of details into a pandas DataFrame
if portfolio_details:
    results_df = pd.DataFrame(portfolio_details)
    results_df.set_index("Ticker", inplace=True)

    # --- Format the DataFrame for nice printing ---
    # Create a copy for display so we don't alter the original numbers
    display_df = results_df.copy()
    display_df["Weight"] = display_df["Weight"].map('{:.2%}'.format)
    display_df["Dollar Value"] = display_df["Dollar Value"].map('${:,.2f}'.format)
    display_df["Shares to Purchase"] = display_df["Shares to Purchase"].map('{:.4f}'.format)
    
    print(display_df)
else:
    print("The optimization resulted in zero allocation to all assets.")

print("="*50)

# You can now proceed with your final matrix multiplication using the 'initial_portfolio' array
# e.g., Portfolio_Assets = TradeData @ initial_portfolio

In [None]:
ordered_weights