In [29]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import pprint
import inspect  # <--- ADD THIS LINE
from IPython.display import display, Markdown

# --- 1. PANDAS & IPYTHON OPTIONS ---
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 3000)
pd.set_option('display.float_format', '{:.6f}'.format)
%load_ext autoreload
%autoreload 2

# --- 2. PROJECT PATH CONFIGURATION ---
NOTEBOOK_DIR = Path.cwd()
PARENT_DIR = NOTEBOOK_DIR.parent
ROOT_DIR = NOTEBOOK_DIR.parent.parent  # Adjust if your notebook is in a 'notebooks' subdirectory
DATA_DIR = ROOT_DIR / 'data'
SRC_DIR = ROOT_DIR / 'src'

# Add 'src' to the Python path to import custom modules
if str(SRC_DIR) not in sys.path:
    sys.path.append(str(SRC_DIR))

# --- 3. IMPORT CUSTOM MODULES ---
import utils

# --- 5. VERIFICATION ---
print("--- Path Configuration ---")
print(f"✅ Project Root: {ROOT_DIR}")
print(f"✅ Parent Dir:   {PARENT_DIR}")
print(f"✅ Notebook Dir: {NOTEBOOK_DIR}")
print(f"✅ Data Dir:     {DATA_DIR}")
print(f"✅ Source Dir:   {SRC_DIR}")
assert all([ROOT_DIR.exists(), DATA_DIR.exists(), SRC_DIR.exists()]), "A key directory was not found!"

print("\n--- Module Verification ---")
print(f"✅ Successfully imported 'utils' and 'plotting_utils'.")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
--- Path Configuration ---
✅ Project Root: c:\Users\ping\Files_win10\python\py311\stocks
✅ Parent Dir:   c:\Users\ping\Files_win10\python\py311\stocks\notebooks_PyPortfOpt
✅ Notebook Dir: c:\Users\ping\Files_win10\python\py311\stocks\notebooks_PyPortfOpt\_working
✅ Data Dir:     c:\Users\ping\Files_win10\python\py311\stocks\data
✅ Source Dir:   c:\Users\ping\Files_win10\python\py311\stocks\src

--- Module Verification ---
✅ Successfully imported 'utils' and 'plotting_utils'.


In [30]:
# Read the pre-split training and trading tables stored in PARENT_DIR.
train = pd.read_csv(PARENT_DIR / 'train_data.csv')
trade = pd.read_csv(PARENT_DIR / 'trade_data.csv')

# If you are not using the data generated in part 1 of this tutorial, make sure
# your frames already have the columns and index in the form the environment
# expects; in that case you can comment out and skip the two lines below.
# Each line promotes the first CSV column to the index and gives it an empty name.
train = train.set_index(train.columns[0]).rename_axis('')
trade = trade.set_index(trade.columns[0]).rename_axis('')

print(f'train:\n{train}')
print(f'\ntrade:\n{trade}')

train:
            date   tic      close       high        low       open           volume      day      macd    boll_ub    boll_lb     rsi_30     cci_30      dx_30  close_30_sma  close_60_sma       vix  turbulence
                                                                                                                                                                                                               
0     2009-01-02  AAPL   2.727417   2.736134   2.559415   2.581054 746015200.000000 4.000000  0.000000   2.947759   2.622184 100.000000  66.666667 100.000000      2.727417      2.727417 39.189999    0.000000
0     2009-01-02  AMGN  40.791451  40.853685  39.933992  40.514850   6547900.000000 4.000000  0.000000   2.947759   2.622184 100.000000  66.666667 100.000000     40.791451     40.791451 39.189999    0.000000
0     2009-01-02   AXP  14.891695  15.038070  14.175229  14.306196  10955700.000000 4.000000  0.000000   2.947759   2.622184 100.000000  66.666667 100.000000    

In [31]:
def process_df_for_mvo(df):
  """Reshape a long price table into a wide closing-price matrix.

  Args:
    df: DataFrame with at least the columns "date", "tic" and "close",
      holding one row per (date, tic) pair.

  Returns:
    DataFrame indexed by "date" with one column per "tic" value, whose
    cells are the corresponding "close" values.
  """
  close_by_ticker = df.pivot(index="date", columns="tic", values="close")
  return close_by_ticker

In [32]:
# Codes in this section partially refer to Dr G A Vijayalakshmi Pai
# https://www.kaggle.com/code/vijipai/lesson-5-mean-variance-optimization-of-portfolios/notebook

def StockReturnsComputing(StockPrice, Rows, Columns):
  """Compute period-over-period percentage returns for a price matrix.

  Only the leading ``Rows`` x ``Columns`` window of ``StockPrice`` is used.
  Row ``i`` of the result is
  ``(price[i + 1] - price[i]) / price[i] * 100`` for every asset column.

  Args:
    StockPrice: 2-D array-like of prices, one row per period and one
      column per asset.
    Rows: number of price rows to use (at least 1).
    Columns: number of asset columns to use.

  Returns:
    Float ndarray of shape ``(Rows - 1, Columns)`` holding returns in percent.

  Raises:
    ValueError: if ``Rows`` is below 1 or ``Columns`` is negative.
    IndexError: if ``Rows`` or ``Columns`` exceed the size of ``StockPrice``.
  """
  if Rows < 1 or Columns < 0:
    raise ValueError("Rows must be >= 1 and Columns must be >= 0")

  prices = np.asarray(StockPrice, dtype=float)
  if Rows > prices.shape[0] or Columns > prices.shape[1]:
    raise IndexError("Rows/Columns exceed the dimensions of StockPrice")

  # Vectorised form of the per-cell loop: the same arithmetic, applied to
  # whole row slices instead of one element at a time.
  window = prices[:Rows, :Columns]
  earlier = window[:-1]
  later = window[1:]
  return ((later - earlier) / earlier) * 100

In [33]:
# Pivot both splits into date-by-ticker matrices of closing prices.
StockData, TradeData = (process_df_for_mvo(split) for split in (train, trade))

# Show the trading-window price matrix as a raw array.
TradeData.to_numpy()

array([[ 88.485, 218.336,  88.271, ...,  40.327,  31.709,  37.15 ],
       [ 88.485, 221.007,  88.534, ...,  40.415,  32.562,  37.001],
       [ 90.852, 219.303,  90.646, ...,  40.747,  33.477,  36.901],
       ...,
       [145.68 , 184.125, 173.826, ...,  41.435,  40.18 ,  47.494],
       [146.346, 185.056, 172.529, ...,  41.829,  39.437,  47.079],
       [145.886, 183.194, 169.745, ...,  41.474,  38.465,  46.693]])

In [57]:
# Percentage returns for every asset over the training window.
arStockPrices = np.asarray(StockData)
Rows, Cols = arStockPrices.shape
arReturns = StockReturnsComputing(arStockPrices, Rows, Cols)

# Per-asset mean return and the variance-covariance matrix of returns
# (rowvar=False: each column is one asset, each row one observation).
meanReturns = arReturns.mean(axis=0)
covReturns = np.cov(arReturns, rowvar=False)

# Print arrays with three decimals and without scientific notation.
np.set_printoptions(precision=3, suppress=True)

# Show the returns, their means and their variance-covariance matrix.
print(f'arReturns in k-portfolio 1\n', arReturns)
print(f'\nMean returns of assets in k-portfolio 1\n', meanReturns)
print(f'\nVariance-Covariance matrix of returns\n', covReturns)

arReturns in k-portfolio 1
 [[ 4.22   1.119  3.207 ... -6.236  5.049 -1.154]
 [-1.649 -2.196  5.614 ... -1.601 -0.484 -0.885]
 [-2.161 -0.24  -4.213 ...  1.27   1.198 -0.857]
 ...
 [-3.073 -0.965 -4.527 ... -2.063 -2.069 -1.161]
 [ 2.305 -0.55   1.177 ...  2.878  2.793  0.625]
 [ 0.835  1.857  0.719 ...  0.805  0.165  0.605]]

Mean returns of assets in k-portfolio 1
 [0.136 0.068 0.086 0.083 0.066 0.134 0.06  0.035 0.072 0.056 0.103 0.073
 0.033 0.076 0.047 0.073 0.042 0.056 0.054 0.056 0.103 0.089 0.041 0.053
 0.104 0.11  0.044 0.042 0.042]

Variance-Covariance matrix of returns
 [[3.156 1.066 1.768 1.669 1.722 1.814 1.569 1.302 1.302 1.811 1.303 1.432
  1.218 1.674 0.74  1.839 0.719 0.884 1.241 0.823 1.561 1.324 0.752 1.027
  1.298 1.466 0.657 1.078 0.631]
 [1.066 2.571 1.306 1.123 1.193 1.319 1.116 1.053 1.045 1.269 1.068 1.089
  0.899 1.218 0.926 1.391 0.682 0.727 1.025 1.156 1.166 0.984 0.798 0.956
  1.259 1.111 0.688 1.091 0.682]
 [1.768 1.306 4.847 2.73  2.6   2.128 1.944 2.141 

### Summary of the Workflow
1. Setup: Provide the model with historical asset returns (meanReturns) and their risk/correlation profile (covReturns), and set rules (like no short-selling and a 50% max allocation per asset).
2. Optimize: Solve for the portfolio that gives the best risk-adjusted return (maximum Sharpe Ratio).
3. Clean: Tidy up the resulting percentage allocations to make them practical.
4. Execute: Convert these optimal percentages into actual dollar amounts based on a total portfolio size of $1,000,000.

In [35]:
from pypfopt.efficient_frontier import EfficientFrontier

# Maximum-Sharpe portfolio with each asset's weight bounded to [0, 0.5]
# (no short-selling, at most 50% in any single asset).
ef_mean = EfficientFrontier(meanReturns, covReturns, weight_bounds=(0, 0.5))
raw_weights_mean = ef_mean.max_sharpe()
cleaned_weights_mean = ef_mean.clean_weights()

# Scale the fractional weights to dollars for a $1,000,000 portfolio.
# Iterating over the mapping's values keeps the asset order and does not
# depend on the keys being the integers 0..n-1, so the same line keeps working
# when the optimiser is fed labelled (ticker-indexed) inputs.
mvo_weights = np.array([1000000 * weight for weight in cleaned_weights_mean.values()])
mvo_weights  # Dollar amount allocation

array([375410.,      0.,      0.,      0.,      0.,  85810.,      0.,
            0.,      0.,      0., 205910.,      0.,      0.,      0.,
            0.,      0.,      0.,      0.,      0.,      0.,  49370.,
            0.,      0.,      0., 142510., 141010.,      0.,      0.,
            0.])

In [36]:
# Display the cleaned weight mapping produced by ef_mean.clean_weights().
cleaned_weights_mean

OrderedDict([(0, 0.37541),
             (1, 0.0),
             (2, 0.0),
             (3, 0.0),
             (4, 0.0),
             (5, 0.08581),
             (6, 0.0),
             (7, 0.0),
             (8, 0.0),
             (9, 0.0),
             (10, 0.20591),
             (11, 0.0),
             (12, 0.0),
             (13, 0.0),
             (14, 0.0),
             (15, 0.0),
             (16, 0.0),
             (17, 0.0),
             (18, 0.0),
             (19, 0.0),
             (20, 0.04937),
             (21, 0.0),
             (22, 0.0),
             (23, 0.0),
             (24, 0.14251),
             (25, 0.14101),
             (26, 0.0),
             (27, 0.0),
             (28, 0.0)])

In [37]:
# NOTE: despite its name, LastPrice holds the reciprocal (1 / price) of each
# asset's price in the last row of StockData, so that dollars * LastPrice
# gives a number of shares.
LastPrice = 1 / StockData.iloc[-1].to_numpy()
Initial_Portfolio = mvo_weights * LastPrice
Initial_Portfolio  # number of shares

array([4234.615,    0.   ,    0.   ,    0.   ,    0.   ,  462.082,
          0.   ,    0.   ,    0.   ,    0.   ,  927.12 ,    0.   ,
          0.   ,    0.   ,    0.   ,    0.   ,    0.   ,    0.   ,
          0.   ,    0.   ,  253.205,    0.   ,    0.   ,    0.   ,
        521.427,  756.39 ,    0.   ,    0.   ,    0.   ])

In [38]:
# Value the fixed share holdings on every trading day:
# (days x assets) price matrix times the (assets,) share vector.
Portfolio_Assets = TradeData.dot(Initial_Portfolio)
MVO_result = Portfolio_Assets.to_frame(name="Mean Var")
print(f'MVO_result:\n{MVO_result}')

MVO_result:
                 Mean Var
date                     
2020-07-01 1001536.134107
2020-07-02 1004131.147730
2020-07-06 1022196.153661
2020-07-07 1012410.866996
2020-07-08 1027198.165629
...                   ...
2021-10-21 1524363.961577
2021-10-22 1527223.553729
2021-10-25 1533948.898311
2021-10-26 1537081.639312
2021-10-27 1528623.280691

[335 rows x 1 columns]


##########################

# Verify Calculation

In [39]:
# Inspect how the trading-window price matrix is labelled (index and column
# level names) and print the matrix itself.
print(f'TradeData.index.names: {TradeData.index.names}')
print(f'TradeData.columns.names: {TradeData.columns.names}')
print(f'\nTradeData:\n{TradeData}')

TradeData.index.names: ['date']
TradeData.columns.names: ['tic']

TradeData:
tic              AAPL       AMGN        AXP         BA        CAT        CRM      CSCO       CVX        DIS         GS         HD        HON        IBM      INTC        JNJ        JPM        KO        MCD        MMM       MRK       MSFT        NKE         PG        TRV        UNH          V        VZ       WBA       WMT
date                                                                                                                                                                                                                                                                                                                             
2020-07-01  88.485016 218.336395  88.271126 180.320007 113.409111 190.223175 39.308640 70.369415 111.156929 175.233932 220.004150 130.247833  90.751396 52.425770 121.802376  80.762329 38.513268 164.716431 107.145050 63.806126 196.120148  91.368599 105.358139 101.159019 275.88360

In [40]:
# Inspect how the training-window price matrix is labelled (index and column
# level names) and print the matrix itself.
print(f'StockData.index.names: {StockData.index.names}')
print(f'StockData.columns.names: {StockData.columns.names}')
print(f'\nStockData:\n{StockData}')

StockData.index.names: ['date']
StockData.columns.names: ['tic']

StockData:
tic             AAPL       AMGN       AXP         BA        CAT        CRM      CSCO       CVX        DIS         GS         HD        HON       IBM      INTC        JNJ       JPM        KO        MCD        MMM       MRK       MSFT       NKE         PG        TRV        UNH          V        VZ       WBA       WMT
date                                                                                                                                                                                                                                                                                                                        
2009-01-02  2.727417  40.791451 14.891695  33.941090  30.233925   8.431122 11.099333 39.716728  20.259752  65.354576  16.048880  22.746801 47.715305  9.516951  37.172798 20.872557 13.757771  40.240009  29.900188 16.682100  14.897431 10.691023  38.529922  30.502073  21.773306  11.876763 13

In [41]:
# StockData comes from process_df_for_mvo, which pivots with index="date":
# 'date' is already the index and every column is a ticker, so no set_index
# step is needed here.

# Parse the date labels into datetimes for time-series operations.
StockData.index = pd.to_datetime(StockData.index)

# Daily simple returns in percent: (price_today / price_yesterday - 1) * 100.
# fill_method=None keeps missing prices as NaN instead of silently
# forward-filling them (the implicit fill is deprecated in recent pandas).
# The first row is all NaN because it has no prior day; dropna() removes it
# together with any other row that contains a NaN return.
daily_returns_pct = (StockData.pct_change(fill_method=None) * 100).dropna()

print(daily_returns_pct)

tic             AAPL      AMGN       AXP        BA       CAT       CRM      CSCO       CVX       DIS        GS        HD       HON       IBM      INTC       JNJ       JPM        KO       MCD       MMM       MRK      MSFT       NKE        PG       TRV       UNH         V        VZ       WBA       WMT
date                                                                                                                                                                                                                                                                                                        
2009-01-05  4.220403  1.118857  3.207452  2.033121 -1.769339 -1.969432  0.884427  0.182961 -1.755846  2.328302  2.403677 -0.519325 -0.629503 -1.907940 -0.989315 -6.698561 -1.002123 -0.298047 -1.165740 -1.516151  0.934551  0.716177 -0.716606 -1.482284 -1.631022  0.711082 -6.235544  5.048914 -1.154236
2009-01-06 -1.649439 -2.196159  5.614002  0.303293 -0.607656  3.748130  3.974295  0.900087  3.446

In [42]:
# Covariance between every pair of tickers; the diagonal holds each
# ticker's own return variance.
cov_matrix = daily_returns_pct.cov()

# Average daily return of each ticker (one value per column).
mean_returns_pct = daily_returns_pct.mean(axis=0)

print(f'mean_returns_pct.head():\n{mean_returns_pct.head()}')
print(f'\ncov_matrix.head():\n{cov_matrix.head()}')

mean_returns_pct.head():
tic
AAPL   0.136238
AMGN   0.068098
AXP    0.085739
BA     0.082866
CAT    0.065957
dtype: float64

cov_matrix.head():
tic      AAPL     AMGN      AXP       BA      CAT      CRM     CSCO      CVX      DIS       GS       HD      HON      IBM     INTC      JNJ      JPM       KO      MCD      MMM      MRK     MSFT      NKE       PG      TRV      UNH        V       VZ      WBA      WMT
tic                                                                                                                                                                                                                                                                      
AAPL 3.155883 1.066386 1.767506 1.668571 1.722293 1.814279 1.569301 1.302347 1.302305 1.811224 1.302946 1.431675 1.218217 1.673847 0.740312 1.838581 0.718524 0.883584 1.241463 0.822611 1.560818 1.324481 0.752285 1.026957 1.298357 1.465668 0.657259 1.078277 0.631340
AMGN 1.066386 2.570602 1.305772 1.122571 1.193356 1.318668

In [43]:
from pypfopt.efficient_frontier import EfficientFrontier

# Repeat the max-Sharpe optimisation (same 0-0.5 per-asset bounds) using the
# pandas-derived mean returns and covariance matrix.
ef_mean = EfficientFrontier(mean_returns_pct, cov_matrix, weight_bounds=(0, 0.5))
raw_weights_mean = ef_mean.max_sharpe()
cleaned_weights_mean = ef_mean.clean_weights()
# NOTE(review): this overwrites ef_mean, raw_weights_mean and cleaned_weights_mean
# from the earlier NumPy-based run. mvo_weights is NOT recomputed here -- the
# positional lookup cleaned_weights_mean[i] presumably fails once the keys are
# tickers -- so mvo_weights still holds the dollar amounts from that earlier run.

In [44]:
# Build the allocation table from a single source so every ticker is paired
# with its own weight: the keys of cleaned_weights_mean are the asset labels
# and its values are the fractional weights, scaled here to a $1,000,000
# portfolio. The dollar column is no longer taken from mvo_weights, an array
# left over from a different optimisation run and matched only by position.
df_allocation = pd.DataFrame({
    'Ticker': list(cleaned_weights_mean.keys()),
    'Dollar_Amount': [1000000 * weight for weight in cleaned_weights_mean.values()],
})

print(f'df_allocation:\n{df_allocation}')

df_allocation:
   Ticker  Dollar_Amount
0    AAPL  375410.000000
1    AMGN       0.000000
2     AXP       0.000000
3      BA       0.000000
4     CAT       0.000000
5     CRM   85810.000000
6    CSCO       0.000000
7     CVX       0.000000
8     DIS       0.000000
9      GS       0.000000
10     HD  205910.000000
11    HON       0.000000
12    IBM       0.000000
13   INTC       0.000000
14    JNJ       0.000000
15    JPM       0.000000
16     KO       0.000000
17    MCD       0.000000
18    MMM       0.000000
19    MRK       0.000000
20   MSFT   49370.000000
21    NKE       0.000000
22     PG       0.000000
23    TRV       0.000000
24    UNH  142510.000000
25      V  141010.000000
26     VZ       0.000000
27    WBA       0.000000
28    WMT       0.000000


In [45]:
# Keep only the assets that actually receive money (more than one cent) and
# list them from the largest allocation to the smallest. .copy() detaches the
# result from df_allocation.
df_invest = (
    df_allocation.loc[df_allocation['Dollar_Amount'] > 0.01]
    .copy()
    .sort_values(by='Dollar_Amount', ascending=False)
)

# Dollar_Amount is left numeric rather than formatted as '$1,234.00' text,
# so it can still be used in arithmetic.
print(f'df_invest:\n{df_invest}')

df_invest:
   Ticker  Dollar_Amount
0    AAPL  375410.000000
10     HD  205910.000000
24    UNH  142510.000000
25      V  141010.000000
5     CRM   85810.000000
20   MSFT   49370.000000


In [46]:
# Prices in the last row of StockData, as a Series indexed by ticker.
last_prices = StockData.iloc[-1]

# Look up each holding's price by ticker, then convert dollars into shares.
df_invest = df_invest.assign(
    last_price=df_invest['Ticker'].map(last_prices),
    shares=lambda held: held['Dollar_Amount'] / held['last_price'],
)

# Display the updated df_invest
print(df_invest)

   Ticker  Dollar_Amount  last_price      shares
0    AAPL  375410.000000   88.652687 4234.615017
10     HD  205910.000000  222.096497  927.119532
24    UNH  142510.000000  273.307526  521.427281
25      V  141010.000000  186.425095  756.389585
5     CRM   85810.000000  185.702789  462.082451
20   MSFT   49370.000000  194.979980  253.205482


In [47]:
# Tickers we hold, restricted to those that have a column in TradeData.
portfolio_tickers = df_invest['Ticker'].tolist()
available_tickers = [held for held in portfolio_tickers if held in TradeData.columns]

# Share counts indexed by ticker, for the available tickers only.
shares_series = (
    df_invest.loc[df_invest['Ticker'].isin(available_tickers)]
    .set_index('Ticker')['shares']
)

# Daily value of each position (price x shares, aligned on ticker labels),
# then the row-wise total across positions.
daily_values = TradeData[available_tickers].mul(shares_series, axis='columns')
portfolio_time_series = daily_values.sum(axis='columns')

# One-column result frame.
portfolio_result = portfolio_time_series.to_frame(name='Portfolio_Value')

print("Mean-variance Portfolio value time series:")
print(portfolio_result.head(10))
print(f"\nPortfolio stats:")
print(f"Start value: ${portfolio_result['Portfolio_Value'].iloc[0]:,.2f}")
print(f"End value: ${portfolio_result['Portfolio_Value'].iloc[-1]:,.2f}")
print(f"Total return: {((portfolio_result['Portfolio_Value'].iloc[-1] / portfolio_result['Portfolio_Value'].iloc[0]) - 1) * 100:.2f}%")

Mean-variance Portfolio value time series:
            Portfolio_Value
date                       
2020-07-01   1001536.134107
2020-07-02   1004131.147730
2020-07-06   1022196.153661
2020-07-07   1012410.866996
2020-07-08   1027198.165629
2020-07-09   1022896.935108
2020-07-10   1024625.851853
2020-07-13   1017492.878424
2020-07-14   1038984.450149
2020-07-15   1041192.177049

Portfolio stats:
Start value: $1,001,536.13
End value: $1,528,623.28
Total return: 52.63%


In [48]:
# Walk through the intermediate objects of the portfolio valuation:
# share counts, the matching price columns, per-position daily values,
# and the summed daily portfolio value.
print(f'shares_series:\n{shares_series}')
print(f'\nTradeData[available_tickers]:\n{TradeData[available_tickers]}')
print(f'\ndaily_values = TradeData[available_tickers].multiply(shares_series, axis=1)')
print(f'daily_values:\n{daily_values}')
print(f'\nportfolio_time_series = daily_values.sum(axis=1)')
print(f'portfolio_time_series;\n{portfolio_time_series}')

shares_series:
Ticker
AAPL   4234.615017
HD      927.119532
UNH     521.427281
V       756.389585
CRM     462.082451
MSFT    253.205482
Name: shares, dtype: float64

TradeData[available_tickers]:
tic              AAPL         HD        UNH          V        CRM       MSFT
date                                                                        
2020-07-01  88.485016 220.004150 275.883606 187.013794 190.223175 196.120148
2020-07-02  88.485016 220.314484 276.374664 188.837784 190.857605 197.614746
2020-07-06  90.852013 221.245392 280.590881 190.854843 196.002533 201.868637
2020-07-07  90.570114 219.294922 274.882843 187.419128 194.674164 199.521317
2020-07-08  92.679512 220.908493 276.467316 188.258774 198.540283 203.909393
...               ...        ...        ...        ...        ...        ...
2021-10-21 146.503113 332.192474 417.441620 223.846924 287.292603 301.193115
2021-10-22 145.728882 333.440613 423.935272 224.809433 290.018677 299.642273
2021-10-25 145.679886 338.241913 4

##########################

# PyPortfOpt

In [50]:
# Work on a copy of the training price matrix so StockData itself is not
# modified by the steps that follow.
df = StockData.copy()
print(f'df:\n{df}')

df:
tic             AAPL       AMGN       AXP         BA        CAT        CRM      CSCO       CVX        DIS         GS         HD        HON       IBM      INTC        JNJ       JPM        KO        MCD        MMM       MRK       MSFT       NKE         PG        TRV        UNH          V        VZ       WBA       WMT
date                                                                                                                                                                                                                                                                                                                        
2009-01-02  2.727417  40.791451 14.891695  33.941090  30.233925   8.431122 11.099333 39.716728  20.259752  65.354576  16.048880  22.746801 47.715305  9.516951  37.172798 20.872557 13.757771  40.240009  29.900188 16.682100  14.897431 10.691023  38.529922  30.502073  21.773306  11.876763 13.710851 15.305817 13.433236
2009-01-05  2.842525  41.247849 15.369339  34

In [51]:
from pypfopt.expected_returns import mean_historical_return
from pypfopt.risk_models import CovarianceShrinkage

# Expected returns estimated from the price history in df (df holds prices,
# hence returns_data=False), using frequency=252 periods per year.
mu = mean_historical_return(df, returns_data=False, compounding=True, frequency=252, log_returns=False)  # default setting
# Alternative kept for comparison: the same call with compounding=False.
# mu = mean_historical_return(df, returns_data=False, compounding=False, frequency=252, log_returns=False)
# Covariance estimate from the same price history using the Ledoit-Wolf
# shrinkage method of CovarianceShrinkage.
S = CovarianceShrinkage(df).ledoit_wolf()

print(f'mu.head():\n{mu.head()}')
print(f'\nS:\n{S}')

mu.head():
tic
AAPL   0.354397
AMGN   0.149513
AXP    0.168516
BA     0.158303
CAT    0.122437
dtype: float64

S:
tic      AAPL     AMGN      AXP       BA      CAT      CRM     CSCO      CVX      DIS       GS       HD      HON      IBM     INTC      JNJ      JPM       KO      MCD      MMM      MRK     MSFT      NKE       PG      TRV      UNH        V       VZ      WBA      WMT
tic                                                                                                                                                                                                                                                                      
AAPL 0.079422 0.026585 0.044063 0.041597 0.042936 0.045229 0.039122 0.032467 0.032466 0.045153 0.032482 0.035691 0.030370 0.041728 0.018456 0.045835 0.017913 0.022027 0.030949 0.020507 0.038911 0.033019 0.018754 0.025602 0.032368 0.036539 0.016385 0.026881 0.015739
AMGN 0.026585 0.064831 0.032552 0.027985 0.029750 0.032874 0.027833 0.026263 0.026044 0.

In [52]:
# Show floats with six decimals in the printouts below.
pd.set_option('display.float_format', '{:.6f}'.format)

trading_days = 252

# Invert annual compounding for the whole Series at once:
# (1 + daily) ** trading_days == 1 + mu  =>  daily = (1 + mu) ** (1 / trading_days) - 1
daily_returns = (1 + mu).pow(1 / trading_days).sub(1)

print("--- Annualized Returns mu ---")
print(mu.head())
print("\n--- Converted to Daily Returns ---")
print(daily_returns.head())

--- Annualized Returns mu ---
tic
AAPL   0.354397
AMGN   0.149513
AXP    0.168516
BA     0.158303
CAT    0.122437
dtype: float64

--- Converted to Daily Returns ---
tic
AAPL   0.001205
AMGN   0.000553
AXP    0.000618
BA     0.000583
CAT    0.000458
dtype: float64


In [53]:
from pypfopt.efficient_frontier import EfficientFrontier

# Max-Sharpe optimisation on the annualised inputs mu and S, with each
# asset's weight bounded to the range [0, 0.5].
ef = EfficientFrontier(mu, S, weight_bounds=(0, 0.5))
weights = ef.max_sharpe()

In [54]:
# Clean the optimised weights, write them to "weights.txt" (a relative path,
# so the file lands in the current working directory), and print the mapping.
cleaned_weights = ef.clean_weights()
ef.save_weights_to_file("weights.txt")  # saves to file
print(cleaned_weights)

OrderedDict([('AAPL', 0.43768), ('AMGN', 0.0), ('AXP', 0.0), ('BA', 0.0), ('CAT', 0.0), ('CRM', 0.04491), ('CSCO', 0.0), ('CVX', 0.0), ('DIS', 0.0), ('GS', 0.0), ('HD', 0.22789), ('HON', 0.0), ('IBM', 0.0), ('INTC', 0.0), ('JNJ', 0.0), ('JPM', 0.0), ('KO', 0.0), ('MCD', 0.0), ('MMM', 0.0), ('MRK', 0.0), ('MSFT', 0.04164), ('NKE', 0.0), ('PG', 0.0), ('TRV', 0.0), ('UNH', 0.10723), ('V', 0.14065), ('VZ', 0.0), ('WBA', 0.0), ('WMT', 0.0)])


In [55]:
# Create a DataFrame from the tickers and dollar amounts
allocation = pd.DataFrame({
    'Ticker': list(cleaned_weights.keys()),
    'Weight': list(cleaned_weights.values())
})

print(f'allocation:\n{allocation}')

allocation:
   Ticker   Weight
0    AAPL 0.437680
1    AMGN 0.000000
2     AXP 0.000000
3      BA 0.000000
4     CAT 0.000000
5     CRM 0.044910
6    CSCO 0.000000
7     CVX 0.000000
8     DIS 0.000000
9      GS 0.000000
10     HD 0.227890
11    HON 0.000000
12    IBM 0.000000
13   INTC 0.000000
14    JNJ 0.000000
15    JPM 0.000000
16     KO 0.000000
17    MCD 0.000000
18    MMM 0.000000
19    MRK 0.000000
20   MSFT 0.041640
21    NKE 0.000000
22     PG 0.000000
23    TRV 0.000000
24    UNH 0.107230
25      V 0.140650
26     VZ 0.000000
27    WBA 0.000000
28    WMT 0.000000


In [None]:
# Show only the rows that are kept: tickers whose weight exceeds 1%.
filtered = allocation.loc[allocation['Weight'] > 0.01]
print("Filtered rows (weights > 1%):")
print(filtered)

Filtered rows (weights > 1%):
   Ticker   Weight
0    AAPL 0.437680
5     CRM 0.044910
10     HD 0.227890
20   MSFT 0.041640
24    UNH 0.107230
25      V 0.140650
