In [1]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import pprint
import inspect  # <--- ADD THIS LINE
from IPython.display import display, Markdown

# --- 1. PANDAS & IPYTHON OPTIONS ---
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 3000)
pd.set_option('display.float_format', '{:.3f}'.format)
%load_ext autoreload
%autoreload 2

# --- 2. PROJECT PATH CONFIGURATION ---
NOTEBOOK_DIR = Path.cwd()
PARENT_DIR = NOTEBOOK_DIR.parent
ROOT_DIR = NOTEBOOK_DIR.parent.parent  # Adjust if your notebook is in a 'notebooks' subdirectory
DATA_DIR = ROOT_DIR / 'data'
SRC_DIR = ROOT_DIR / 'src'

# Add 'src' to the Python path to import custom modules
if str(SRC_DIR) not in sys.path:
    sys.path.append(str(SRC_DIR))

# --- 3. IMPORT CUSTOM MODULES ---
import utils

# --- 5. VERIFICATION ---
print("--- Path Configuration ---")
print(f"✅ Project Root: {ROOT_DIR}")
print(f"✅ Parent Dir:   {PARENT_DIR}")
print(f"✅ Notebook Dir: {NOTEBOOK_DIR}")
print(f"✅ Data Dir:     {DATA_DIR}")
print(f"✅ Source Dir:   {SRC_DIR}")
assert all([ROOT_DIR.exists(), DATA_DIR.exists(), SRC_DIR.exists()]), "A key directory was not found!"

print("\n--- Module Verification ---")
print(f"✅ Successfully imported 'utils' and 'plotting_utils'.")

--- Path Configuration ---
✅ Project Root: c:\Users\ping\Files_win10\python\py311\stocks
✅ Parent Dir:   c:\Users\ping\Files_win10\python\py311\stocks\notebooks_PyPortfOpt
✅ Notebook Dir: c:\Users\ping\Files_win10\python\py311\stocks\notebooks_PyPortfOpt\_working
✅ Data Dir:     c:\Users\ping\Files_win10\python\py311\stocks\data
✅ Source Dir:   c:\Users\ping\Files_win10\python\py311\stocks\src

--- Module Verification ---
✅ Successfully imported 'utils' and 'plotting_utils'.


In [2]:
# Read both CSVs from the notebook's parent directory.
train = pd.read_csv(PARENT_DIR / 'train_data.csv')
trade = pd.read_csv(PARENT_DIR / 'trade_data.csv')

# If you are not using the data generated in part 1 of this tutorial, make
# sure it already has the columns and index the environment expects; in that
# case the two re-indexing lines below can be commented out and skipped.
# Each line turns the first CSV column into the index and gives it an
# empty-string name.
train = train.set_index(train.columns[0]).rename_axis('')
trade = trade.set_index(trade.columns[0]).rename_axis('')

print(f'train:\n{train}')
print(f'\ntrade:\n{trade}')

train:
            date   tic   close    high     low    open        volume   day   macd  boll_ub  boll_lb  rsi_30  cci_30   dx_30  close_30_sma  close_60_sma    vix  turbulence
                                                                                                                                                                          
0     2009-01-02  AAPL   2.727   2.736   2.559   2.581 746015200.000 4.000  0.000    2.948    2.622 100.000  66.667 100.000         2.727         2.727 39.190       0.000
0     2009-01-02  AMGN  40.791  40.854  39.934  40.515   6547900.000 4.000  0.000    2.948    2.622 100.000  66.667 100.000        40.791        40.791 39.190       0.000
0     2009-01-02   AXP  14.892  15.038  14.175  14.306  10955700.000 4.000  0.000    2.948    2.622 100.000  66.667 100.000        14.892        14.892 39.190       0.000
0     2009-01-02    BA  33.941  34.174  32.088  32.103   7010200.000 4.000  0.000    2.948    2.622 100.000  66.667 100.000        33.941 

In [3]:
def process_df_for_mvo(df):
  """Reshape a long-format frame into a wide table of closing prices.

  Args:
    df: DataFrame with at least the columns "date", "tic" and "close".

  Returns:
    DataFrame indexed by "date" with one column per "tic" value, holding the
    matching "close" entries.
  """
  layout = {"index": "date", "columns": "tic", "values": "close"}
  return df.pivot(**layout)

In [4]:
# Codes in this section partially refer to Dr G A Vijayalakshmi Pai
# https://www.kaggle.com/code/vijipai/lesson-5-mean-variance-optimization-of-portfolios/notebook

def StockReturnsComputing(StockPrice, Rows, Columns):
  """Compute simple period-over-period percentage returns for a price matrix.

  Args:
    StockPrice: 2-D array-like of prices, one row per period and one column
      per asset. Anything ``np.asarray`` can convert is accepted.
    Rows: Number of leading price rows to use.
    Columns: Number of leading asset columns to use.

  Returns:
    float64 ndarray of shape ``(Rows - 1, Columns)`` whose entry ``[i, j]`` is
    ``(P[i+1, j] - P[i, j]) / P[i, j] * 100``.

  Raises:
    ValueError: if ``Rows`` is less than 1 or ``Columns`` is negative.
    IndexError: if ``Rows``/``Columns`` exceed the dimensions of ``StockPrice``.
  """
  prices = np.asarray(StockPrice, dtype=float)

  if Rows < 1 or Columns < 0:
    raise ValueError("Rows must be >= 1 and Columns must be >= 0")
  # Slicing would silently truncate an oversized request, so check explicitly.
  if Rows > prices.shape[0] or Columns > prices.shape[1]:
    raise IndexError("Rows/Columns exceed the dimensions of StockPrice")

  window = prices[:Rows, :Columns]
  previous, current = window[:-1], window[1:]
  # Same arithmetic as the element-wise loop, applied to whole arrays at once.
  return (current - previous) / previous * 100

In [5]:
# Pivot both long-format frames into wide date-by-ticker close-price tables.
StockData, TradeData = (process_df_for_mvo(frame) for frame in (train, trade))

# Show the trade-period prices as a bare NumPy matrix.
TradeData.to_numpy()

array([[ 88.48501587, 218.33639526,  88.27112579, ...,  40.32659149,
         31.70895386,  37.14975357],
       [ 88.48501587, 221.00656128,  88.53392029, ...,  40.41510391,
         32.56217194,  37.00077438],
       [ 90.85201263, 219.30348206,  90.64569855, ...,  40.74704361,
         33.47745514,  36.90145111],
       ...,
       [145.67988586, 184.12481689, 173.82574463, ...,  41.43462753,
         40.17977142,  47.49373245],
       [146.34632874, 185.05596924, 172.52902222, ...,  41.82864761,
         39.43690872,  47.07912064],
       [145.88569641, 183.19369507, 169.74493408, ...,  41.47403717,
         38.46547699,  46.69298553]])

In [7]:
# Percentage returns for every asset in the training price table.
arStockPrices = np.asarray(StockData)
Rows, Cols = arStockPrices.shape
arReturns = StockReturnsComputing(arStockPrices, Rows, Cols)

# Per-asset mean return (averaged down the rows) and the covariance matrix
# with columns treated as the variables.
meanReturns = arReturns.mean(axis=0)
covReturns = np.cov(arReturns, rowvar=False)

# Three decimals, no scientific notation, for every NumPy print that follows.
np.set_printoptions(precision=3, suppress=True)

# Show both estimates.
print('Mean returns of assets in k-portfolio 1\n', meanReturns)
print('Variance-Covariance matrix of returns\n', covReturns)

Mean returns of assets in k-portfolio 1
 [0.136 0.068 0.086 0.083 0.066 0.134 0.06  0.035 0.072 0.056 0.103 0.073
 0.033 0.076 0.047 0.073 0.042 0.056 0.054 0.056 0.103 0.089 0.041 0.053
 0.104 0.11  0.044 0.042 0.042]
Variance-Covariance matrix of returns
 [[3.156 1.066 1.768 1.669 1.722 1.814 1.569 1.302 1.302 1.811 1.303 1.432
  1.218 1.674 0.74  1.839 0.719 0.884 1.241 0.823 1.561 1.324 0.752 1.027
  1.298 1.466 0.657 1.078 0.631]
 [1.066 2.571 1.306 1.123 1.193 1.319 1.116 1.053 1.045 1.269 1.068 1.089
  0.899 1.218 0.926 1.391 0.682 0.727 1.025 1.156 1.166 0.984 0.798 0.956
  1.259 1.111 0.688 1.091 0.682]
 [1.768 1.306 4.847 2.73  2.6   2.128 1.944 2.141 2.17  3.142 1.932 2.283
  1.56  2.012 0.993 3.707 1.094 1.319 1.845 1.236 1.899 1.894 1.041 1.921
  1.823 2.314 0.986 1.421 0.707]
 [1.669 1.123 2.73  4.892 2.363 1.979 1.7   2.115 1.959 2.387 1.773 2.319
  1.571 1.797 0.968 2.597 1.144 1.298 1.643 1.071 1.615 1.775 0.91  1.666
  1.707 1.784 0.82  1.345 0.647]
 [1.722 1.193 2.6 

### Summary of the Workflow
1. Setup: Provide the model with historical asset returns (meanReturns) and their risk/correlation profile (covReturns), and set rules (like no short-selling and a 50% max allocation per asset).
2. Optimize: Solve for the portfolio that gives the best risk-adjusted return (maximum Sharpe Ratio).
3. Clean: Tidy up the resulting percentage allocations to make them practical.
4. Execute: Convert these optimal percentages into actual dollar amounts based on a total portfolio size of $1,000,000.

In [None]:
from pypfopt.efficient_frontier import EfficientFrontier

# Maximum-Sharpe optimisation with every weight bounded to [0, 0.5].
# NOTE(review): max_sharpe() is called with its default risk_free_rate;
# meanReturns are daily percentage returns, so confirm the default is in the
# same units.
ef_mean = EfficientFrontier(meanReturns, covReturns, weight_bounds=(0, 0.5))
raw_weights_mean = ef_mean.max_sharpe()
cleaned_weights_mean = ef_mean.clean_weights()

# Scale the fractional weights to a 1,000,000 portfolio. Reading .values()
# instead of looking up integer keys 0..n-1 keeps this correct however the
# assets are labelled (positions, tickers, ...), and preserves asset order.
mvo_weights = 1_000_000 * np.array(list(cleaned_weights_mean.values()))
mvo_weights  # Dollar amount allocation

array([375410.,      0.,      0.,      0.,      0.,  85810.,      0.,
            0.,      0.,      0., 205910.,      0.,      0.,      0.,
            0.,      0.,      0.,      0.,      0.,      0.,  49370.,
            0.,      0.,      0., 142510., 141010.,      0.,      0.,
            0.])

In [None]:
# Display the mapping most recently returned by ef_mean.clean_weights().
cleaned_weights_mean

In [None]:
# Last row of the training price table, as a 1-D array.
final_close = StockData.tail(1).to_numpy()[0]

# NOTE: despite its name, LastPrice holds the reciprocal (1 / price) of each
# final price, so multiplying by dollar amounts yields share counts.
LastPrice = np.array(1 / final_close)
Initial_Portfolio = mvo_weights * LastPrice
Initial_Portfolio  # number of shares

array([4234.615,    0.   ,    0.   ,    0.   ,    0.   ,  462.082,
          0.   ,    0.   ,    0.   ,    0.   ,  927.12 ,    0.   ,
          0.   ,    0.   ,    0.   ,    0.   ,    0.   ,    0.   ,
          0.   ,    0.   ,  253.205,    0.   ,    0.   ,    0.   ,
        521.427,  756.39 ,    0.   ,    0.   ,    0.   ])

In [67]:
# Portfolio value per trade date: each row of prices dotted with the fixed
# share counts. Initial_Portfolio is a plain array, so the product is
# positional; presumably TradeData's columns are in the same asset order as
# StockData's (verify).
Portfolio_Assets = TradeData.dot(Initial_Portfolio)
MVO_result = Portfolio_Assets.to_frame(name="Mean Var")
print(f'MVO_result:\n{MVO_result}')

MVO_result:
              Mean Var
date                  
2020-07-01 1001536.134
2020-07-02 1004131.148
2020-07-06 1022196.154
2020-07-07 1012410.867
2020-07-08 1027198.166
...                ...
2021-10-21 1524363.962
2021-10-22 1527223.554
2021-10-25 1533948.898
2021-10-26 1537081.639
2021-10-27 1528623.281

[335 rows x 1 columns]


### Verify Calculation

In [6]:
# Inspect TradeData: the names of its row and column axes, then the full frame.
print(f'TradeData.index.names: {TradeData.index.names}')
print(f'TradeData.columns.names: {TradeData.columns.names}')
print(f'\nTradeData:\n{TradeData}')

TradeData.index.names: ['date']
TradeData.columns.names: ['tic']

TradeData:
tic           AAPL    AMGN     AXP      BA     CAT     CRM   CSCO    CVX     DIS      GS      HD     HON     IBM   INTC     JNJ     JPM     KO     MCD     MMM    MRK    MSFT     NKE      PG     TRV     UNH       V     VZ    WBA    WMT
date                                                                                                                                                                                                                                      
2020-07-01  88.485 218.336  88.271 180.320 113.409 190.223 39.309 70.369 111.157 175.234 220.004 130.248  90.751 52.426 121.802  80.762 38.513 164.716 107.145 63.806 196.120  91.369 105.358 101.159 275.884 187.014 40.327 31.709 37.150
2020-07-02  88.485 221.007  88.534 180.810 114.903 190.858 39.248 70.924 110.341 175.083 220.314 131.070  91.639 52.711 122.314  81.025 38.565 163.700 108.255 64.345 197.615  92.335 106.148 101.123 276.375 188.838 40.4

In [20]:
# Inspect StockData: the names of its row and column axes, then the full frame.
print(f'StockData.index.names: {StockData.index.names}')
print(f'StockData.columns.names: {StockData.columns.names}')
print(f'\nStockData:\n{StockData}')

StockData.index.names: ['date']
StockData.columns.names: ['tic']

StockData:
tic          AAPL    AMGN    AXP      BA     CAT     CRM   CSCO    CVX     DIS      GS      HD     HON    IBM   INTC     JNJ    JPM     KO     MCD     MMM    MRK    MSFT    NKE      PG     TRV     UNH       V     VZ    WBA    WMT
date                                                                                                                                                                                                                                 
2009-01-02  2.727  40.791 14.892  33.941  30.234   8.431 11.099 39.717  20.260  65.355  16.049  22.747 47.715  9.517  37.173 20.873 13.758  40.240  29.900 16.682  14.897 10.691  38.530  30.502  21.773  11.877 13.711 15.306 13.433
2009-01-05  2.843  41.248 15.369  34.631  29.699   8.265 11.197 39.789  19.904  66.876  16.435  22.629 47.415  9.335  36.805 19.474 13.620  40.120  29.552 16.429  15.037 10.768  38.254  30.050  21.418  11.961 12.856 16.079 13.278
200

In [18]:
import pandas as pd

# Work on a private copy so StockData itself is left untouched: other cells
# read StockData, and rewriting its index here would make their input depend
# on whether (and when) this cell has been run.
stock_prices = StockData.copy()

# If 'date' arrived as a regular column, promote it to the index first.
if 'date' in stock_prices.columns:
    stock_prices = stock_prices.set_index('date')

# Make the index a proper datetime index for time-series operations.
stock_prices.index = pd.to_datetime(stock_prices.index)

# pct_change() computes (price_today / price_yesterday) - 1 for each column;
# multiplying by 100 expresses it in percent. dropna() then removes the first
# row (no prior day to compare against) and any other row containing a NaN.
daily_returns_pct = (stock_prices.pct_change() * 100).dropna()

print(daily_returns_pct)

tic          AAPL   AMGN    AXP     BA    CAT    CRM   CSCO    CVX    DIS     GS     HD    HON    IBM   INTC    JNJ    JPM     KO    MCD    MMM    MRK   MSFT    NKE     PG    TRV    UNH      V     VZ    WBA    WMT
date                                                                                                                                                                                                                 
2009-01-05  4.220  1.119  3.207  2.033 -1.769 -1.969  0.884  0.183 -1.756  2.328  2.404 -0.519 -0.630 -1.908 -0.989 -6.699 -1.002 -0.298 -1.166 -1.516  0.935  0.716 -0.717 -1.482 -1.631  0.711 -6.236  5.049 -1.154
2009-01-06 -1.649 -2.196  5.614  0.303 -0.608  3.748  3.974  0.900  3.447 -0.079  2.226  4.524  2.776  3.085 -0.599  2.154 -1.607 -2.234  1.179 -1.834  1.170 -2.021 -0.289 -3.054 -2.358  7.042 -1.601 -0.484 -0.885
2009-01-07 -2.161 -0.240 -4.213 -3.347 -4.651 -7.514 -2.642 -4.383 -4.648 -4.746 -2.732 -4.412 -1.614 -6.051 -0.938 -5.991  0.492 -1.448 -1.892 

In [19]:
# Pairwise covariance of the tickers' daily percentage returns; the diagonal
# holds each ticker's own variance.
cov_matrix = daily_returns_pct.cov()

# Column-wise average, i.e. the mean daily return of each ticker.
mean_returns_pct = daily_returns_pct.mean()

print(f'mean_returns_pct.head():\n{mean_returns_pct.head()}')
print(f'\ncov_matrix.head():\n{cov_matrix.head()}')

mean_returns_pct.head():
tic
AAPL   0.136
AMGN   0.068
AXP    0.086
BA     0.083
CAT    0.066
dtype: float64

cov_matrix.head():
tic   AAPL  AMGN   AXP    BA   CAT   CRM  CSCO   CVX   DIS    GS    HD   HON   IBM  INTC   JNJ   JPM    KO   MCD   MMM   MRK  MSFT   NKE    PG   TRV   UNH     V    VZ   WBA   WMT
tic                                                                                                                                                                               
AAPL 3.156 1.066 1.768 1.669 1.722 1.814 1.569 1.302 1.302 1.811 1.303 1.432 1.218 1.674 0.740 1.839 0.719 0.884 1.241 0.823 1.561 1.324 0.752 1.027 1.298 1.466 0.657 1.078 0.631
AMGN 1.066 2.571 1.306 1.123 1.193 1.319 1.116 1.053 1.045 1.269 1.068 1.089 0.899 1.218 0.926 1.391 0.682 0.727 1.025 1.156 1.166 0.984 0.798 0.956 1.259 1.111 0.688 1.091 0.682
AXP  1.768 1.306 4.847 2.730 2.600 2.128 1.944 2.141 2.170 3.142 1.932 2.283 1.560 2.012 0.993 3.707 1.094 1.319 1.845 1.236 1.899 1.894 1.041 1.921 1.823 

In [61]:
from pypfopt.efficient_frontier import EfficientFrontier

# Repeat the maximum-Sharpe optimisation using the pandas-derived estimates
# (mean_returns_pct, cov_matrix) with every weight bounded to [0, 0.5].
# This rebinds ef_mean, raw_weights_mean and cleaned_weights_mean.
ef_mean = EfficientFrontier(mean_returns_pct, cov_matrix, weight_bounds=(0, 0.5))
raw_weights_mean = ef_mean.max_sharpe()
cleaned_weights_mean = ef_mean.clean_weights()
# NOTE(review): the inputs here are label-indexed pandas objects, so
# cleaned_weights_mean is presumably keyed by ticker; a positional lookup such
# as cleaned_weights_mean[i] would then fail. Iterate .values() to get the
# weights in asset order.

In [62]:
# Build the allocation table from a single source: tickers and dollar amounts
# both come from cleaned_weights_mean. This avoids pairing the current tickers
# with an mvo_weights array left over from an earlier optimisation run.
# Each fractional weight is scaled to a 1,000,000 portfolio.
df_allocation = pd.DataFrame({
    'Ticker': list(cleaned_weights_mean.keys()),
    'Dollar_Amount': [1_000_000 * weight for weight in cleaned_weights_mean.values()],
})

print(f'df_allocation:\n{df_allocation}')

df_allocation:
   Ticker  Dollar_Amount
0    AAPL     375410.000
1    AMGN          0.000
2     AXP          0.000
3      BA          0.000
4     CAT          0.000
5     CRM      85810.000
6    CSCO          0.000
7     CVX          0.000
8     DIS          0.000
9      GS          0.000
10     HD     205910.000
11    HON          0.000
12    IBM          0.000
13   INTC          0.000
14    JNJ          0.000
15    JPM          0.000
16     KO          0.000
17    MCD          0.000
18    MMM          0.000
19    MRK          0.000
20   MSFT      49370.000
21    NKE          0.000
22     PG          0.000
23    TRV          0.000
24    UNH     142510.000
25      V     141010.000
26     VZ          0.000
27    WBA          0.000
28    WMT          0.000


In [63]:
# Keep only positions worth more than 0.01 (i.e. drop the zero allocations),
# as an independent copy, ordered from the largest dollar amount down.
has_allocation = df_allocation['Dollar_Amount'].gt(0.01)
df_invest = (
    df_allocation.loc[has_allocation]
    .copy()
    .sort_values(by='Dollar_Amount', ascending=False)
)

# Dollar_Amount is intentionally left numeric (not formatted as a string).

print(f'df_invest:\n{df_invest}')

df_invest:
   Ticker  Dollar_Amount
0    AAPL     375410.000
10     HD     205910.000
24    UNH     142510.000
25      V     141010.000
5     CRM      85810.000
20   MSFT      49370.000


In [None]:
# Final row of the training price table as a Series keyed by ticker.
last_prices = StockData.iloc[-1]

# Look up each holding's last price by ticker, then convert dollars to shares.
df_invest['last_price'] = df_invest['Ticker'].map(last_prices)
df_invest['shares'] = df_invest['Dollar_Amount'].div(df_invest['last_price'])

# Show the enriched holdings table.
print(df_invest)

   Ticker  Dollar_Amount  last_price   shares
0    AAPL     375410.000      88.653 4234.615
10     HD     205910.000     222.096  927.120
24    UNH     142510.000     273.308  521.427
25      V     141010.000     186.425  756.390
5     CRM      85810.000     185.703  462.082
20   MSFT      49370.000     194.980  253.205


In [None]:
# Holdings that also have a price column in TradeData.
portfolio_tickers = df_invest['Ticker'].tolist()
available_tickers = [
    ticker for ticker in portfolio_tickers if ticker in TradeData.columns
]

# Share counts for those holdings, indexed by ticker.
is_available = df_invest['Ticker'].isin(available_tickers)
shares_series = df_invest.loc[is_available].set_index('Ticker')['shares']

# Price x shares per ticker per day (aligned on ticker labels), then summed
# across tickers to get the daily portfolio value.
daily_values = TradeData[available_tickers].mul(shares_series, axis=1)
portfolio_time_series = daily_values.sum(axis=1)

# Single-column result frame.
portfolio_result = portfolio_time_series.to_frame(name='Portfolio_Value')

# First and last portfolio values drive the summary statistics.
start_value = portfolio_result['Portfolio_Value'].iloc[0]
end_value = portfolio_result['Portfolio_Value'].iloc[-1]

print("Mean-variance Portfolio value time series:")
print(portfolio_result.head(10))
print(f"\nPortfolio stats:")
print(f"Start value: ${start_value:,.2f}")
print(f"End value: ${end_value:,.2f}")
print(f"Total return: {((end_value / start_value) - 1) * 100:.2f}%")

Portfolio value time series:
            Portfolio_Value
date                       
2020-07-01      1001536.134
2020-07-02      1004131.148
2020-07-06      1022196.154
2020-07-07      1012410.867
2020-07-08      1027198.166
2020-07-09      1022896.935
2020-07-10      1024625.852
2020-07-13      1017492.878
2020-07-14      1038984.450
2020-07-15      1041192.177

Portfolio stats:
Start value: $1,001,536.13
End value: $1,528,623.28
Total return: 52.63%


In [84]:
# Walk through the intermediate objects of the portfolio-value calculation:
# the share counts, the matching price columns, the per-ticker daily values
# and the summed daily portfolio value. The lines without a placeholder just
# echo the expression that produced the object printed right after them.
print(f'shares_series:\n{shares_series}')
print(f'\nTradeData[available_tickers]:\n{TradeData[available_tickers]}')
print(f'\ndaily_values = TradeData[available_tickers].multiply(shares_series, axis=1)')
print(f'daily_values:\n{daily_values}')
print(f'\nportfolio_time_series = daily_values.sum(axis=1)')
print(f'portfolio_time_series;\n{portfolio_time_series}')

shares_series:
Ticker
AAPL   4234.615
HD      927.120
UNH     521.427
V       756.390
CRM     462.082
MSFT    253.205
Name: shares, dtype: float64

TradeData[available_tickers]:
tic           AAPL      HD     UNH       V     CRM    MSFT
date                                                      
2020-07-01  88.485 220.004 275.884 187.014 190.223 196.120
2020-07-02  88.485 220.314 276.375 188.838 190.858 197.615
2020-07-06  90.852 221.245 280.591 190.855 196.003 201.869
2020-07-07  90.570 219.295 274.883 187.419 194.674 199.521
2020-07-08  92.680 220.908 276.467 188.259 198.540 203.909
...            ...     ...     ...     ...     ...     ...
2021-10-21 146.503 332.192 417.442 223.847 287.293 301.193
2021-10-22 145.729 333.441 423.935 224.809 290.019 299.642
2021-10-25 145.680 338.242 424.247 227.386 291.367 298.644
2021-10-26 146.346 336.365 429.107 225.383 292.547 300.563
2021-10-27 145.886 339.208 428.013 209.788 292.586 313.221

[335 rows x 6 columns]

daily_values = TradeData[avail

In [None]:
# ===== PyPortfolioOpt estimators: historical mean returns and shrinkage covariance =====

In [86]:
# Take an independent copy of the wide training price table; the estimator
# calls below read `df` rather than StockData itself.
df = StockData.copy()
print(f'df:\n{df}')

df:
tic          AAPL    AMGN    AXP      BA     CAT     CRM   CSCO    CVX     DIS      GS      HD     HON    IBM   INTC     JNJ    JPM     KO     MCD     MMM    MRK    MSFT    NKE      PG     TRV     UNH       V     VZ    WBA    WMT
date                                                                                                                                                                                                                                 
2009-01-02  2.727  40.791 14.892  33.941  30.234   8.431 11.099 39.717  20.260  65.355  16.049  22.747 47.715  9.517  37.173 20.873 13.758  40.240  29.900 16.682  14.897 10.691  38.530  30.502  21.773  11.877 13.711 15.306 13.433
2009-01-05  2.843  41.248 15.369  34.631  29.699   8.265 11.197 39.789  19.904  66.876  16.435  22.629 47.415  9.335  36.805 19.474 13.620  40.120  29.552 16.429  15.037 10.768  38.254  30.050  21.418  11.961 12.856 16.079 13.278
2009-01-06  2.796  40.342 16.232  34.736  29.519   8.575 11.643 40.148  20.5

In [None]:
from pypfopt.expected_returns import mean_historical_return
from pypfopt.risk_models import CovarianceShrinkage

# Covariance estimate from the price table via Ledoit-Wolf shrinkage.
S = CovarianceShrinkage(df).ledoit_wolf()

# Expected returns from prices with the library's default settings, spelled
# out explicitly. Set compounding=False here to try the alternative averaging.
return_settings = dict(returns_data=False, compounding=True, frequency=252, log_returns=False)
mu = mean_historical_return(df, **return_settings)

print(f'mu.head():\n{mu.head()}')
print(f'\nS:\n{S}')

mu.head():
tic
AAPL   0.343321
AMGN   0.171606
AXP    0.216063
BA     0.208823
CAT    0.166210
dtype: float64

S:
tic      AAPL     AMGN      AXP       BA      CAT      CRM     CSCO      CVX      DIS       GS       HD      HON      IBM     INTC      JNJ      JPM       KO      MCD      MMM      MRK     MSFT      NKE       PG      TRV      UNH        V       VZ      WBA      WMT
tic                                                                                                                                                                                                                                                                      
AAPL 0.079422 0.026585 0.044063 0.041597 0.042936 0.045229 0.039122 0.032467 0.032466 0.045153 0.032482 0.035691 0.030370 0.041728 0.018456 0.045835 0.017913 0.022027 0.030949 0.020507 0.038911 0.033019 0.018754 0.025602 0.032368 0.036539 0.016385 0.026881 0.015739
AMGN 0.026585 0.064831 0.032552 0.027985 0.029750 0.032874 0.027833 0.026263 0.026044 0.

In [96]:
# Show six decimals from here on; daily figures are too small for three.
pd.set_option('display.float_format', '{:.6f}'.format)

trading_days = 252

# De-annualise: (1 + annual) ** (1 / trading_days) - 1, applied to the whole
# Series at once.
daily_returns = mu.add(1).pow(1 / trading_days).sub(1)

print("--- Annualized Returns mu ---")
print(mu.head())
print("\n--- Converted to Daily Returns ---")
print(daily_returns.head())

--- Annualized Returns mu ---
tic
AAPL   0.343321
AMGN   0.171606
AXP    0.216063
BA     0.208823
CAT    0.166210
dtype: float64

--- Converted to Daily Returns ---
tic
AAPL   0.001172
AMGN   0.000629
AXP    0.000777
BA     0.000753
CAT    0.000610
dtype: float64


In [101]:
from pypfopt.efficient_frontier import EfficientFrontier

# Maximum-Sharpe optimisation on the PyPortfolioOpt estimates (mu, S) with
# every weight bounded to [0, 0.5].
ef = EfficientFrontier(mu, S, weight_bounds=(0, 0.5))
weights = ef.max_sharpe()

In [102]:
# Get the cleaned weights from the optimiser, write the weights to
# "weights.txt" (a relative path, so it is resolved against the current
# working directory), and print the cleaned mapping.
cleaned_weights = ef.clean_weights()
ef.save_weights_to_file("weights.txt")  # saves to file
print(cleaned_weights)

OrderedDict([('AAPL', 0.37239), ('AMGN', 0.0), ('AXP', 0.0), ('BA', 0.0), ('CAT', 0.0), ('CRM', 0.08679), ('CSCO', 0.0), ('CVX', 0.0), ('DIS', 0.0), ('GS', 0.0), ('HD', 0.20448), ('HON', 0.0), ('IBM', 0.0), ('INTC', 0.0), ('JNJ', 0.0), ('JPM', 0.0), ('KO', 0.0), ('MCD', 0.0), ('MMM', 0.0), ('MRK', 0.0), ('MSFT', 0.05189), ('NKE', 0.0), ('PG', 0.0), ('TRV', 0.0), ('UNH', 0.14294), ('V', 0.1415), ('VZ', 0.0), ('WBA', 0.0), ('WMT', 0.0)])


In [104]:
# Two-column table of (ticker, weight) pairs, in the mapping's own order.
allocation = pd.DataFrame(
    list(cleaned_weights.items()),
    columns=['Ticker', 'Weight'],
)

print(f'allocation:\n{allocation}')

allocation:
   Ticker   Weight
0    AAPL 0.372390
1    AMGN 0.000000
2     AXP 0.000000
3      BA 0.000000
4     CAT 0.000000
5     CRM 0.086790
6    CSCO 0.000000
7     CVX 0.000000
8     DIS 0.000000
9      GS 0.000000
10     HD 0.204480
11    HON 0.000000
12    IBM 0.000000
13   INTC 0.000000
14    JNJ 0.000000
15    JPM 0.000000
16     KO 0.000000
17    MCD 0.000000
18    MMM 0.000000
19    MRK 0.000000
20   MSFT 0.051890
21    NKE 0.000000
22     PG 0.000000
23    TRV 0.000000
24    UNH 0.142940
25      V 0.141500
26     VZ 0.000000
27    WBA 0.000000
28    WMT 0.000000


In [None]:
# Keep (and show) only the holdings whose weight is above 1%.
above_one_percent = allocation['Weight'].gt(0.01)
filtered = allocation.loc[above_one_percent]
print("Filtered rows (weights > 1%):")
print(filtered)

Filtered out rows (weights < 1%):
   Ticker   Weight
0    AAPL 0.372390
5     CRM 0.086790
10     HD 0.204480
20   MSFT 0.051890
24    UNH 0.142940
25      V 0.141500
