In [1]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import pprint
import inspect  # <--- ADD THIS LINE
from IPython.display import display, Markdown

# --- 1. PANDAS & IPYTHON OPTIONS ---
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 3000)
pd.set_option('display.float_format', '{:.6f}'.format)
%load_ext autoreload
%autoreload 2

# --- 2. PROJECT PATH CONFIGURATION ---
NOTEBOOK_DIR = Path.cwd()
PARENT_DIR = NOTEBOOK_DIR.parent
ROOT_DIR = NOTEBOOK_DIR.parent.parent  # Adjust if your notebook is in a 'notebooks' subdirectory
DATA_DIR = ROOT_DIR / 'data'
SRC_DIR = ROOT_DIR / 'src'

# Add 'src' to the Python path to import custom modules
if str(SRC_DIR) not in sys.path:
    sys.path.append(str(SRC_DIR))

# --- 3. IMPORT CUSTOM MODULES ---
import utils

# --- 4. PORTFOLIO VALUE ---
PORTFOLIO_VALUE = 1000000

# --- 5. VERIFICATION ---
print("--- Path Configuration ---")
print(f"✅ Project Root: {ROOT_DIR}")
print(f"✅ Parent Dir:   {PARENT_DIR}")
print(f"✅ Notebook Dir: {NOTEBOOK_DIR}")
print(f"✅ Data Dir:     {DATA_DIR}")
print(f"✅ Source Dir:   {SRC_DIR}")
assert all([ROOT_DIR.exists(), DATA_DIR.exists(), SRC_DIR.exists()]), "A key directory was not found!"

print("\n--- Module Verification ---")
print(f"✅ Successfully imported 'utils' and 'plotting_utils'.")

--- Path Configuration ---
✅ Project Root: c:\Users\ping\Files_win10\python\py311\stocks
✅ Parent Dir:   c:\Users\ping\Files_win10\python\py311\stocks\notebooks_PyPortfOpt
✅ Notebook Dir: c:\Users\ping\Files_win10\python\py311\stocks\notebooks_PyPortfOpt\_working
✅ Data Dir:     c:\Users\ping\Files_win10\python\py311\stocks\data
✅ Source Dir:   c:\Users\ping\Files_win10\python\py311\stocks\src

--- Module Verification ---
✅ Successfully imported 'utils' and 'plotting_utils'.


# FinRL Example of Mean Variance Optimization
* https://github.com/AI4Finance-Foundation/FinRL/blob/master/examples/Stock_NeurIPS2018_3_Backtest.ipynb

In [2]:
train = pd.read_csv(PARENT_DIR / 'train_data.csv')
trade = pd.read_csv(PARENT_DIR / 'trade_data.csv')

# If you are not using the data generated from part 1 of this tutorial, make sure
# it has the columns and index in the form that could be make into the environment.
# Then you can comment and skip the following lines.
train = train.set_index(train.columns[0])
train.index.names = ['']
trade = trade.set_index(trade.columns[0])
trade.index.names = ['']

print(f'train:\n{train}')
print(f'\ntrade:\n{trade}')

train:
            date   tic      close       high        low       open           volume      day      macd    boll_ub    boll_lb     rsi_30     cci_30      dx_30  close_30_sma  close_60_sma       vix  turbulence
                                                                                                                                                                                                               
0     2009-01-02  AAPL   2.727417   2.736134   2.559415   2.581054 746015200.000000 4.000000  0.000000   2.947759   2.622184 100.000000  66.666667 100.000000      2.727417      2.727417 39.189999    0.000000
0     2009-01-02  AMGN  40.791451  40.853685  39.933992  40.514850   6547900.000000 4.000000  0.000000   2.947759   2.622184 100.000000  66.666667 100.000000     40.791451     40.791451 39.189999    0.000000
0     2009-01-02   AXP  14.891695  15.038070  14.175229  14.306196  10955700.000000 4.000000  0.000000   2.947759   2.622184 100.000000  66.666667 100.000000    

In [3]:
def process_df_for_mvo(df):
  return df.pivot(index="date", columns="tic", values="close")

In [4]:
# Codes in this section partially refer to Dr G A Vijayalakshmi Pai
# https://www.kaggle.com/code/vijipai/lesson-5-mean-variance-optimization-of-portfolios/notebook

def StockReturnsComputing(StockPrice, Rows, Columns):
  import numpy as np
  StockReturn = np.zeros([Rows-1, Columns])
  for j in range(Columns):        # j: Assets
    for i in range(Rows-1):     # i: Daily Prices
      StockReturn[i,j]=((StockPrice[i+1, j]-StockPrice[i,j])/StockPrice[i,j])* 100

  return StockReturn

In [5]:
StockData = process_df_for_mvo(train)
TradeData = process_df_for_mvo(trade)

TradeData.to_numpy()

array([[ 88.48501587, 218.33639526,  88.27112579, ...,  40.32659149,
         31.70895386,  37.14975357],
       [ 88.48501587, 221.00656128,  88.53392029, ...,  40.41510391,
         32.56217194,  37.00077438],
       [ 90.85201263, 219.30348206,  90.64569855, ...,  40.74704361,
         33.47745514,  36.90145111],
       ...,
       [145.67988586, 184.12481689, 173.82574463, ...,  41.43462753,
         40.17977142,  47.49373245],
       [146.34632874, 185.05596924, 172.52902222, ...,  41.82864761,
         39.43690872,  47.07912064],
       [145.88569641, 183.19369507, 169.74493408, ...,  41.47403717,
         38.46547699,  46.69298553]])

In [6]:
#compute asset returns
arStockPrices = np.asarray(StockData)
[Rows, Cols]=arStockPrices.shape
arReturns = StockReturnsComputing(arStockPrices, Rows, Cols)

#compute mean returns and variance covariance matrix of returns
meanReturns = np.mean(arReturns, axis = 0)
covReturns = np.cov(arReturns, rowvar=False)

#set precision for printing results
np.set_printoptions(precision=3, suppress = True)

#display mean returns and variance-covariance matrix of returns
print(f'arReturns in k-portfolio 1\n',arReturns)
print(f'\nMean returns of assets in k-portfolio 1\n', meanReturns)
print(f'\nVariance-Covariance matrix of returns\n', covReturns)

arReturns in k-portfolio 1
 [[ 4.22   1.119  3.207 ... -6.236  5.049 -1.154]
 [-1.649 -2.196  5.614 ... -1.601 -0.484 -0.885]
 [-2.161 -0.24  -4.213 ...  1.27   1.198 -0.857]
 ...
 [-3.073 -0.965 -4.527 ... -2.063 -2.069 -1.161]
 [ 2.305 -0.55   1.177 ...  2.878  2.793  0.625]
 [ 0.835  1.857  0.719 ...  0.805  0.165  0.605]]

Mean returns of assets in k-portfolio 1
 [0.136 0.068 0.086 0.083 0.066 0.134 0.06  0.035 0.072 0.056 0.103 0.073
 0.033 0.076 0.047 0.073 0.042 0.056 0.054 0.056 0.103 0.089 0.041 0.053
 0.104 0.11  0.044 0.042 0.042]

Variance-Covariance matrix of returns
 [[3.156 1.066 1.768 1.669 1.722 1.814 1.569 1.302 1.302 1.811 1.303 1.432
  1.218 1.674 0.74  1.839 0.719 0.884 1.241 0.823 1.561 1.324 0.752 1.027
  1.298 1.466 0.657 1.078 0.631]
 [1.066 2.571 1.306 1.123 1.193 1.319 1.116 1.053 1.045 1.269 1.068 1.089
  0.899 1.218 0.926 1.391 0.682 0.727 1.025 1.156 1.166 0.984 0.798 0.956
  1.259 1.111 0.688 1.091 0.682]
 [1.768 1.306 4.847 2.73  2.6   2.128 1.944 2.141 

### Summary of the Workflow
1. Setup: Provide the model with historical asset returns (meanReturns) and their risk/correlation profile (covReturns), and set rules (like no short-selling and a 50% max allocation per asset).
2. Optimize: Solve for the portfolio that gives the best risk-adjusted return (maximum Sharpe Ratio).
3. Clean: Tidy up the resulting percentage allocations to make them practical.
4. Execute: Convert these optimal percentages into actual dollar amounts based on a total portfolio size of $1,000,000.

In [7]:
from pypfopt.efficient_frontier import EfficientFrontier

ef_mean = EfficientFrontier(meanReturns, covReturns, weight_bounds=(0, 0.5))
raw_weights_mean = ef_mean.max_sharpe()
cleaned_weights_mean = ef_mean.clean_weights()
mvo_weights = np.array([PORTFOLIO_VALUE * cleaned_weights_mean[i] for i in range(len(cleaned_weights_mean))])
mvo_weights  # Dollar amount allocation

array([375410.,      0.,      0.,      0.,      0.,  85810.,      0.,
            0.,      0.,      0., 205910.,      0.,      0.,      0.,
            0.,      0.,      0.,      0.,      0.,      0.,  49370.,
            0.,      0.,      0., 142510., 141010.,      0.,      0.,
            0.])

In [8]:
cleaned_weights_mean

OrderedDict([(0, 0.37541),
             (1, 0.0),
             (2, 0.0),
             (3, 0.0),
             (4, 0.0),
             (5, 0.08581),
             (6, 0.0),
             (7, 0.0),
             (8, 0.0),
             (9, 0.0),
             (10, 0.20591),
             (11, 0.0),
             (12, 0.0),
             (13, 0.0),
             (14, 0.0),
             (15, 0.0),
             (16, 0.0),
             (17, 0.0),
             (18, 0.0),
             (19, 0.0),
             (20, 0.04937),
             (21, 0.0),
             (22, 0.0),
             (23, 0.0),
             (24, 0.14251),
             (25, 0.14101),
             (26, 0.0),
             (27, 0.0),
             (28, 0.0)])

In [9]:
LastPrice = np.array([1/p for p in StockData.tail(1).to_numpy()[0]])
Initial_Portfolio = np.multiply(mvo_weights, LastPrice)
Initial_Portfolio  # number of shares

array([4234.615,    0.   ,    0.   ,    0.   ,    0.   ,  462.082,
          0.   ,    0.   ,    0.   ,    0.   ,  927.12 ,    0.   ,
          0.   ,    0.   ,    0.   ,    0.   ,    0.   ,    0.   ,
          0.   ,    0.   ,  253.205,    0.   ,    0.   ,    0.   ,
        521.427,  756.39 ,    0.   ,    0.   ,    0.   ])

In [10]:
Portfolio_Assets = TradeData @ Initial_Portfolio
MVO_result = pd.DataFrame(Portfolio_Assets, columns=["Mean Var"])
print(f'MVO_result:\n{MVO_result}')

MVO_result:
                 Mean Var
date                     
2020-07-01 1001536.134107
2020-07-02 1004131.147730
2020-07-06 1022196.153661
2020-07-07 1012410.866996
2020-07-08 1027198.165629
...                   ...
2021-10-21 1524363.961577
2021-10-22 1527223.553729
2021-10-25 1533948.898311
2021-10-26 1537081.639312
2021-10-27 1528623.280691

[335 rows x 1 columns]


##########################

# Replicate FinRL Mean Variance Optimization with PyPortfOpt

In [11]:
df = StockData.copy()
print(f'df:\n{df}')

df:
tic             AAPL       AMGN       AXP         BA        CAT        CRM      CSCO       CVX        DIS         GS         HD        HON       IBM      INTC        JNJ       JPM        KO        MCD        MMM       MRK       MSFT       NKE         PG        TRV        UNH          V        VZ       WBA       WMT
date                                                                                                                                                                                                                                                                                                                        
2009-01-02  2.727417  40.791451 14.891695  33.941090  30.233925   8.431122 11.099333 39.716728  20.259752  65.354576  16.048880  22.746801 47.715305  9.516951  37.172798 20.872557 13.757771  40.240009  29.900188 16.682100  14.897431 10.691023  38.529922  30.502073  21.773306  11.876763 13.710851 15.305817 13.433236
2009-01-05  2.842525  41.247849 15.369339  34

In [12]:
import pandas as pd
import numpy as np
from pypfopt.expected_returns import mean_historical_return
from pypfopt.risk_models import sample_cov

# Ensure your 'StockData' is a pandas DataFrame.
# If StockData is a numpy array like in your first example, convert it:
# StockData_df = pd.DataFrame(StockData, columns=[...list of tickers...])
# Replace StockData below with your DataFrame.

# 1. Calculate mean returns to match your first script
#    - compounding=False gets the simple arithmetic mean.
#    - frequency=1 prevents annualization.
mu_simple = mean_historical_return(StockData, 
                                   returns_data=False, 
                                   compounding=False, 
                                   frequency=1)

# 2. Calculate sample covariance to match your first script
#    - Use sample_cov to get the standard covariance matrix, equivalent to np.cov().
#    - frequency=1 prevents annualization.
S_simple = sample_cov(StockData, 
                      returns_data=False, 
                      frequency=1)

# 3. Scale the outputs to match your first script's format.
#    Your first script appears to work with percentage returns (e.g., 1.5 for 1.5%),
#    while PyPortfolioOpt uses decimal returns (e.g., 0.015). We must scale the results.

# Scale mean returns by 100
mu_scaled = mu_simple * 100

# Scale covariance matrix by 100*100 = 10,000
S_scaled = S_simple * 10000

# Set precision for printing results
np.set_printoptions(precision=3, suppress = True)

# Display the results
print("Modified PyPortfolioOpt Mean Returns (scaled to match):")
print(mu_scaled)
print("\nModified PyPortfolioOpt Covariance Matrix (scaled to match):")
print(S_scaled)

Modified PyPortfolioOpt Mean Returns (scaled to match):
tic
AAPL   0.136238
AMGN   0.068098
AXP    0.085739
BA     0.082866
CAT    0.065957
CRM    0.133733
CSCO   0.059625
CVX    0.034745
DIS    0.071693
GS     0.056123
HD     0.102826
HON    0.073043
IBM    0.033160
INTC   0.076029
JNJ    0.046885
JPM    0.073116
KO     0.041837
MCD    0.056283
MMM    0.054449
MRK    0.056046
MSFT   0.102761
NKE    0.088691
PG     0.041105
TRV    0.053449
UNH    0.104352
V      0.110135
VZ     0.044446
WBA    0.041852
WMT    0.042443
dtype: float64

Modified PyPortfolioOpt Covariance Matrix (scaled to match):
tic      AAPL     AMGN      AXP       BA      CAT      CRM     CSCO      CVX      DIS       GS       HD      HON      IBM     INTC      JNJ      JPM       KO      MCD      MMM      MRK     MSFT      NKE       PG      TRV      UNH        V       VZ      WBA      WMT
tic                                                                                                                                  

# PyPortfOpt Example of Mean Variance Optimization
* https://pyportfolioopt.readthedocs.io/en/latest/UserGuide.html

In [13]:
from pypfopt.expected_returns import mean_historical_return
from pypfopt.risk_models import CovarianceShrinkage

mu = mean_historical_return(StockData, returns_data=False, compounding=True, frequency=252, log_returns=False)  # default setting
S = CovarianceShrinkage(StockData).ledoit_wolf()

print(f'mu.head():\n{mu.head()}')
print(f'\nS:\n{S}')

mu.head():
tic
AAPL   0.354397
AMGN   0.149513
AXP    0.168516
BA     0.158303
CAT    0.122437
dtype: float64

S:
tic      AAPL     AMGN      AXP       BA      CAT      CRM     CSCO      CVX      DIS       GS       HD      HON      IBM     INTC      JNJ      JPM       KO      MCD      MMM      MRK     MSFT      NKE       PG      TRV      UNH        V       VZ      WBA      WMT
tic                                                                                                                                                                                                                                                                      
AAPL 0.079422 0.026585 0.044063 0.041597 0.042936 0.045229 0.039122 0.032467 0.032466 0.045153 0.032482 0.035691 0.030370 0.041728 0.018456 0.045835 0.017913 0.022027 0.030949 0.020507 0.038911 0.033019 0.018754 0.025602 0.032368 0.036539 0.016385 0.026881 0.015739
AMGN 0.026585 0.064831 0.032552 0.027985 0.029750 0.032874 0.027833 0.026263 0.026044 0.

In [14]:
trading_days = 252

# The formula works on the entire series seamlessly
daily_returns = (1 + mu)**(1 / trading_days) - 1

print("--- Annualized Returns mu ---")
print(mu.head())
print("\n--- Converted to Daily Returns ---")
print(daily_returns.head())

--- Annualized Returns mu ---
tic
AAPL   0.354397
AMGN   0.149513
AXP    0.168516
BA     0.158303
CAT    0.122437
dtype: float64

--- Converted to Daily Returns ---
tic
AAPL   0.001205
AMGN   0.000553
AXP    0.000618
BA     0.000583
CAT    0.000458
dtype: float64


In [15]:
from pypfopt.efficient_frontier import EfficientFrontier

ef = EfficientFrontier(mu, S, weight_bounds=(0, 0.5))
weights = ef.max_sharpe()

In [16]:
cleaned_weights = ef.clean_weights()
ef.save_weights_to_file("weights.txt")  # saves to file
print(cleaned_weights)

OrderedDict([('AAPL', 0.43768), ('AMGN', 0.0), ('AXP', 0.0), ('BA', 0.0), ('CAT', 0.0), ('CRM', 0.04491), ('CSCO', 0.0), ('CVX', 0.0), ('DIS', 0.0), ('GS', 0.0), ('HD', 0.22789), ('HON', 0.0), ('IBM', 0.0), ('INTC', 0.0), ('JNJ', 0.0), ('JPM', 0.0), ('KO', 0.0), ('MCD', 0.0), ('MMM', 0.0), ('MRK', 0.0), ('MSFT', 0.04164), ('NKE', 0.0), ('PG', 0.0), ('TRV', 0.0), ('UNH', 0.10723), ('V', 0.14065), ('VZ', 0.0), ('WBA', 0.0), ('WMT', 0.0)])


In [21]:
# Create a DataFrame from the tickers and dollar amounts
allocation = pd.DataFrame({
    'Ticker': list(cleaned_weights.keys()),
    'Weight': list(cleaned_weights.values())
})

# Show filtered out rows
filtered = allocation[allocation['Weight'] > 0.01]

print(f'allocation.head():\n{allocation.head()}')
print("\nFiltered rows (weights > 1%):")
print(filtered)

allocation.head():
  Ticker   Weight
0   AAPL 0.437680
1   AMGN 0.000000
2    AXP 0.000000
3     BA 0.000000
4    CAT 0.000000

Filtered rows (weights > 1%):
   Ticker   Weight
0    AAPL 0.437680
5     CRM 0.044910
10     HD 0.227890
20   MSFT 0.041640
24    UNH 0.107230
25      V 0.140650


In [19]:
import pandas as pd
import numpy as np
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt.expected_returns import mean_historical_return
from pypfopt.risk_models import CovarianceShrinkage

# --- Step 1: Calculate Inputs using your preferred PyPortfolioOpt methods ---
# These are the annualized, compounded mean returns and the shrunk covariance matrix.
# We assume 'StockData' is your pandas DataFrame of prices.
mu = mean_historical_return(StockData, compounding=True, frequency=252)
S = CovarianceShrinkage(StockData).ledoit_wolf()


# --- Step 2: Perform Mean-Variance Optimization ---
# We use the new mu and S. We keep the weight bounds the same for a fair comparison.
# The risk-free rate for max_sharpe defaults to 2%, which is standard.
ef_pyopt = EfficientFrontier(mu, S, weight_bounds=(0, 0.5))

# Find the portfolio weights that maximize the Sharpe ratio
raw_weights_pyopt = ef_pyopt.max_sharpe()

# Clean the weights (rounding and clipping)
cleaned_weights_pyopt = ef_pyopt.clean_weights()
print("Optimal Weights from PyPortfolioOpt (mu, S):")
# The ef.portfolio_performance() is a great way to see the expected results
# ef_pyopt.portfolio_performance(verbose=True)
print(cleaned_weights_pyopt)


# --- Step 3: Convert Weights to Dollar Amounts ---
# Using the same total portfolio value of $1,000,000 as your original code.
portfolio_value = 1000000

# We need to get the weights in the same order as your stock columns.
# cleaned_weights_pyopt is a dictionary, so we map it to the column order.
ordered_weights = np.array([cleaned_weights_pyopt[ticker] for ticker in StockData.columns])
mvo_weights_pyopt = portfolio_value * ordered_weights

print("\nDollar Allocation:")
print(mvo_weights_pyopt)


# --- Step 4: Convert Dollar Amounts to Number of Shares ---
# This logic is identical to your original code: shares = dollar_amount / last_price
last_prices = StockData.iloc[-1].to_numpy()
initial_portfolio_pyopt = mvo_weights_pyopt / last_prices

print("\nNumber of Shares to Purchase:")
print(initial_portfolio_pyopt)


# --- Step 5: Perform the Final Matrix Multiplication ---
# This is the final step you wanted to replicate.
# It uses your 'TradeData' matrix with the new 'initial_portfolio_pyopt' vector.
portfolio_assets_pyopt = TradeData @ initial_portfolio_pyopt

# Store and display the final result in a DataFrame for clarity
pyopt_mvo_result = pd.DataFrame(portfolio_assets_pyopt, columns=["PyOpt_MVO_Result"])

print(f'\nFinal Result (TradeData @ Initial_Portfolio):\n{pyopt_mvo_result}')

Optimal Weights from PyPortfolioOpt (mu, S):
OrderedDict([('AAPL', 0.43768), ('AMGN', 0.0), ('AXP', 0.0), ('BA', 0.0), ('CAT', 0.0), ('CRM', 0.04491), ('CSCO', 0.0), ('CVX', 0.0), ('DIS', 0.0), ('GS', 0.0), ('HD', 0.22789), ('HON', 0.0), ('IBM', 0.0), ('INTC', 0.0), ('JNJ', 0.0), ('JPM', 0.0), ('KO', 0.0), ('MCD', 0.0), ('MMM', 0.0), ('MRK', 0.0), ('MSFT', 0.04164), ('NKE', 0.0), ('PG', 0.0), ('TRV', 0.0), ('UNH', 0.10723), ('V', 0.14065), ('VZ', 0.0), ('WBA', 0.0), ('WMT', 0.0)])

Dollar Allocation:
[437680.      0.      0.      0.      0.  44910.      0.      0.      0.
      0. 227890.      0.      0.      0.      0.      0.      0.      0.
      0.      0.  41640.      0.      0.      0. 107230. 140650.      0.
      0.      0.]

Number of Shares to Purchase:
[4937.019    0.       0.       0.       0.     241.838    0.       0.
    0.       0.    1026.086    0.       0.       0.       0.       0.
    0.       0.       0.       0.     213.56     0.       0.       0.
  392.342  754.4

In [20]:
# You can use this method instead of the simple loop for a more structured output.

# Convert the weights dictionary to a pandas Series
weights_series = pd.Series(cleaned_weights_pyopt)

# Filter for non-zero weights and sort them to see the largest allocations first
sorted_weights = weights_series[weights_series > 0].sort_values(ascending=False)

# Convert to a DataFrame for pretty printing
weights_df = sorted_weights.to_frame("Weight")

# Format the 'Weight' column to display as a percentage
weights_df["Weight"] = weights_df["Weight"].map('{:.2%}'.format)

print("\n--- Optimal Portfolio Weights (Sorted) ---")
print(weights_df)
print("-" * 35)
ef_pyopt.portfolio_performance(verbose=True)


--- Optimal Portfolio Weights (Sorted) ---
      Weight
AAPL  43.77%
HD    22.79%
V     14.06%
UNH   10.72%
CRM    4.49%
MSFT   4.16%
-----------------------------------
Expected annual return: 30.3%
Annual volatility: 21.6%
Sharpe Ratio: 1.40


(0.302676163992619, 0.2155976988341282, 1.4038932958439658)

In [None]:
##############

In [33]:
import pandas as pd

pd.set_option('display.float_format', '{:.2f}'.format)

# Use pd.read_parquet() to load a parquet file into a DataFrame
# The correct engine name is 'pyarrow'
df = pd.read_parquet(DATA_DIR / 'df_adj_close.parquet', engine='pyarrow')

In [34]:
print(f'df:\n{df}')

df:
Ticker          A    AA   AAL  AAON   AAPL   ABBV  ABEV   ABNB    ABT  ACGL  ACHR   ACI    ACM    ACN   ACWI  ACWX   ADBE   ADC    ADI   ADM    ADP   ADSK  ADT    AEE  AEG    AEM    AEP    AER   AES    AFG    AFL  AFRM   AGCO   AGG   AGI  AGNC   AIG   AIQ  AIRR    AIT    AIZ    AJG  AKAM    AL    ALB   ALC  ALGM   ALGN   ALK    ALL   ALLE  ALLY   ALNY  ALSN    ALV    AM   AMAT  AMCR    AMD    AME   AMGN   AMH  AMLP    AMP    AMT   AMX   AMZN     AN   ANET    AON   AOS   APA    APD   APG    APH    APO    APP   APPF  APTV    AR  ARCC    ARE   ARES   ARGX  ARKK  ARMK    ARW   ASML   ASND    ASR  ASTS   ASX   ATI    ATO    ATR    AU  AUR   AVAV    AVB  AVDE  AVDV  AVEM   AVGO  AVLV  AVTR   AVUS  AVUV    AVY    AWI    AWK   AXON    AXP   AXS  AXTA    AYI   AZN     AZO     B     BA   BABA   BAC    BAH  BALL   BAM    BAP   BAX  BBAX  BBCA  BBD  BBEU  BBIN  BBIO  BBJP   BBUS  BBVA  BBWI   BBY   BCE   BCH   BCS    BDX    BE  BEKE   BEN   BEP  BEPC  BF-A  BF-B   BFAM    BG   BHP   BIDU   BII

In [35]:
from pypfopt.expected_returns import mean_historical_return
from pypfopt.risk_models import CovarianceShrinkage

mu = mean_historical_return(df, returns_data=False, compounding=True, frequency=252, log_returns=False)  # default setting
S = CovarianceShrinkage(StockData).ledoit_wolf()

pd.set_option('display.float_format', '{:.6f}'.format)

print(f'mu.head():\n{mu.head()}')
print(f'\nS:\n{S}')

mu.head():
Ticker
A      -0.076897
AA     -0.124526
AAL    -0.039457
AAON    0.228968
AAPL    0.241093
dtype: float64

S:
tic      AAPL     AMGN      AXP       BA      CAT      CRM     CSCO      CVX      DIS       GS       HD      HON      IBM     INTC      JNJ      JPM       KO      MCD      MMM      MRK     MSFT      NKE       PG      TRV      UNH        V       VZ      WBA      WMT
tic                                                                                                                                                                                                                                                                      
AAPL 0.079422 0.026585 0.044063 0.041597 0.042936 0.045229 0.039122 0.032467 0.032466 0.045153 0.032482 0.035691 0.030370 0.041728 0.018456 0.045835 0.017913 0.022027 0.030949 0.020507 0.038911 0.033019 0.018754 0.025602 0.032368 0.036539 0.016385 0.026881 0.015739
AMGN 0.026585 0.064831 0.032552 0.027985 0.029750 0.032874 0.027833 0.026263 0.0

In [36]:
trading_days = 252

# The formula works on the entire series seamlessly
daily_returns = (1 + mu)**(1 / trading_days) - 1

print("--- Annualized Returns mu ---")
print(mu.head())
print("\n--- Converted to Daily Returns ---")
print(daily_returns.head())

--- Annualized Returns mu ---
Ticker
A      -0.076897
AA     -0.124526
AAL    -0.039457
AAON    0.228968
AAPL    0.241093
dtype: float64

--- Converted to Daily Returns ---
Ticker
A      -0.000317
AA     -0.000528
AAL    -0.000160
AAON    0.000818
AAPL    0.000857
dtype: float64


In [37]:
from pypfopt.efficient_frontier import EfficientFrontier

ef = EfficientFrontier(mu, S, weight_bounds=(0, 0.5))
weights = ef.max_sharpe()

ValueError: Covariance matrix does not match expected returns