In [66]:
# importing required libraries
import numpy as np
import pandas as pd

import datetime as dt
import yfinance as yf
import pandas_datareader.data as web
import cpi
import wbdata

import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

from scipy.optimize import minimize

ModuleNotFoundError: No module named 'wbdata'

In [67]:
#!pip install yfinance --upgrade --no-cache-dir

In [68]:
# Random stock selection

# Fixed seed so the same stocks are drawn on every run (change it to draw a different set)
np.random.seed(42)

# Universe of candidate tickers (can be customized)
ticker_list = [
    'AAPL', 'AXP', 'BAC', 'C', 'CSCO', 'GS', 'IBM', 'INTC', 'JPM', 'MSFT',
    'NVDA', 'CRM', 'QCOM', 'NOW', 'ORCL', 'AVGO', 'TXN', 'GDDY', 'WIX', 'TSM',
    'TSLA', 'SNOW', 'HUBS', 'DOCU', 'MS',
]

# Draw fifteen distinct tickers (no replacement) and report them in draw order
stock_list = np.random.choice(ticker_list, size=15, replace=False)
print(f'These are the fifteen stocks assigned to you: {" ".join(stock_list)}')

# Keep the selection as an alphabetically ordered list
stock_list = sorted(stock_list)

These are the fifteen stocks assigned to you: JPM TXN AAPL DOCU CRM MSFT NOW AXP HUBS GS BAC QCOM AVGO C CSCO


In [69]:
# Daily return panel for the fifteen selected stocks, used to find the optimal
# historical portfolio over the 2000-2024 window.
# Assume no short-selling constraints

# Sample window
start = dt.datetime(2000, 1, 1)
end = dt.datetime(2024, 12, 31)

# The download window is widened by one business day on each side of [start, end]
# (presumably so the first in-window date has a prior close to compute a return from,
# and so the end date itself is included -- confirm against yfinance's end-date handling)
download_start = start - pd.offsets.BDay(1)
download_end = end + pd.offsets.BDay(1)
adj_close = yf.download(stock_list, download_start, download_end, auto_adjust=False)['Adj Close']

# Simple daily returns from adjusted close prices; dropna() keeps only the dates
# on which every company has a return available
returns = adj_close.pct_change().dropna()

# Alternative that keeps every field (Close, High, Low, Open, Volume) instead of returns
#returns = yf.download(stock_list, start, end)

returns.head()

[*********************100%***********************]  15 of 15 completed


Ticker,AAPL,AVGO,AXP,BAC,C,CRM,CSCO,DOCU,GS,HUBS,JPM,MSFT,NOW,QCOM,TXN
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2018-04-30,0.018112,-0.014434,-0.008633,-0.007629,-0.010437,0.006405,-0.009394,-0.027687,-0.00613,-0.018081,-0.005667,-0.024004,0.01814,-0.001956,-0.010536
2018-05-01,0.023236,0.004184,-0.001722,0.001003,-0.000293,0.017522,0.012192,0.021227,-0.006965,-0.005666,0.0,0.015826,-0.004334,-0.003725,0.019422
2018-05-02,0.044175,-0.007119,-0.009637,-0.012354,-0.003809,-0.002356,-0.021637,-0.003295,-0.01107,0.014245,-0.007906,-0.015684,0.000484,-0.01102,-0.004255
2018-05-03,0.001812,-0.015258,-0.005019,-0.012846,-0.000735,0.012946,0.013224,0.067141,-0.002735,0.0103,-0.006301,0.005989,0.019517,0.000995,0.006216
2018-05-04,0.039233,0.023397,0.012456,0.003425,0.004732,0.005707,0.019352,-0.020734,0.006555,0.019926,0.011097,0.011587,0.014757,0.043332,0.016605


In [70]:
# Monthly returns: compound the daily returns inside each calendar month,
# i.e. prod(1 + r) - 1, labelled with the month-end date ('ME').
# The built-in resampler product is used instead of a per-group Python lambda.
# NOTE(review): the first bucket only contains the trading days that survive the
# dropna() on the daily panel, so it can be a partial month (possibly a single day)
# -- confirm whether that first row should be kept.
returns_mon = returns.add(1).resample(rule='ME').prod().sub(1)
returns_mon

Ticker,AAPL,AVGO,AXP,BAC,C,CRM,CSCO,DOCU,GS,HUBS,JPM,MSFT,NOW,QCOM,TXN
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2018-04-30,0.018112,-0.014434,-0.008633,-0.007629,-0.010437,0.006405,-0.009394,-0.027687,-0.006130,-0.018081,-0.005667,-0.024004,0.018140,-0.001956,-0.010536
2018-05-31,0.135125,0.098727,-0.004557,-0.025446,-0.018520,0.068931,-0.035674,0.289412,-0.048887,0.144476,-0.016271,0.061467,0.069038,0.151289,0.109965
2018-06-30,-0.009418,-0.024502,-0.003052,-0.029270,0.003448,0.054666,0.007492,0.063040,-0.023508,0.034654,-0.026260,-0.002327,-0.028940,-0.034412,-0.014834
2018-07-31,0.027983,-0.086012,0.019156,0.095424,0.074268,0.005499,-0.009537,0.017941,0.076438,-0.010367,0.109162,0.075753,0.020235,0.142017,0.015296
2018-08-31,0.200422,-0.012355,0.064912,0.001619,-0.002795,0.113234,0.129581,0.158442,0.004917,0.157937,-0.003219,0.062992,0.115935,0.072086,0.009702
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-08-31,0.032353,0.013319,0.022170,0.010915,-0.025235,-0.022797,0.043137,0.067231,0.008327,0.004104,0.056391,-0.001095,0.049866,-0.031224,0.051666
2024-09-30,0.017467,0.062937,0.048521,-0.019909,-0.000639,0.083985,0.053027,0.048640,-0.029672,0.065181,-0.062011,0.031548,0.046070,-0.024936,-0.036251
2024-10-31,-0.030429,-0.015826,-0.001528,0.053931,0.025080,0.064521,0.036989,0.117410,0.045808,0.043623,0.058901,-0.055659,0.043158,-0.042811,-0.010033
2024-11-30,0.051707,-0.045297,0.128110,0.136059,0.114204,0.132546,0.081066,0.148602,0.175322,0.299681,0.125270,0.044192,0.124814,-0.026049,-0.010484


In [71]:
# S&P 500 daily returns, stored as a one-column frame named 'sp500'
sp500 = yf.Ticker("^GSPC")

# NOTE(review): period="25y" is counted back from the run date rather than tied to
# the start/end window used for the stocks -- confirm this is intended.
sp500_history = sp500.history(period="25y")

# Build the frame from the return series directly, so only 'sp500' is kept no matter
# which other columns the price history contains (the first row is NaN: no prior close).
sp500_data = sp500_history['Close'].pct_change().to_frame('sp500')

# Reduce the index to plain calendar dates: drop any time zone while keeping the local
# date, then set the time of day to midnight. This avoids a string round-trip.
sp500_data.index = sp500_data.index.tz_localize(None).normalize()



In [72]:
# Sanity check: show the type of the first label in the S&P 500 date index
first_label = sp500_data.index[0]
type(first_label)

pandas._libs.tslibs.timestamps.Timestamp

In [73]:
# Monthly S&P 500 returns: compound the daily returns inside each calendar month,
# i.e. prod(1 + r) - 1, labelled with the month-end date ('ME').
# prod() skips missing values, so the NaN left by pct_change() on the first day is ignored.
# The built-in resampler product is used instead of a per-group Python lambda.
sp500_mon = sp500_data.add(1).resample(rule='ME').prod().sub(1)
sp500_mon

Unnamed: 0_level_0,sp500
Date,Unnamed: 1_level_1
2000-03-31,0.028799
2000-04-30,-0.030796
2000-05-31,-0.021915
2000-06-30,0.023934
2000-07-31,-0.016341
...,...
2024-11-30,0.057301
2024-12-31,-0.024990
2025-01-31,0.027016
2025-02-28,-0.014242


In [74]:
# Inspect the dates that label the monthly stock returns (the keys used for the merges below)
returns_mon.index

DatetimeIndex(['2018-04-30', '2018-05-31', '2018-06-30', '2018-07-31',
               '2018-08-31', '2018-09-30', '2018-10-31', '2018-11-30',
               '2018-12-31', '2019-01-31', '2019-02-28', '2019-03-31',
               '2019-04-30', '2019-05-31', '2019-06-30', '2019-07-31',
               '2019-08-31', '2019-09-30', '2019-10-31', '2019-11-30',
               '2019-12-31', '2020-01-31', '2020-02-29', '2020-03-31',
               '2020-04-30', '2020-05-31', '2020-06-30', '2020-07-31',
               '2020-08-31', '2020-09-30', '2020-10-31', '2020-11-30',
               '2020-12-31', '2021-01-31', '2021-02-28', '2021-03-31',
               '2021-04-30', '2021-05-31', '2021-06-30', '2021-07-31',
               '2021-08-31', '2021-09-30', '2021-10-31', '2021-11-30',
               '2021-12-31', '2022-01-31', '2022-02-28', '2022-03-31',
               '2022-04-30', '2022-05-31', '2022-06-30', '2022-07-31',
               '2022-08-31', '2022-09-30', '2022-10-31', '2022-11-30',
      

In [75]:
# Attach the monthly S&P 500 return to the monthly stock returns.
# Left join on the date index: every row of returns_mon is kept and the
# index return is matched by date.
result = returns_mon.merge(sp500_mon, how='left', left_index=True, right_index=True)
result

Unnamed: 0_level_0,AAPL,AVGO,AXP,BAC,C,CRM,CSCO,DOCU,GS,HUBS,JPM,MSFT,NOW,QCOM,TXN,sp500
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2018-04-30,0.018112,-0.014434,-0.008633,-0.007629,-0.010437,0.006405,-0.009394,-0.027687,-0.006130,-0.018081,-0.005667,-0.024004,0.018140,-0.001956,-0.010536,0.002719
2018-05-31,0.135125,0.098727,-0.004557,-0.025446,-0.018520,0.068931,-0.035674,0.289412,-0.048887,0.144476,-0.016271,0.061467,0.069038,0.151289,0.109965,0.021608
2018-06-30,-0.009418,-0.024502,-0.003052,-0.029270,0.003448,0.054666,0.007492,0.063040,-0.023508,0.034654,-0.026260,-0.002327,-0.028940,-0.034412,-0.014834,0.004842
2018-07-31,0.027983,-0.086012,0.019156,0.095424,0.074268,0.005499,-0.009537,0.017941,0.076438,-0.010367,0.109162,0.075753,0.020235,0.142017,0.015296,0.036022
2018-08-31,0.200422,-0.012355,0.064912,0.001619,-0.002795,0.113234,0.129581,0.158442,0.004917,0.157937,-0.003219,0.062992,0.115935,0.072086,0.009702,0.030263
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-08-31,0.032353,0.013319,0.022170,0.010915,-0.025235,-0.022797,0.043137,0.067231,0.008327,0.004104,0.056391,-0.001095,0.049866,-0.031224,0.051666,0.022835
2024-09-30,0.017467,0.062937,0.048521,-0.019909,-0.000639,0.083985,0.053027,0.048640,-0.029672,0.065181,-0.062011,0.031548,0.046070,-0.024936,-0.036251,0.020197
2024-10-31,-0.030429,-0.015826,-0.001528,0.053931,0.025080,0.064521,0.036989,0.117410,0.045808,0.043623,0.058901,-0.055659,0.043158,-0.042811,-0.010033,-0.009897
2024-11-30,0.051707,-0.045297,0.128110,0.136059,0.114204,0.132546,0.081066,0.148602,0.175322,0.299681,0.125270,0.044192,0.124814,-0.026049,-0.010484,0.057301


In [76]:
# get monthly std dev (volatility)
# get monthly SPX returns (historical)

In [77]:
# Risk-free rate from the Fama-French research factors dataset.
# Entry 0 of the returned collection is used (monthly observations, judging by the
# dates it displays); 'RF' is divided by 100 (presumably percent -> decimal).
ff_factors = web.DataReader('F-F_Research_Data_Factors', 'famafrench', start, end)
rf = ff_factors[0][['RF']] / 100

# Convert the period index to timestamps at the end of each period, then reset the
# time of day to midnight so the labels are plain month-end dates
period_end_stamps = rf.index.to_timestamp(how='end')
rf.index = period_end_stamps.normalize()
rf

  rf = web.DataReader('F-F_Research_Data_Factors','famafrench', start, end)[0][['RF']].div(100)
  rf = web.DataReader('F-F_Research_Data_Factors','famafrench', start, end)[0][['RF']].div(100)


Unnamed: 0_level_0,RF
Date,Unnamed: 1_level_1
2000-01-31,0.0041
2000-02-29,0.0043
2000-03-31,0.0047
2000-04-30,0.0046
2000-05-31,0.0050
...,...
2024-08-31,0.0048
2024-09-30,0.0040
2024-10-31,0.0039
2024-11-30,0.0040


In [78]:
# Put the risk-free rate on the same dates as returns_mon: each date takes the
# observation at that date or, failing that, the most recent earlier one (forward fill)
aligned_rf = rf.reindex(index=returns_mon.index, method='ffill')

In [79]:
# Display the risk-free rate after re-indexing onto the returns_mon dates
aligned_rf

Unnamed: 0_level_0,RF
Date,Unnamed: 1_level_1
2018-04-30,0.0014
2018-05-31,0.0014
2018-06-30,0.0014
2018-07-31,0.0016
2018-08-31,0.0016
...,...
2024-08-31,0.0048
2024-09-30,0.0040
2024-10-31,0.0039
2024-11-30,0.0040


In [80]:
# Attach the monthly risk-free rate to the combined panel (left join on the date index).
# Any risk-free column left in `result` by a previous run of this cell is dropped first,
# so re-running the cell does not create suffixed duplicates ('RF_x' / 'RF_y').
# NOTE(review): this merges `rf`, not the `aligned_rf` frame built earlier in the
# notebook -- confirm which of the two is intended here.
result = pd.merge(
    result.drop(columns=rf.columns, errors='ignore'),
    rf,
    how='left',
    left_index=True,
    right_index=True,
)


In [81]:
# Display the combined panel: stock returns, S&P 500 return and risk-free rate
result

Unnamed: 0_level_0,AAPL,AVGO,AXP,BAC,C,CRM,CSCO,DOCU,GS,HUBS,JPM,MSFT,NOW,QCOM,TXN,sp500,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-04-30,0.018112,-0.014434,-0.008633,-0.007629,-0.010437,0.006405,-0.009394,-0.027687,-0.006130,-0.018081,-0.005667,-0.024004,0.018140,-0.001956,-0.010536,0.002719,0.0014
2018-05-31,0.135125,0.098727,-0.004557,-0.025446,-0.018520,0.068931,-0.035674,0.289412,-0.048887,0.144476,-0.016271,0.061467,0.069038,0.151289,0.109965,0.021608,0.0014
2018-06-30,-0.009418,-0.024502,-0.003052,-0.029270,0.003448,0.054666,0.007492,0.063040,-0.023508,0.034654,-0.026260,-0.002327,-0.028940,-0.034412,-0.014834,0.004842,0.0014
2018-07-31,0.027983,-0.086012,0.019156,0.095424,0.074268,0.005499,-0.009537,0.017941,0.076438,-0.010367,0.109162,0.075753,0.020235,0.142017,0.015296,0.036022,0.0016
2018-08-31,0.200422,-0.012355,0.064912,0.001619,-0.002795,0.113234,0.129581,0.158442,0.004917,0.157937,-0.003219,0.062992,0.115935,0.072086,0.009702,0.030263,0.0016
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-08-31,0.032353,0.013319,0.022170,0.010915,-0.025235,-0.022797,0.043137,0.067231,0.008327,0.004104,0.056391,-0.001095,0.049866,-0.031224,0.051666,0.022835,0.0048
2024-09-30,0.017467,0.062937,0.048521,-0.019909,-0.000639,0.083985,0.053027,0.048640,-0.029672,0.065181,-0.062011,0.031548,0.046070,-0.024936,-0.036251,0.020197,0.0040
2024-10-31,-0.030429,-0.015826,-0.001528,0.053931,0.025080,0.064521,0.036989,0.117410,0.045808,0.043623,0.058901,-0.055659,0.043158,-0.042811,-0.010033,-0.009897,0.0039
2024-11-30,0.051707,-0.045297,0.128110,0.136059,0.114204,0.132546,0.081066,0.148602,0.175322,0.299681,0.125270,0.044192,0.124814,-0.026049,-0.010484,0.057301,0.0040


In [82]:
# Monthly inflation derived from the FRED series 'CPIAUCSL'
# NOTE(review): this assignment rebinds the name `cpi`, which the import cell binds
# to the `cpi` package -- confirm nothing later relies on the package under that name.
cpi = web.DataReader('CPIAUCSL', 'fred', start, end)

# Period-over-period percentage change of the series; rows without a value
# (such as the first one, which has no predecessor) are dropped
inflation_rate = cpi.pct_change().dropna()
inflation_rate.columns = ['Inflation Rate']

# Re-label each observation with the last day of its month at midnight
monthly_periods = inflation_rate.index.to_period('M')
inflation_rate.index = monthly_periods.to_timestamp(how='end').normalize()

inflation_rate

Unnamed: 0_level_0,Inflation Rate
DATE,Unnamed: 1_level_1
2000-02-29,0.004135
2000-03-31,0.005882
2000-04-30,-0.000585
2000-05-31,0.001755
2000-06-30,0.005841
...,...
2024-08-31,0.001802
2024-09-30,0.002292
2024-10-31,0.002265
2024-11-30,0.002805


In [None]:
# import wbdata
# # Define the country code (for the US, it's 'USA')
# country = "USA"

# # Define the indicator for inflation (consumer prices, annual % change)
# indicator = {'FP.CPI.TOTL.ZG': 'Inflation'}

# # Define the time range
# data_date = dt.datetime(2000, 1, 1), dt.datetime(2025, 1, 1)

# # Fetch the data
# inflation_data = wbdata.get_dataframe(indicator, country=country, freq='M')
# print(inflation_data)

      Inflation
date           
2023   4.116338
2022   8.002800
2021   4.697859
2020   1.233584
2019   1.812210
...         ...
1964   1.278912
1963   1.239669
1962   1.198773
1961   1.070724
1960   1.457976

[64 rows x 1 columns]
