In [2]:
# importing required libraries
import numpy as np
import pandas as pd

import datetime as dt
import yfinance as yf
import pandas_datareader.data as web
import cpi
import wbdata

import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

from scipy.optimize import minimize

In [3]:
# customize seed for different stocks
np.random.seed(42)

# list of all potential stocks (can be customized)
ticker_list = ['AAPL','AXP', 'BAC', 'C', 'CSCO', 'GS', 'IBM', 'INTC', 'JPM', 'MSFT', 'NVDA', 'CRM', 'QCOM', 'NOW', 'ORCL', 'AVGO', 'TXN', 'GDDY',
               'WIX', 'TSM', 'TSLA', 'SNOW', 'HUBS', 'DOCU', 'MS']

# selecting 15 random stocks
stock_list = np.random.choice(ticker_list,15,replace=False)
print(f'These are the fifteen stocks assigned to you: {" ".join(stock_list)}')

# sorting the list from A-Z
stock_list = list(np.sort(stock_list))

These are the fifteen stocks assigned to you: JPM TXN AAPL DOCU CRM MSFT NOW AXP HUBS GS BAC QCOM AVGO C CSCO


In [4]:
# finding optimal historical portfolio since 2000 to today, using 15 stocks. 
# Assume no short-selling constraints

# defining start and end date
start = dt.datetime(2000, 1, 1)
end = dt.datetime(2024, 12, 31)

# getting returns of the stocks
# calculates returns using adjusted close price
# ensures that we have returns of only those dates where all companies have available information
returns = yf.download(stock_list, start-pd.offsets.BDay(1), end+pd.offsets.BDay(1), auto_adjust=False)['Adj Close'].pct_change().dropna()

# gets all returns Close, high, low, open, volume
#returns = yf.download(stock_list, start, end)

# getting monthly returns

# S&P 500
sp500 = yf.Ticker("^GSPC")
sp500_data = sp500.history(period="25y")
sp500_data['SP500'] = sp500_data['Close'].pct_change()
sp500_data = sp500_data.drop(columns=["Open", "High", "Low", "Close", "Volume", "Dividends", "Stock Splits"])


sp500_data.index = sp500_data.index.strftime('%m-%d-%Y')
sp500_data.index = pd.to_datetime(sp500_data.index)

# sp500_mon = sp500_data.resample(rule = 'ME').apply(lambda x: x.add(1).prod().sub(1))



[*********************100%***********************]  15 of 15 completed


In [5]:
returns = pd.merge(returns, sp500_data, left_index = True, right_index = True )

returns_mon = returns.resample(rule = 'ME').apply(lambda x: x.add(1).prod().sub(1))
returns_mon.index

DatetimeIndex(['2018-04-30', '2018-05-31', '2018-06-30', '2018-07-31',
               '2018-08-31', '2018-09-30', '2018-10-31', '2018-11-30',
               '2018-12-31', '2019-01-31', '2019-02-28', '2019-03-31',
               '2019-04-30', '2019-05-31', '2019-06-30', '2019-07-31',
               '2019-08-31', '2019-09-30', '2019-10-31', '2019-11-30',
               '2019-12-31', '2020-01-31', '2020-02-29', '2020-03-31',
               '2020-04-30', '2020-05-31', '2020-06-30', '2020-07-31',
               '2020-08-31', '2020-09-30', '2020-10-31', '2020-11-30',
               '2020-12-31', '2021-01-31', '2021-02-28', '2021-03-31',
               '2021-04-30', '2021-05-31', '2021-06-30', '2021-07-31',
               '2021-08-31', '2021-09-30', '2021-10-31', '2021-11-30',
               '2021-12-31', '2022-01-31', '2022-02-28', '2022-03-31',
               '2022-04-30', '2022-05-31', '2022-06-30', '2022-07-31',
               '2022-08-31', '2022-09-30', '2022-10-31', '2022-11-30',
      

In [6]:
print(returns.loc[(returns.index.year == 2019) & (returns.index.month == 1)]['SP500'].std())

# Calculating STDEV for each month
monthly_std = returns.groupby(by=[returns.index.month, returns.index.year]).std()
monthly_std.index = pd.to_datetime(monthly_std.index.map(lambda x: f"{x[1]}-{x[0]}"), format="%Y-%m")
monthly_std.sort_index(inplace=True)
monthly_std.index = returns_mon.index

monthly_std.head()

0.011677586389697103


Unnamed: 0_level_0,AAPL,AVGO,AXP,BAC,C,CRM,CSCO,DOCU,GS,HUBS,JPM,MSFT,NOW,QCOM,TXN,SP500
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2018-04-30,,,,,,,,,,,,,,,,
2018-05-31,0.01377,0.01264,0.01036,0.014239,0.015037,0.011716,0.012305,0.026296,0.010957,0.015443,0.014172,0.008415,0.013723,0.015838,0.010325,0.00655
2018-06-30,0.008709,0.01344,0.008554,0.011222,0.010926,0.012263,0.01063,0.038368,0.009118,0.029476,0.010105,0.010327,0.018725,0.01073,0.012988,0.005575
2018-07-31,0.009658,0.035898,0.012573,0.015174,0.013644,0.016388,0.013935,0.032875,0.009614,0.018243,0.013648,0.013346,0.022173,0.020328,0.012673,0.005448
2018-08-31,0.014122,0.01263,0.006729,0.008901,0.011724,0.010659,0.008982,0.031341,0.011369,0.021076,0.007395,0.007513,0.015087,0.00931,0.013441,0.004431


In [7]:
# Finding risk free-rate
rf = web.DataReader('F-F_Research_Data_Factors','famafrench', start, end)[0][['RF']].div(100)
rf.index = rf.index.to_timestamp(how='end').normalize()



# Fetch CPI data from FRED
cpi = web.DataReader('CPIAUCSL', 'fred', start, end)

# Calculate monthly inflation rates as percentage change
inflation_rate = cpi.pct_change().dropna()
# Optionally normalize the index to end-of-month timestamps
inflation_rate.index = inflation_rate.index.to_period('M').to_timestamp(how='end').normalize()
inflation_rate.columns = ['Inflation Rate']



# Rolling Average
rolling_avg = returns.rolling(window=30).mean()
rolling_avg.columns = [name+"_ROLL_AVG" for name in rolling_avg.columns]

  rf = web.DataReader('F-F_Research_Data_Factors','famafrench', start, end)[0][['RF']].div(100)
  rf = web.DataReader('F-F_Research_Data_Factors','famafrench', start, end)[0][['RF']].div(100)


In [8]:
result = pd.concat([returns_mon, monthly_std], axis=1, keys=['RETURN', 'STDEV'])
result.columns = result.columns.get_level_values(1) + '_' + result.columns.get_level_values(0).astype(str)


result = pd.merge(result, rf, how='left', left_index=True, right_index=True)
result = pd.merge(result, inflation_rate, how = 'left', left_index=True, right_index=True) 
result = pd.merge(result, rolling_avg, how = 'left', left_index=True, right_index=True)


In [9]:
# pd.set_option('display.max_rows', None)
pd.reset_option('display.max_rows')

In [10]:
result


Unnamed: 0_level_0,AAPL_RETURN,AVGO_RETURN,AXP_RETURN,BAC_RETURN,C_RETURN,CRM_RETURN,CSCO_RETURN,DOCU_RETURN,GS_RETURN,HUBS_RETURN,...,CSCO_ROLL_AVG,DOCU_ROLL_AVG,GS_ROLL_AVG,HUBS_ROLL_AVG,JPM_ROLL_AVG,MSFT_ROLL_AVG,NOW_ROLL_AVG,QCOM_ROLL_AVG,TXN_ROLL_AVG,SP500_ROLL_AVG
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-04-30,0.018112,-0.014434,-0.008634,-0.007628,-0.010436,0.006405,-0.009394,-0.027687,-0.006130,-0.018081,...,,,,,,,,,,
2018-05-31,0.135124,0.098727,-0.004557,-0.025446,-0.018520,0.068931,-0.035674,0.289412,-0.048887,0.144476,...,,,,,,,,,,
2018-06-30,-0.009418,-0.024502,-0.003052,-0.029270,0.003449,0.054666,0.007492,0.063040,-0.023508,0.034654,...,,,,,,,,,,
2018-07-31,0.027983,-0.086012,0.019156,0.095423,0.074268,0.005499,-0.009537,0.017941,0.076438,-0.010367,...,-0.001139,-0.005023,0.000911,-0.003804,0.002290,0.001763,-0.001706,0.002819,-0.000642,0.000524
2018-08-31,0.200423,-0.012355,0.064911,0.001619,-0.002795,0.113234,0.129581,0.158442,0.004917,0.157937,...,0.004341,0.006517,0.001031,0.002738,0.001008,0.002034,0.001099,0.005438,-0.000507,0.001177
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-08-31,0.032353,0.013318,0.022170,0.010915,-0.025235,-0.022797,0.043137,0.067231,0.008327,0.004104,...,,,,,,,,,,
2024-09-30,0.017467,0.062937,0.048521,-0.019909,-0.000639,0.083985,0.053027,0.048640,-0.029672,0.065181,...,0.002484,0.003163,-0.000277,0.002284,-0.000395,0.000986,0.002729,0.000024,0.001187,0.001259
2024-10-31,-0.030429,-0.015826,-0.001528,0.053931,0.025080,0.064521,0.036989,0.117410,0.045808,0.043623,...,0.002396,0.006218,0.000980,0.001649,0.002047,-0.002453,0.000647,-0.001991,-0.000362,-0.000029
2024-11-30,0.051707,-0.045297,0.128110,0.136059,0.114204,0.132546,0.081066,0.148602,0.175322,0.299681,...,,,,,,,,,,


# MLP 

lstm rnn