In [1]:
import yfinance as yf
import pandas as pd
import datetime as dt
import os
import numpy as np
import math

* Q1 - rebalance the portfolios every month.
* Q2 - define the look-back period over which size (market cap) is measured, i.e. sort on the moving average of size over that period, still rebalancing every month.
* Q3 - add rebalancing every REB months, e.g. every 12 months, so that the same portfolio composition is kept for 12 months at a time.
* Q4 - change the percentage of companies assigned to each of the 2 portfolios.

In [2]:

def pull_stock_data(tickers:str, start:str, end:str, interval:str, stripdateindex:str, column:list) -> pd.DataFrame:
    """
    Returns a dataframe with the chosen stock information, one row per calendar month
    (the last non-missing value of every column within that month).

    Parameters:
    -------------
    tickers (str): Yahoo finance tickers for companies divided ONLY by single space
    start (str): start date of the period (format yyyy-mm-dd)
    end (str): end date of the period (format yyyy-mm-dd)
    interval (str): sampling interval passed straight to yfinance (e.g. 1d, 1wk, 1mo)
    stripdateindex (str): pandas period alias the datetime index is converted to (d - day, m - month, y - year)
    column (str or list): top-level column name(s) to return, named as in yahoo finance (e.g. ['Close', 'Volume'])

    Returns:
    -------------
    pd.DataFrame: the selected columns indexed by period; months in which any downloaded
    column is missing are dropped
    """
    # data download
    # `group_by='column'` keeps the (field, ticker) column layout that `dfg[column]` below
    # relies on. The keyword used previously (`groupby`) is not a yfinance parameter.
    df = yf.download(tickers = tickers, start = start, end = end, interval = interval, group_by = 'column')
    # keep the original timestamp as a column so the last date of every month survives the aggregation
    df['Date'] = df.index
    # one row per (year, month): last non-missing value of each column
    dfg = df.groupby([df.index.year, df.index.month], as_index=False).last()
    dfg.reset_index(inplace=True, drop=True)
    dfg.set_index('Date', inplace=True)
    # changing index from datetime to the requested period (e.g. just year and month)
    dfg.index = pd.to_datetime(dfg.index).to_period(stripdateindex)
    # dropping rows with any missing value (checked across all downloaded columns)
    dfg.dropna(inplace = True)
    return dfg[column]

    
def save_to_desktop(dataframe:pd.DataFrame, file_name:str):
    """
    Saves dataframe to the current user's Desktop folder in csv format under the filename provided

    Parameters:
    ------------
    dataframe (pd.DataFrame): dataframe that is to be saved
    file_name (str): name of the file, without the '.csv' extension (it is appended here)
    """
    # Resolve the home directory portably: expanduser works on Windows, Linux and macOS,
    # whereas the USERPROFILE environment variable only exists on Windows.
    desktop = os.path.join(os.path.expanduser('~'), 'Desktop')
    # saving file under chosen name on desktop, semicolon-separated
    dataframe.to_csv(os.path.join(desktop, file_name + '.csv'), sep=';')


In [3]:
# Download settings for the stock universe used throughout the notebook
tick = "NKE MSFT XOM INTC CAT WMT JPM F UPS MKC" # stocks universe
# first and last date requested (yyyy-mm-dd)
# NOTE(review): yfinance documents `end` as exclusive, so 2020-12-31 itself is presumably not downloaded - confirm
st = '2000-12-01'
en = '2020-12-31'
# daily bars are requested; pull_stock_data reduces them to one row per calendar month
intv = '1d'
# period alias the date index is converted to (monthly periods)
strpdt = 'm'
# price fields kept from the download
col = ['Close', 'Volume']

stocks_data = pull_stock_data(tick, st, en, intv, strpdt, col)
# column overview and non-null counts of the result
stocks_data.info()

[*********************100%***********************]  10 of 10 completed
<class 'pandas.core.frame.DataFrame'>
PeriodIndex: 241 entries, 2000-12 to 2020-12
Freq: M
Data columns (total 20 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   (Close, CAT)    241 non-null    float64
 1   (Close, F)      241 non-null    float64
 2   (Close, INTC)   241 non-null    float64
 3   (Close, JPM)    241 non-null    float64
 4   (Close, MKC)    241 non-null    float64
 5   (Close, MSFT)   241 non-null    float64
 6   (Close, NKE)    241 non-null    float64
 7   (Close, UPS)    241 non-null    float64
 8   (Close, WMT)    241 non-null    float64
 9   (Close, XOM)    241 non-null    float64
 10  (Volume, CAT)   241 non-null    int64  
 11  (Volume, F)     241 non-null    int64  
 12  (Volume, INTC)  241 non-null    int64  
 13  (Volume, JPM)   241 non-null    int64  
 14  (Volume, MKC)   241 non-null    int64  
 15  (Volume, MSFT)  241 non-null    int



Calculating the monthly stock returns (percentage change of the month-end close).

In [4]:
# Replace the month-end closing prices with period-over-period simple returns;
# the first period has no predecessor and therefore becomes NaN.
# NOTE: the prices are overwritten in place, so this cell is not idempotent - running it
# a second time takes pct_change of the returns. Re-run the download cell first.
stocks_data['Close'] = stocks_data['Close'].pct_change()

Calculating stock excess returns (stock return minus the risk-free rate) - *not sure if that's actually needed*

In [19]:
# Load the Fama-French research factors; the 'Date' column holds yyyymm codes
fama = (
    pd.read_csv(r"F-F_Research_Data_Factors.CSV", sep=';')
    .assign(Date=lambda raw: pd.to_datetime(raw['Date'].astype('string'), format='%Y%m'))
    .set_index('Date')
)
# Index by monthly period so the factors line up with the stock data
fama.index = fama.index.to_period('m')
# The CSV stores plain percentages, so rescale them to decimal fractions
fama = fama / 100
fama.head()

# tickers = stock_returns_full.columns.tolist()
# for i in tickers[:10]:
#     stock_returns_full[i + '_excess_return'] = stock_returns_full[i] - stock_returns_full['RF']

In [None]:
# Join the Fama-French factors (including the risk free rate) to the monthly stock returns by period.
# The original cell merged an undefined name `df1`, which fails on a fresh kernel. The returns
# frame built above is stocks_data['Close'] (one column per ticker), which also matches the
# "first 10 columns are tickers" assumption of the following cell.
# NOTE(review): confirm that stocks_data['Close'] is the frame `df1` was meant to be.
dffull = pd.merge(stocks_data['Close'], fama, how='left', left_index=True, right_index=True)
# drop first NULL row which is causing regression to fail
dffull = dffull.dropna()

In [None]:
# Calculate excess stock return for each company.
# The ticker return columns are the first 10 columns of dffull (the left frame of the merge).
for i in dffull.columns.tolist()[:10]:
    dffull[i+'_re'] = dffull[i]-dffull['RF']
# Excess market return: the Fama-French 'Mkt-RF' factor already is the market return minus
# the risk-free rate, so RF must not be subtracted a second time.
dffull['reM'] = dffull['Mkt-RF']
dffull.info()

### Portfolio building functions

In [None]:
# Reshape the wide (field, ticker) table into long format: one row per (Date, company)
c = stocks_data['Close'].melt(var_name='company', value_name="return", ignore_index=False)
v = stocks_data['Volume'].melt(var_name='company', value_name="volume", ignore_index=False)
# Combine returns and volumes, matching on the Date index level and the company column
df = c.merge(v, on=["Date", 'company'])
df

In [None]:
def rebalance(df_slice, sort_var, p1, p2):
    """
    Rebalancing returns a list of 2 (for now) portfolios based on the average value of the
    decision variable over the period taken into account. The period is defined by the rows
    present in df_slice for each company.

    Parameters:
    ------------
    df_slice (pd.DataFrame): rows of the look-back window; needs a 'company' column and `sort_var`
    sort_var (str): column whose per-company mean is used for the ranking
    p1 (float): fraction of the universe assigned to the "small" portfolio (count is floored)
    p2 (float): fraction of the universe assigned to the "big" portfolio (count is rounded)

    Returns:
    ------------
    list: [small_companies, big_companies], each an array of company names. The small array
    is the tail of the descending ranking, the big array is its head.

    Note: companies whose average is NaN are sorted last and can therefore end up in the
    small portfolio.

    TODO: Adjust to return "n" portfolios - this would require changing to sth more
    sophisticated than head/tail and adjusting the form in which percentages are given
    """
    # 1st part - decide on size of each portfolio - based on universe size and percentages assigned to each class of stocks (p1, p2)
    universe_size = df_slice['company'].nunique()
    small_cnt = int((universe_size*p1)//1)
    big_cnt = round(universe_size*p2)

    # Average only the decision variable: averaging every column is unnecessary and raises
    # on non-numeric columns in recent pandas versions.
    period_avg = df_slice.groupby('company')[sort_var].mean().reset_index()
    # A stable sort keeps the outcome deterministic when several companies tie.
    temp_sort = period_avg.sort_values(sort_var, ascending = False, kind = 'mergesort')
    # we need to return only the names of companies, because the values at this stage are already aggregated for the whole `period`
    small_comp = temp_sort.tail(small_cnt)['company'].values
    big_comp = temp_sort.head(big_cnt)['company'].values

    return [small_comp, big_comp]


def get_portfolio_stats(date, portfolios, df):
    """
    Summarise every portfolio for a single period.

    For each portfolio (numbered from 1) the mean 'return', the mean 'volume' and the list of
    member companies on `date` are stored under the keys ptf_<n>_return, ptf_<n>_volume and
    ptf_<n>_stocks. The resulting dict is one record of the statistics DataFrame built later.
    """
    stats = {}
    # rows belonging to the requested period, shared by all portfolios
    on_date = df.index == date
    for number, members in enumerate(portfolios, start=1):
        member_rows = df.loc[on_date & df['company'].isin(members)]
        prefix = f"ptf_{number}"
        stats[prefix + "_return"] = member_rows['return'].mean()
        stats[prefix + "_volume"] = member_rows['volume'].mean()
        stats[prefix + "_stocks"] = members
    return stats

def constrcut_portfolios(df, decision_var, reb, per, small_p, big_p, lag=0):
    """
    Build the small/big portfolios period by period and collect their statistics.

    Parameters:
    ------------
    df (pd.DataFrame): long-format data indexed by period, with a 'company' column, the
        `decision_var` column and the 'return' / 'volume' columns read by get_portfolio_stats
    decision_var (str): column the companies are ranked on
    reb (int): rebalance every `reb` periods; in between the composition is kept unchanged
    per (int): number of periods in the look-back window that is averaged for the ranking
    small_p (float): fraction of the universe assigned to the small portfolio
    big_p (float): fraction of the universe assigned to the big portfolio
    lag (int): number of periods by which the look-back window ends before the current
        period. 0 (default, previous behaviour) ends the window at the current period d,
        1 ends it at d-1 so only past information is used.

    Returns:
    ------------
    pd.DataFrame: one row per distinct period in chronological order, with the columns
    produced by get_portfolio_stats

    Raises:
    ------------
    ValueError: if `reb` is smaller than 1
    """
    if reb < 1:
        raise ValueError("reb must be a positive number of periods")

    # df is in long format, so its index repeats every period once per company. Iterate over
    # the distinct periods instead: otherwise `reb` counts rows rather than periods and the
    # result contains one duplicated row per company.
    periods = df.index.unique().sort_values()

    records = []
    portfolios = None
    for p_idx, d in enumerate(periods):
        if p_idx % reb == 0:
            # look-back window of `per` periods ending `lag` periods before d (both ends inclusive);
            # rebalancing could be every 12 months while the window is e.g. 6 months
            window_end = d - lag
            window_start = window_end - per + 1
            window = df[(df.index >= window_start) & (df.index <= window_end)]

            # portfolios after rebalancing; an empty window (e.g. first period with lag > 0)
            # yields empty portfolios and therefore NaN statistics
            portfolios = rebalance(window, decision_var, small_p, big_p)

        # this is done for each period - calculation of metrics for the current composition
        records.append(get_portfolio_stats(d, portfolios, df))

    return pd.DataFrame(records, index=periods)


In [59]:
# TESTING PERIODS - t-1
# Sanity check of the look-back window [d - per, d): with per = 1 it holds only the period before d.
per = 1
d = df.index[1]
# named `window` rather than `slice` so the builtin `slice` is not shadowed for the rest of the notebook
window = df[(df.index >= d - per) & (df.index < d)]
window

Unnamed: 0_level_0,company,return,volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-12,CAT,,3051800
2000-12,F,,4746800
2000-12,INTC,,54053600
2000-12,JPM,,9657000
2000-12,MKC,,998000
2000-12,MSFT,,99977600
2000-12,NKE,,8772800
2000-12,UPS,,1096000
2000-12,WMT,,6582300
2000-12,XOM,,9564000


QUESTION 1 - portfolios sorted based on the past size of company.
p1 - 50% smallest (by volume) stocks in t-1,
p2 - 50% biggest (by volume) stocks in t-1

In [48]:
# Q1: rebalance every period (reb=1), rank on a 1-period volume window (per=1), 50% / 50% split.
# NOTE(review): constrcut_portfolios builds the ranking window so that it ends at the current
# period (inclusive), while the description above says t-1 - confirm which is intended.
Q1 = constrcut_portfolios(
    df=df,
    decision_var="volume",
    reb=1,
    per=1,
    small_p=.5,
    big_p=.5,
)
Q1

PeriodIndex(['2000-12'], dtype='period[M]', name='Date')
PeriodIndex(['2000-12', '2001-01'], dtype='period[M]', name='Date')
PeriodIndex(['2001-01', '2001-02'], dtype='period[M]', name='Date')
PeriodIndex(['2001-02', '2001-03'], dtype='period[M]', name='Date')
PeriodIndex(['2001-03', '2001-04'], dtype='period[M]', name='Date')
PeriodIndex(['2001-04', '2001-05'], dtype='period[M]', name='Date')
PeriodIndex(['2001-05', '2001-06'], dtype='period[M]', name='Date')
PeriodIndex(['2001-06', '2001-07'], dtype='period[M]', name='Date')
PeriodIndex(['2001-07', '2001-08'], dtype='period[M]', name='Date')
PeriodIndex(['2001-08', '2001-09'], dtype='period[M]', name='Date')
PeriodIndex(['2001-09', '2001-10'], dtype='period[M]', name='Date')
PeriodIndex(['2001-10', '2001-11'], dtype='period[M]', name='Date')
PeriodIndex(['2001-11', '2001-12'], dtype='period[M]', name='Date')
PeriodIndex(['2001-12', '2002-01'], dtype='period[M]', name='Date')
PeriodIndex(['2002-01', '2002-02'], dtype='period[M]', name

Unnamed: 0_level_0,ptf_1_return,ptf_1_volume,ptf_1_stocks,ptf_2_return,ptf_2_volume,ptf_2_stocks
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-12,,3294980.0,"[WMT, F, CAT, UPS, MKC]",,36405000.0,"[MSFT, INTC, JPM, XOM, NKE]"
2001-01,0.037479,3779360.0,"[NKE, F, CAT, UPS, MKC]",0.177179,38957900.0,"[MSFT, INTC, XOM, JPM, WMT]"
2001-02,-0.039885,3477900.0,"[WMT, F, CAT, UPS, MKC]",-0.148126,35719760.0,"[MSFT, INTC, NKE, XOM, JPM]"
2001-03,0.032231,3597500.0,"[WMT, F, CAT, MKC, UPS]",-0.031277,32907900.0,"[MSFT, INTC, NKE, XOM, JPM]"
2001-04,0.032258,4405520.0,"[NKE, F, CAT, MKC, UPS]",0.120112,29007320.0,"[MSFT, INTC, XOM, JPM, WMT]"
...,...,...,...,...,...,...
2020-08,0.098878,5241040.0,"[WMT, UPS, CAT, NKE, MKC]",0.037041,32891360.0,"[F, INTC, MSFT, XOM, JPM]"
2020-09,0.027482,6247880.0,"[WMT, NKE, UPS, CAT, MKC]",-0.050831,31221860.0,"[F, INTC, MSFT, XOM, JPM]"
2020-10,-0.025193,4996360.0,"[WMT, NKE, UPS, CAT, MKC]",-0.010596,45232160.0,"[F, INTC, MSFT, XOM, JPM]"
2020-11,0.090591,5886920.0,"[NKE, WMT, UPS, CAT, MKC]",0.139027,43478740.0,"[F, XOM, INTC, MSFT, JPM]"


QUESTION 2 - portfolios sorted based on the average past size of company in previous n periods.
p1 - 50% smallest (by volume) stocks in t-1,
p2 - 50% biggest (by volume) stocks in t-1

In [41]:
# period = 1
# rebalance_period = 1
# small_p = .5
# big_p = .5

# Q2: rebalance every period (reb=1), rank on the 6-period average volume (per=6), 50% / 50% split.
Q2 = constrcut_portfolios(
    df=df,
    decision_var="volume",
    reb=1,
    per=6,
    small_p=.5,
    big_p=.5,
)
Q2

Unnamed: 0_level_0,ptf_1_return,ptf_1_volume,ptf_1_stocks,ptf_2_return,ptf_2_volume,ptf_2_stocks
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-12,,3294980.0,"[WMT, F, CAT, UPS, MKC]",,36405000.0,"[MSFT, INTC, JPM, XOM, NKE]"
2001-01,0.037479,3779360.0,"[NKE, F, CAT, UPS, MKC]",0.177179,38957900.0,"[MSFT, INTC, XOM, JPM, WMT]"
2001-02,-0.039885,3477900.0,"[WMT, F, CAT, UPS, MKC]",-0.148126,35719760.0,"[MSFT, INTC, NKE, XOM, JPM]"
2001-03,0.032231,3597500.0,"[WMT, F, CAT, UPS, MKC]",-0.031277,32907900.0,"[MSFT, INTC, XOM, NKE, JPM]"
2001-04,0.029932,3848760.0,"[WMT, F, CAT, UPS, MKC]",0.122438,29564080.0,"[MSFT, INTC, XOM, NKE, JPM]"
...,...,...,...,...,...,...
2020-08,0.098878,5241040.0,"[WMT, NKE, UPS, CAT, MKC]",0.037041,32891360.0,"[F, MSFT, INTC, XOM, JPM]"
2020-09,0.027482,6247880.0,"[WMT, NKE, UPS, CAT, MKC]",-0.050831,31221860.0,"[F, MSFT, INTC, XOM, JPM]"
2020-10,-0.025193,4996360.0,"[WMT, NKE, UPS, CAT, MKC]",-0.010596,45232160.0,"[F, INTC, MSFT, XOM, JPM]"
2020-11,0.090591,5886920.0,"[WMT, NKE, UPS, CAT, MKC]",0.139027,43478740.0,"[F, INTC, MSFT, XOM, JPM]"


QUESTION 3 - portfolios sorted based on the average past size of company in previous n periods, portfolio components change every REB periods.
p1 - 50% smallest (by volume) stocks in t-1,
p2 - 50% biggest (by volume) stocks in t-1

In [42]:
# Q3: rebalance with reb=12, rank on the 6-period average volume (per=6), 50% / 50% split.
Q3 = constrcut_portfolios(
    df=df,
    decision_var="volume",
    reb=12,
    per=6,
    small_p=.5,
    big_p=.5,
)
Q3

Unnamed: 0_level_0,ptf_1_return,ptf_1_volume,ptf_1_stocks,ptf_2_return,ptf_2_volume,ptf_2_stocks
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-12,,3294980.0,"[WMT, F, CAT, UPS, MKC]",,36405000.0,"[MSFT, INTC, JPM, XOM, NKE]"
2001-01,0.054190,4467380.0,"[WMT, F, CAT, UPS, MKC]",0.160468,38269880.0,"[MSFT, INTC, JPM, XOM, NKE]"
2001-02,-0.039885,3477900.0,"[WMT, F, CAT, UPS, MKC]",-0.148126,35719760.0,"[MSFT, INTC, JPM, XOM, NKE]"
2001-03,0.032231,3597500.0,"[WMT, F, CAT, UPS, MKC]",-0.031277,32907900.0,"[MSFT, INTC, JPM, XOM, NKE]"
2001-04,0.029932,3848760.0,"[WMT, F, CAT, UPS, MKC]",0.122438,29564080.0,"[MSFT, INTC, JPM, XOM, NKE]"
...,...,...,...,...,...,...
2020-08,0.098878,5241040.0,"[NKE, WMT, CAT, UPS, MKC]",0.037041,32891360.0,"[F, MSFT, INTC, XOM, JPM]"
2020-09,0.027482,6247880.0,"[NKE, WMT, CAT, UPS, MKC]",-0.050831,31221860.0,"[F, MSFT, INTC, XOM, JPM]"
2020-10,-0.025193,4996360.0,"[NKE, WMT, CAT, UPS, MKC]",-0.010596,45232160.0,"[F, MSFT, INTC, XOM, JPM]"
2020-11,0.090591,5886920.0,"[NKE, WMT, CAT, UPS, MKC]",0.139027,43478740.0,"[F, MSFT, INTC, XOM, JPM]"


QUESTION 4 - portfolios sorted based on the average past size of company in previous n periods, portfolio components change every REB periods.
p1 - 30% smallest (by volume) stocks in t-1,
p2 - 30% biggest (by volume) stocks in t-1

In [43]:
# Q4: rebalance with reb=12, rank on the 12-period average volume (per=12), 30% / 30% split.
Q4 = constrcut_portfolios(
    df=df,
    decision_var="volume",
    reb=12,
    per=12,
    small_p=.3,
    big_p=.3,
)
Q4

Unnamed: 0_level_0,ptf_1_return,ptf_1_volume,ptf_1_stocks,ptf_2_return,ptf_2_volume,ptf_2_stocks
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-12,,1.715267e+06,"[CAT, UPS, MKC]",,5.456273e+07,"[MSFT, INTC, JPM]"
2001-01,-0.000334,2.311733e+06,"[CAT, UPS, MKC]",0.282928,5.564127e+07,"[MSFT, INTC, JPM]"
2001-02,-0.022604,1.612733e+06,"[CAT, UPS, MKC]",-0.137767,4.790680e+07,"[MSFT, INTC, JPM]"
2001-03,0.047273,1.957367e+06,"[CAT, UPS, MKC]",-0.063196,4.898723e+07,"[MSFT, INTC, JPM]"
2001-04,0.025581,2.350967e+06,"[CAT, UPS, MKC]",0.160727,4.112407e+07,"[MSFT, INTC, JPM]"
...,...,...,...,...,...,...
2020-08,0.091688,2.535533e+06,"[CAT, UPS, MKC]",0.066442,4.165927e+07,"[F, MSFT, INTC]"
2020-09,0.002593,3.643633e+06,"[CAT, UPS, MKC]",-0.024856,3.916167e+07,"[F, MSFT, INTC]"
2020-10,-0.024727,3.663200e+06,"[CAT, UPS, MKC]",-0.007184,5.436020e+07,"[F, MSFT, INTC]"
2020-11,0.076672,2.961200e+06,"[CAT, UPS, MKC]",0.107951,5.245587e+07,"[F, MSFT, INTC]"
