In [1]:
import yfinance as yf
import pandas as pd
import datetime as dt
import os
import numpy as np
import math

* Q1 - rebalance portfolio 
* Q2 - define period for which we take into account the size (Market Cap) - moving average of the size over the period Rebalancing every month
* Q3 - Add rebalancing every REB months e.g. every 12 months, so you keep the same portfolio composition for 12 months each time.
* Q4 - change percentage of companies taken to each of 2 portfolios

In [2]:

def pull_stock_data(tickers:str, start:str, end:str, interval:str, stripdateindex:str, column:str) -> pd.DataFrame:
    """
    Returns a dataframe with chosen stock information for last day of the month.

    Parameters:
    -------------
    tickers (str): Yahoo finance tickers for companies divided ONLY by single space
    start (str): start date of the period (format yyyy-mm-dd)
    end (str): end date of the period (format yyyy-mm-dd)
    interval (str): wanted interval (1d, 1m, 1y)
    stripdateindex (str): argument for stripping datetime index down (d - day, m - month, y- year)
    columns (list): list of wanted values, args same as in yahoo finance
    """
    # data download
    df = yf.download(tickers = tickers, start = start, end = end, interval = interval, groupby = 'ticker')
    # changing index from datetime to just year and month
    df['Date'] = df.index
    dfg = df.groupby([df.index.year, df.index.month], as_index=False).last()
    dfg.reset_index(inplace=True, drop=True)
    dfg.set_index('Date', inplace=True)
    dfg.index = pd.to_datetime(dfg.index).to_period(stripdateindex)
    # dropping na rows
    dfg.dropna(inplace = True)
    return dfg[column]

    
def save_to_desktop(dataframe:pd.DataFrame, file_name:str):
    """
    Saves dataframe to desktop in csv format under filename provided

    Parameters:
    ------------
    dataframe (pd.DataFrame): dataframe that is to be saved
    file_name (str): name of the file
    """
    # defining path to desktop on running unit
    desktop = os.path.join(os.path.join(os.environ['USERPROFILE']), 'Desktop')
    # saving file under chosen name on desktop
    dataframe.to_csv(desktop+'/'+file_name+'.csv', sep=';')


In [11]:
from pandas_datareader import data
data.get_quote_yahoo(tick)

ModuleNotFoundError: No module named 'pandas_datareader'

In [3]:
tick = "NKE MSFT XOM INTC CAT WMT JPM F UPS MKC" # stocks universe
st = '2000-12-01'
en = '2020-12-31'
intv = '1d'
strpdt = 'm'
col = ['Close', 'Volume']

stocks_data = pull_stock_data(tick, st, en, intv, strpdt, col)
stocks_data.info()

[*********************100%***********************]  10 of 10 completed
<class 'pandas.core.frame.DataFrame'>
PeriodIndex: 241 entries, 2000-12 to 2020-12
Freq: M
Data columns (total 20 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   (Close, CAT)    241 non-null    float64
 1   (Close, F)      241 non-null    float64
 2   (Close, INTC)   241 non-null    float64
 3   (Close, JPM)    241 non-null    float64
 4   (Close, MKC)    241 non-null    float64
 5   (Close, MSFT)   241 non-null    float64
 6   (Close, NKE)    241 non-null    float64
 7   (Close, UPS)    241 non-null    float64
 8   (Close, WMT)    241 non-null    float64
 9   (Close, XOM)    241 non-null    float64
 10  (Volume, CAT)   241 non-null    int64  
 11  (Volume, F)     241 non-null    int64  
 12  (Volume, INTC)  241 non-null    int64  
 13  (Volume, JPM)   241 non-null    int64  
 14  (Volume, MKC)   241 non-null    int64  
 15  (Volume, MSFT)  241 non-null    int



In [16]:
keneth_data = pd.read_csv(r'F-F_Research_Data_Factors.csv', sep = ';')
keneth_data['Date'] = keneth_data['Date'].astype('string')

keneth_data['Date'] = pd.to_datetime(keneth_data['Date'], format='%Y%m')
keneth_data.set_index('Date', inplace=True)
keneth_data.index = keneth_data.index.to_period('m')
keneth_data = keneth_data/100

In [10]:
stocks_price = stocks_data['Close']
stock_volume = stocks_data['Volume']


Unnamed: 0_level_0,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Unnamed: 0_level_1,CAT,F,INTC,JPM,MKC,MSFT,NKE,UPS,WMT,XOM,CAT,F,INTC,JPM,MKC,MSFT,NKE,UPS,WMT,XOM
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
2000-12,,,,,,,,,,,3051800,4746800,54053600,9657000,998000,99977600,8772800,1096000,6582300,9564000
2001-01,-0.065363,0.202773,0.230769,0.210234,0.010745,0.407781,-0.014379,0.053617,0.069176,-0.032063,3889000,4553600,71894200,13130800,264000,81898800,7408000,2782200,10848100,17017600
2001-02,-0.059249,-0.013480,-0.228041,-0.151482,0.078189,-0.033777,-0.290493,-0.086753,-0.118134,-0.036839,3025200,4803100,48601400,10510600,962800,84608400,23287200,850200,7748200,11591200
2001-03,0.066827,0.011147,-0.078775,-0.037720,0.068448,-0.073093,0.033820,0.006545,0.008185,-0.000617,3046800,3807000,47297100,8463000,1899600,91201600,4391200,925700,8308400,13186600
2001-04,0.131140,0.048364,0.174727,0.068597,-0.064063,0.238857,0.036183,0.009666,0.024554,0.093827,5055000,5593100,41266300,7737700,956400,74368200,9381600,1041500,6597800,15066600
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-08,0.070966,0.031770,0.067463,0.036734,0.057978,0.100093,0.146296,0.146119,0.073029,-0.050856,2886500,50089500,46114100,14219400,759200,28774200,3519800,3960900,15078800,25259600
2020-09,0.048064,-0.023460,0.016290,-0.039126,-0.058681,-0.067397,0.121995,0.018396,0.007634,-0.140461,3642800,55944600,27711300,16400600,1861400,33829100,8699700,5426700,11608800,22223700
2020-10,0.052967,0.160661,-0.144844,0.018386,-0.070016,-0.037370,-0.043492,-0.057133,-0.008291,-0.049811,4373500,79345600,46781300,15194500,1375600,36953700,7791600,5240500,6200600,47885700
2020-11,0.105317,0.174644,0.091915,0.202366,0.035843,0.057292,0.121752,0.088855,0.101189,0.168915,2382600,77970100,46332700,14411800,1792400,33064800,9652500,4708600,10898500,45614300


In [9]:

stocks_data['Close'] = stocks_data['Close'].pct_change()


calculating the stock returns

In [18]:
stock_returns = stocks_price.pct_change()
stock_returns_full = stock_returns.merge(keneth_data, how = 'left', left_index=True, right_index=True)

calculating stock excess return

In [19]:
tickers = stock_returns_full.columns.tolist()
for i in tickers[:10]:
    stock_returns_full[i + '_excess_return'] = stock_returns_full[i] - stock_returns_full['RF']

cleaning the dataframe - deleting stock prices

In [20]:
stock_returns_full = stock_returns_full.iloc[:,14:]

QUESTION 1 - portfolios sorted based on the past size of company.
p1 - 50% smallest (by volume) stocks in t-1,
p2 - 50% biggest (by volume) stocks in t-1

In [21]:
def portfolio_build_q1(stock_volume_data, stock_returns_data):
    """
    Returns a dataframe with portfolio average returns. Portfolio components are established based on the stock volume in previous period. 
    p1 - contains the average excess return of 50% stocks with lowest stock volume
    p2 - contains the average excess return of 50% stocks with highest stock volume
    p1_comp - contains the ticker names of companies in the p1 portfolio
    p2_comp - contains the ticker names of companies in the p2 portfolio
    Parameters:
    -------------
    stock_volume_data (df) - stock volume for tickers
    stock_returns_data (df) - excess returns for tickers 
    """
    target_df = pd.DataFrame(columns=('date','p1','p2','p1_comp','p2_comp'))
    evaluation = {}
    i = 0

    for index, row in stock_volume_data.iterrows():
        med = np.median(row.values)
        temp_dict = {'Date': str(index) ,"below": [], "over":[]}
        for col in stock_volume.columns:
            if row[col] <= med:
                temp_dict["below"].append(col)
            else:
                temp_dict["over"].append(col)
        evaluation[i] = temp_dict
        i += 1

    rank = pd.DataFrame.from_dict(evaluation).T
    i = 0
    for index, row in stock_returns_data.iterrows():
        if i == 0:
            pass
        else:
            temp = rank.loc[rank['Date'] == str(previous_index)]
            below = temp['below']
            over = temp['over']

            below_list = []
            for b_col in below:
                below_list.append(row[b_col])
            below_avg = np.mean(below_list) 
            
            over_list = []
            for o_col in over:
                over_list.append(row[o_col])
            over_avg = np.mean(over_list)

            target_df = target_df.append({"date": str(index), "p1": below_avg, "p2": over_avg, "p1_comp": below.values[0], "p2_comp": over.values[0]}, ignore_index=True)

        i += 1
        previous_index = index
    return target_df
        

In [22]:
q1 = portfolio_build_q1(stock_volume_data=stock_volume, stock_returns_data=stock_returns)
q1

  target_df = target_df.append({"date": str(index), "p1": below_avg, "p2": over_avg, "p1_comp": below.values[0], "p2_comp": over.values[0]}, ignore_index=True)
  target_df = target_df.append({"date": str(index), "p1": below_avg, "p2": over_avg, "p1_comp": below.values[0], "p2_comp": over.values[0]}, ignore_index=True)
  target_df = target_df.append({"date": str(index), "p1": below_avg, "p2": over_avg, "p1_comp": below.values[0], "p2_comp": over.values[0]}, ignore_index=True)
  target_df = target_df.append({"date": str(index), "p1": below_avg, "p2": over_avg, "p1_comp": below.values[0], "p2_comp": over.values[0]}, ignore_index=True)
  target_df = target_df.append({"date": str(index), "p1": below_avg, "p2": over_avg, "p1_comp": below.values[0], "p2_comp": over.values[0]}, ignore_index=True)
  target_df = target_df.append({"date": str(index), "p1": below_avg, "p2": over_avg, "p1_comp": below.values[0], "p2_comp": over.values[0]}, ignore_index=True)
  target_df = target_df.append({"date": 

Unnamed: 0,date,p1,p2,p1_comp,p2_comp
0,2001-01,0.05419,0.160468,"[CAT, F, MKC, UPS, WMT]","[INTC, JPM, MSFT, NKE, XOM]"
1,2001-02,-0.074357,-0.113654,"[CAT, F, MKC, NKE, UPS]","[INTC, JPM, MSFT, WMT, XOM]"
2,2001-03,0.032231,-0.031277,"[CAT, F, MKC, UPS, WMT]","[INTC, JPM, MSFT, NKE, XOM]"
3,2001-04,0.032258,0.120112,"[CAT, F, MKC, NKE, UPS]","[INTC, JPM, MSFT, WMT, XOM]"
4,2001-05,-0.006427,-0.019194,"[CAT, F, MKC, UPS, WMT]","[INTC, JPM, MSFT, NKE, XOM]"
...,...,...,...,...,...
235,2020-08,0.098878,0.037041,"[CAT, MKC, NKE, UPS, WMT]","[F, INTC, JPM, MSFT, XOM]"
236,2020-09,0.01813,-0.041479,"[CAT, JPM, MKC, NKE, UPS]","[F, INTC, MSFT, WMT, XOM]"
237,2020-10,-0.025193,-0.010596,"[CAT, MKC, NKE, UPS, WMT]","[F, INTC, JPM, MSFT, XOM]"
238,2020-11,0.090591,0.139027,"[CAT, MKC, NKE, UPS, WMT]","[F, INTC, JPM, MSFT, XOM]"


QUESTION 2 - portfolios sorted based on the average past size of company in previous n periods.
p1 - 50% smallest (by volume) stocks in t-1,
p2 - 50% biggest (by volume) stocks in t-1

In [11]:
def portfolio_build_q2(stock_volume_data, stock_returns_data, number_of_periods):
    """
    Returns a dataframe with portfolio average returns. Portfolio components are established based on the stock volume in previous period. 
    p1 - contains the average excess return of 50% stocks with lowest stock volume
    p2 - contains the average excess return of 50% stocks with highest stock volume
    p1_comp - contains the ticker names of companies in the p1 portfolio
    p2_comp - contains the ticker names of companies in the p2 portfolio
    Parameters:
    -------------
    stock_volume_data (df) - stock volume for tickers
    stock_returns_data (df) - excess returns for tickers
    number_of_periods (int) - how many periods prior shuold be taken into consideration while creating portfolios
    """
    target_df = pd.DataFrame(columns=('date','p1','p2','p1_comp','p2_comp'))
    evaluation = {}
    i = 0 #counter
    for index, row in stock_volume_data.iterrows():
        temp_avg = {}
        for col in stock_volume.columns:
            if i < number_of_periods-1:
                temp_avg[col] = np.mean(stock_volume[col][:i+1])
            else:
                temp_avg[col] = np.mean(stock_volume[col][i-number_of_periods+1:i+1])

            temp_avg = dict(sorted(temp_avg.items(), key=lambda item: item[1]))
            temp_avg_keys = list(temp_avg.keys())
        
        temp_dict = {'Date': str(index) ,"below": [], "over":[]}
        
        temp_dict['below'] = temp_avg_keys[:5]
        temp_dict['over'] = temp_avg_keys[5:]

        evaluation[i] = temp_dict
        i += 1 #updating counter

    rank = pd.DataFrame.from_dict(evaluation).T
    i = 0 #reseting counter before new loop

    for index, row in stock_returns_data.iterrows():
        if i == 0: #counter
            pass
        else:
            temp = rank.loc[rank['Date'] == str(previous_index)]
            below = temp['below']
            over = temp['over']

            below_list = []
            for b_col in below:
                below_list.append(row[b_col])
            below_avg = np.mean(below_list) 
            
            over_list = []
            for o_col in over:
                over_list.append(row[o_col])
            over_avg = np.mean(over_list)

            target_df = target_df.append({"date": str(index), "p1": below_avg, "p2": over_avg, "p1_comp": below.values[0], "p2_comp": over.values[0]}, ignore_index=True)

        i += 1 #updating counter
        previous_index = index #assigning new value to previous_index
    return target_df
    

In [12]:
q2 = portfolio_build_q2(stock_returns_data=stock_returns, stock_volume_data=stock_volume, number_of_periods=2)

In [13]:
q2

Unnamed: 0,date,p1,p2,p1_comp,p2_comp
0,2001-01,-0.010790,0.168091,"[BRK-A, ED, GE, GS, DIS]","[KO, LLOY.L, IBM, SBUX, AAPL]"
1,2001-02,-0.016532,-0.091792,"[BRK-A, ED, GE, GS, DIS]","[KO, IBM, SBUX, LLOY.L, AAPL]"
2,2001-03,-0.062189,-0.006228,"[BRK-A, ED, GE, GS, DIS]","[KO, IBM, SBUX, LLOY.L, AAPL]"
3,2001-04,0.066971,0.068791,"[BRK-A, ED, GE, DIS, GS]","[KO, IBM, SBUX, LLOY.L, AAPL]"
4,2001-05,0.031138,-0.045283,"[BRK-A, ED, GS, GE, DIS]","[IBM, KO, LLOY.L, SBUX, AAPL]"
...,...,...,...,...,...
235,2020-08,0.037152,0.091986,"[BRK-A, GS, ED, IBM, SBUX]","[DIS, GE, KO, AAPL, LLOY.L]"
236,2020-09,0.010461,-0.039480,"[BRK-A, GS, ED, IBM, SBUX]","[DIS, KO, GE, AAPL, LLOY.L]"
237,2020-10,-0.035070,0.027602,"[BRK-A, ED, GS, IBM, SBUX]","[DIS, GE, KO, AAPL, LLOY.L]"
238,2020-11,0.112152,0.206188,"[BRK-A, ED, GS, IBM, SBUX]","[DIS, GE, KO, LLOY.L, AAPL]"


QUESTION 3 - portfolios sorted based on the average past size of company in previous n periods, portfolio components change every REB periods.
p1 - 50% smallest (by volume) stocks in t-1,
p2 - 50% biggest (by volume) stocks in t-1

In [14]:
def portfolio_build_q3(stock_volume_data, stock_returns_data, number_of_periods, REB):
    """
    Returns a dataframe with portfolio average returns. Portfolio components are established based on the stock volume in previous period. 
    p1 - contains the average excess return of 50% stocks with lowest stock volume
    p2 - contains the average excess return of 50% stocks with highest stock volume
    p1_comp - contains the ticker names of companies in the p1 portfolio
    p2_comp - contains the ticker names of companies in the p2 portfolio
    Parameters:
    -------------
    stock_volume_data (df) - stock volume for tickers
    stock_returns_data (df) - excess returns for tickers
    number_of_periods (int) - how many periods prior shuold be taken into consideration while creating portfolios
    REB (int) - how often (in periods) portolio should be recalculated
    """
    target_df = pd.DataFrame(columns=('date','p1','p2','p1_comp','p2_comp'))
    evaluation = {}
    i = 0 #counter
    for index, row in stock_volume_data.iterrows():
        if i % REB == 0:
            temp_avg = {}
            for col in stock_volume.columns:
                if i < number_of_periods-1:
                    temp_avg[col] = np.mean(stock_volume[col][:i+1])
                else:
                    temp_avg[col] = np.mean(stock_volume[col][i-number_of_periods+1:i+1])

                temp_avg = dict(sorted(temp_avg.items(), key=lambda item: item[1]))
                temp_avg_keys = list(temp_avg.keys())
        
        
        temp_dict = {'Date': str(index) ,"below": [], "over":[]}
        
        temp_dict['below'] = temp_avg_keys[:5]
        temp_dict['over'] = temp_avg_keys[5:]

        evaluation[i] = temp_dict
        i += 1 #updating counter

    rank = pd.DataFrame.from_dict(evaluation).T
    i = 0 #reseting counter before new loop

    for index, row in stock_returns_data.iterrows():
        if i == 0: #counter
            pass
        else:
            temp = rank.loc[rank['Date'] == str(previous_index)]
            below = temp['below']
            over = temp['over']

            below_list = []
            for b_col in below:
                below_list.append(row[b_col])
            below_avg = np.mean(below_list) 
            
            over_list = []
            for o_col in over:
                over_list.append(row[o_col])
            over_avg = np.mean(over_list)

            target_df = target_df.append({"date": str(index), "p1": below_avg, "p2": over_avg, "p1_comp": below.values[0], "p2_comp": over.values[0]}, ignore_index=True)

        i += 1 #updating counter
        previous_index = index #assigning new value to previous_index
    return target_df
    

In [15]:
q3 = portfolio_build_q3(stock_returns_data=stock_returns, stock_volume_data=stock_volume, number_of_periods=2, REB =3)
q3

Unnamed: 0,date,p1,p2,p1_comp,p2_comp
0,2001-01,-0.010790,0.168091,"[BRK-A, ED, GE, GS, DIS]","[KO, LLOY.L, IBM, SBUX, AAPL]"
1,2001-02,-0.016532,-0.091792,"[BRK-A, ED, GE, GS, DIS]","[KO, LLOY.L, IBM, SBUX, AAPL]"
2,2001-03,-0.062189,-0.006228,"[BRK-A, ED, GE, GS, DIS]","[KO, LLOY.L, IBM, SBUX, AAPL]"
3,2001-04,0.066971,0.068791,"[BRK-A, ED, GE, DIS, GS]","[KO, IBM, SBUX, LLOY.L, AAPL]"
4,2001-05,0.031138,-0.045283,"[BRK-A, ED, GE, DIS, GS]","[KO, IBM, SBUX, LLOY.L, AAPL]"
...,...,...,...,...,...
235,2020-08,0.037152,0.091986,"[BRK-A, ED, GS, IBM, SBUX]","[GE, DIS, KO, AAPL, LLOY.L]"
236,2020-09,0.010461,-0.039480,"[BRK-A, ED, GS, IBM, SBUX]","[GE, DIS, KO, AAPL, LLOY.L]"
237,2020-10,-0.035070,0.027602,"[BRK-A, ED, GS, IBM, SBUX]","[DIS, GE, KO, AAPL, LLOY.L]"
238,2020-11,0.112152,0.206188,"[BRK-A, ED, GS, IBM, SBUX]","[DIS, GE, KO, AAPL, LLOY.L]"


In [80]:
c = stocks_data['Close'].melt(ignore_index=False, value_name="close_price", var_name='company') #.reset_index()
v = stocks_data['Volume'].melt(ignore_index=False, value_name="volume", var_name='company')
df = pd.merge(c, v, on=["Date",'company'])
df

Unnamed: 0_level_0,company,close_price,volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-12,CAT,23.656250,3051800
2001-01,CAT,22.110001,3889000
2001-02,CAT,20.799999,3025200
2001-03,CAT,22.190001,3046800
2001-04,CAT,25.100000,5055000
...,...,...,...
2020-08,XOM,39.939999,25259600
2020-09,XOM,34.330002,22223700
2020-10,XOM,32.619999,47885700
2020-11,XOM,38.130001,45614300


In [118]:
period = 1
d = df.index[12]
slice = df[(df.index >= d - period+1) & (df.index <= d)]
period_avg = slice.groupby('company').mean()

# period_avg.sort_values('volume',ascending = False).reset_index().head(small_cnt)
period_avg.reset_index()

Unnamed: 0,company,close_price,volume
0,CAT,26.125,2041000.0
1,F,15.72,7231500.0
2,INTC,31.450001,27975400.0
3,JPM,36.349998,6298000.0
4,MKC,10.4925,627200.0
5,MSFT,33.125,43006200.0
6,NKE,7.03,6570400.0
7,UPS,54.5,1198500.0
8,WMT,57.549999,5134700.0
9,XOM,39.299999,8730500.0


In [99]:
portfolio_size = 11
small_p = .5
big_p = .5
small_cnt = int((portfolio_size*small_p)//1)
big_cnt = round(portfolio_size*big_p)


df.sort_values('volume',ascending = False).head(small_cnt)


Unnamed: 0_level_0,company,close_price,volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2006-04,MSFT,24.15,591052200
2010-03,F,12.57,433659100
2011-04,MSFT,25.92,319317900
2011-01,F,15.95,275198200
2020-03,F,4.83,231800800


In [121]:
for i in range(1, 38):
    print(i%12)

1
2
3
4
5
6
7
8
9
10
11
0
1
2
3
4
5
6
7
8
9
10
11
0
1
2
3
4
5
6
7
8
9
10
11
0
1


In [122]:
period = 6
rebalance_period = 12


for p_idx, d in enumerate(df.index, 1):
    if p_idx%rebalance_period == 0:
        slice = df[(df.index >= d - period+1) & (df.index <= d)]
        
        period_avg = slice.groupby('company').mean().reset_index()
        
        temp_sort = period_avg.sort_values('volume',ascending = False)
        small_comp = temp_sort.tail(small_cnt)
        big_comp = temp_sort.head(big_cnt)
        print(p_idx)
        print(big_comp['company'].values)

12
['MSFT' 'INTC' 'NKE' 'XOM' 'WMT' 'JPM']
24
['MSFT' 'INTC' 'NKE' 'XOM' 'WMT' 'JPM']
36
['MSFT' 'INTC' 'NKE' 'XOM' 'F' 'WMT']
48
['INTC' 'MSFT' 'WMT' 'XOM' 'JPM' 'NKE']
60
['MSFT' 'INTC' 'XOM' 'F' 'WMT' 'NKE']
72
['INTC' 'MSFT' 'F' 'XOM' 'WMT' 'NKE']
84
['MSFT' 'INTC' 'F' 'XOM' 'JPM' 'WMT']
96
['MSFT' 'F' 'INTC' 'JPM' 'XOM' 'WMT']
108
['F' 'INTC' 'MSFT' 'JPM' 'XOM' 'NKE']
120
['F' 'MSFT' 'INTC' 'JPM' 'XOM' 'WMT']
132
['MSFT' 'F' 'INTC' 'JPM' 'XOM' 'NKE']
144
['F' 'MSFT' 'INTC' 'NKE' 'JPM' 'XOM']
156
['MSFT' 'INTC' 'F' 'JPM' 'XOM' 'NKE']
168
['F' 'INTC' 'MSFT' 'XOM' 'JPM' 'NKE']
180
['MSFT' 'F' 'INTC' 'XOM' 'JPM' 'WMT']
192
['F' 'MSFT' 'INTC' 'JPM' 'XOM' 'NKE']
204
['F' 'INTC' 'MSFT' 'NKE' 'JPM' 'XOM']
216
['F' 'INTC' 'MSFT' 'JPM' 'XOM' 'NKE']
228
['F' 'MSFT' 'INTC' 'JPM' 'XOM' 'NKE']
240
['F' 'INTC' 'MSFT' 'XOM' 'JPM' 'WMT']
252
['MSFT' 'INTC' 'NKE' 'XOM' 'WMT' 'JPM']
264
['MSFT' 'INTC' 'NKE' 'XOM' 'WMT' 'JPM']
276
['MSFT' 'INTC' 'NKE' 'XOM' 'F' 'WMT']
288
['INTC' 'MSFT' 'XOM' 'WMT' '

In [None]:
def rebalance(df, sort_var, p1, p2):
    dffull[[stock, 'reM']].rolling(60)

In [16]:
def portfolio_build_q4(stock_volume_data, stock_returns_data, number_of_periods, REB,PER1 ,PER2):
    """
    Returns a dataframe with portfolio average returns. Portfolio components are established based on the stock volume in previous period. 
    p1 - contains the average excess return of 50% stocks with lowest stock volume
    p2 - contains the average excess return of 50% stocks with highest stock volume
    p1_comp - contains the ticker names of companies in the p1 portfolio
    p2_comp - contains the ticker names of companies in the p2 portfolio
    Parameters:
    -------------
    stock_volume_data (df) - stock volume for tickers
    stock_returns_data (df) - excess returns for tickers
    number_of_periods (int) - how many periods prior shuold be taken into consideration while creating portfolios
    REB (int) - how often (in periods) portolio should be recalculated
    PER1 (int) - how many % of companies should be in p1 portfolio
    PER2 (int) - how many % of companies should be in p2 portfolio
    """
    target_df = pd.DataFrame(columns=('date','p1','p2','p1_comp','p2_comp'))
    evaluation = {}
    i = 0 #counter
    PER1 = math.ceil(PER1 * (len(list(stock_volume_data.columns)) / 100))
    for index, row in stock_volume_data.iterrows():
        if i % REB == 0:
            temp_avg = {}
            for col in stock_volume.columns:
                if i < number_of_periods-1:
                    temp_avg[col] = np.mean(stock_volume[col][:i+1])
                else:
                    temp_avg[col] = np.mean(stock_volume[col][i-number_of_periods+1:i+1])

                temp_avg = dict(sorted(temp_avg.items(), key=lambda item: item[1]))
                temp_avg_keys = list(temp_avg.keys())
        
        
        temp_dict = {'Date': str(index) ,"below": [], "over":[]}
        temp_dict['below'] = temp_avg_keys[:PER1]
        temp_dict['over'] = temp_avg_keys[PER1:]

        evaluation[i] = temp_dict
        i += 1 #updating counter

    rank = pd.DataFrame.from_dict(evaluation).T
    i = 0 #reseting counter before new loop

    for index, row in stock_returns_data.iterrows():
        if i == 0: #counter
            pass
        else:
            temp = rank.loc[rank['Date'] == str(previous_index)]
            below = temp['below']
            over = temp['over']

            below_list = []
            for b_col in below:
                below_list.append(row[b_col])
            below_avg = np.mean(below_list) 
            
            over_list = []
            for o_col in over:
                over_list.append(row[o_col])
            over_avg = np.mean(over_list)

            target_df = target_df.append({"date": str(index), "p1": below_avg, "p2": over_avg, "p1_comp": below.values[0], "p2_comp": over.values[0]}, ignore_index=True)

        i += 1 #updating counter
        previous_index = index #assigning new value to previous_index
    return target_df
    

In [17]:
q4 = portfolio_build_q4(stock_returns_data=stock_returns, stock_volume_data=stock_volume, number_of_periods=2, REB =3, PER1=13,PER2=87)
q4

Unnamed: 0,date,p1,p2,p1_comp,p2_comp
0,2001-01,-0.064544,0.114449,"[BRK-A, ED]","[GE, GS, DIS, KO, LLOY.L, IBM, SBUX, AAPL]"
1,2001-02,0.041508,-0.078080,"[BRK-A, ED]","[GE, GS, DIS, KO, LLOY.L, IBM, SBUX, AAPL]"
2,2001-03,-0.031376,-0.034916,"[BRK-A, ED]","[GE, GS, DIS, KO, LLOY.L, IBM, SBUX, AAPL]"
3,2001-04,0.023658,0.078936,"[BRK-A, ED]","[GE, DIS, GS, KO, IBM, SBUX, LLOY.L, AAPL]"
4,2001-05,0.028403,-0.015942,"[BRK-A, ED]","[GE, DIS, GS, KO, IBM, SBUX, LLOY.L, AAPL]"
...,...,...,...,...,...
235,2020-08,0.022047,0.075199,"[BRK-A, ED]","[GS, IBM, SBUX, GE, DIS, KO, AAPL, LLOY.L]"
236,2020-09,0.033738,-0.026571,"[BRK-A, ED]","[GS, IBM, SBUX, GE, DIS, KO, AAPL, LLOY.L]"
237,2020-10,-0.022911,0.001060,"[BRK-A, ED]","[GS, IBM, SBUX, DIS, GE, KO, AAPL, LLOY.L]"
238,2020-11,0.053810,0.185510,"[BRK-A, ED]","[GS, IBM, SBUX, DIS, GE, KO, AAPL, LLOY.L]"
