In [1]:
import yfinance as yf
import pandas as pd
import datetime as dt
import os
import numpy as np
import math

In [2]:
def pull_stock_data(tickers:str, start:str, end:str, interval:str, stripdateindex:str, column:str) -> pd.DataFrame:
    """
    Downloads price data from Yahoo Finance and returns the chosen fields,
    keeping one row per calendar month (the last available value of each month).
    Parameters:
    -------------
    tickers (str): Yahoo finance tickers for companies divided ONLY by single space
    start (str): start date of the period (format yyyy-mm-dd)
    end (str): end date of the period (format yyyy-mm-dd)
    interval (str): sampling interval understood by yfinance (e.g. 1d, 1wk, 1mo)
    stripdateindex (str): period frequency the datetime index is reduced to (d - day, m - month, y - year)
    column (str or list): field(s) to return, named as in yahoo finance (e.g. ['Close', 'Volume'])
    Returns:
    -------------
    pd.DataFrame indexed by period, holding only the requested field(s)
    """
    # data download
    # The keyword is `group_by`; the previous spelling `groupby` is not a yfinance
    # parameter. 'column' keeps the field name (Close, Volume, ...) as the outer
    # column level, which is what the `dfg[column]` selection below relies on.
    df = yf.download(tickers = tickers, start = start, end = end, interval = interval, group_by = 'column')
    # keep the original timestamp as a column so it survives the monthly grouping
    df['Date'] = df.index
    # one row per (year, month); .last() takes the last non-null value of every column
    dfg = df.groupby([df.index.year, df.index.month], as_index=False).last()
    dfg.reset_index(inplace=True, drop=True)
    dfg.set_index('Date', inplace=True)
    # changing index from datetime to the requested period (e.g. year and month)
    dfg.index = pd.to_datetime(dfg.index).to_period(stripdateindex)
    # filling na rows with backfill (fillna(method=...) is deprecated in recent pandas)
    dfg = dfg.bfill()
    return dfg[column]
def save_to_desktop(dataframe:pd.DataFrame, file_name:str):
    """
    Saves dataframe to desktop in csv format under filename provided
    Parameters:
    ------------
    dataframe (pd.DataFrame): dataframe that is to be saved
    file_name (str): name of the file (without the .csv extension)
    """
    # defining path to desktop on running unit: USERPROFILE is only set on Windows,
    # so fall back to the user's home directory on other systems
    home = os.environ.get('USERPROFILE') or os.path.expanduser('~')
    desktop = os.path.join(home, 'Desktop')
    # saving file under chosen name on desktop (semicolon-separated, UTF-8)
    dataframe.to_csv(os.path.join(desktop, file_name + '.csv'), sep=';', encoding='UTF-8')

In [3]:
# Load the factor file (semicolon separated) and parse the yyyymm 'Date' column.
keneth_data = (
    pd.read_csv(r'F-F_Research_Data_Factors.csv', sep=';')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'].astype('string'), format='%Y%m'))
    .set_index('Date')
)
# Monthly period index, so it lines up with the monthly stock data.
keneth_data.index = keneth_data.index.to_period('m')
# Values are divided by 100 (percent -> fraction).
keneth_data = keneth_data / 100

In [4]:
# Download settings: space-separated tickers, date range, sampling interval,
# index period ('m' = month) and the fields to keep.
ticker = 'AAPL LLOY.L ED IBM KO GE BRK-A DIS SBUX GS'
star, end = '2000-12-01', '2020-12-31'
intv, strpdt = '1d', 'm'
col = ['Close', 'Volume']
stocks_data = pull_stock_data(
    tickers=ticker,
    start=star,
    end=end,
    interval=intv,
    stripdateindex=strpdt,
    column=col,
)
stocks_data.head(10)

[*********************100%***********************]  10 of 10 completed




Unnamed: 0_level_0,Close,Close,Close,Close,Close,Close,Close,Close,Close,Close,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Unnamed: 0_level_1,AAPL,BRK-A,DIS,ED,GE,GS,IBM,KO,LLOY.L,SBUX,AAPL,BRK-A,DIS,ED,GE,GS,IBM,KO,LLOY.L,SBUX
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
2000-12,0.265625,71000.0,28.546844,38.5,368.75,106.9375,81.261948,30.46875,459.938904,5.53125,630336000.0,57000.0,4627876.0,409600.0,1427829.0,2075000.0,7263110.0,4970400.0,6148574.0,11246400.0
2001-01,0.386161,68400.0,30.038925,34.939999,353.692322,113.75,107.07457,29.0,454.741791,6.242188,730704800.0,30000.0,5115560.0,432100.0,2774993.0,3500600.0,8696653.0,8614000.0,77250997.0,31927200.0
2001-02,0.325893,70300.0,30.532175,36.869999,357.692322,91.75,95.506691,26.514999,426.158112,5.953125,508233600.0,21000.0,3658794.0,1036800.0,3451942.0,4745600.0,12482127.0,11189600.0,60891735.0,20444800.0
2001-03,0.394107,65450.0,28.2139,37.099998,322.0,85.099998,91.950287,22.58,449.220001,5.304688,400349600.0,42000.0,3837202.0,535100.0,3825653.0,2855900.0,9896520.0,10279600.0,24544908.0,22817600.0
2001-04,0.455179,68000.0,29.841625,37.41,373.307678,91.099998,110.076485,23.094999,474.880402,4.8375,494776800.0,53000.0,3923974.0,387200.0,2372370.0,2716800.0,7695631.0,7674000.0,9062688.0,28308000.0
2001-05,0.35625,68700.0,31.19313,39.150002,376.923065,95.099998,106.883362,23.700001,467.734497,4.88,442892800.0,48000.0,5025748.0,276100.0,1740388.0,1893200.0,7033409.0,7223400.0,14989139.0,21777200.0
2001-06,0.415179,69400.0,28.499985,39.799999,376.923065,85.800003,108.508606,22.5,462.212585,5.75,515390400.0,62000.0,5453725.0,615900.0,6849739.0,4347900.0,9913360.0,13146800.0,27032220.0,14779600.0
2001-07,0.335536,69200.0,25.994274,39.740002,334.615387,83.160004,100.583176,22.299999,466.435211,4.51,235026400.0,49000.0,7955195.0,506000.0,3218943.0,2308100.0,5860738.0,8695000.0,19678315.0,19498400.0
2001-08,0.33125,69400.0,25.086695,40.900002,314.615387,80.099998,95.554497,24.334999,461.562988,4.2175,216904800.0,33000.0,4093969.0,313200.0,2243761.0,2008900.0,8500424.0,7670800.0,26085714.0,31194800.0
2001-09,0.276964,70000.0,18.368629,40.720001,286.153839,71.349998,87.686424,23.424999,418.362488,3.735,365108800.0,182000.0,12816320.0,890300.0,4221230.0,3247900.0,11586751.0,11554000.0,19353263.0,15286400.0


In [5]:
# Split the downloaded frame into one price table and one volume table
# (columns of each are the tickers).
stocks_price, stock_volume = stocks_data['Close'], stocks_data['Volume']

calculating the stock returns

In [6]:
# Period-over-period percentage change of the closing prices.
stock_returns = stocks_price.pct_change()
# Attach the factor columns by index; every return row is kept (left join).
stock_returns_full = pd.merge(
    stock_returns,
    keneth_data,
    how='left',
    left_index=True,
    right_index=True,
)

calculating stock excess return

In [7]:
# All column names of the merged frame (returns followed by factor columns).
tickers = stock_returns_full.columns.tolist()
# Excess return = raw return minus the risk-free rate column 'RF'.
# Iterate over the return columns themselves instead of a hard-coded `tickers[:10]`,
# so the cell keeps working when the number of downloaded tickers changes.
for name in stock_returns.columns:
    stock_returns_full[name + '_excess_return'] = stock_returns_full[name] - stock_returns_full['RF']

Cleaning the dataframe - dropping the raw return and factor columns so that only the excess-return columns remain

In [8]:
# Keep only the '<ticker>_excess_return' columns. Selecting by name instead of
# `iloc[:, 14:]` removes the magic number and makes the cell safe to re-run
# (a positional slice would drop further columns on every execution).
stock_returns_full = stock_returns_full.filter(like='_excess_return')

QUESTION 1 - portfolios sorted based on the past size of company.
p1 - 50% smallest (by volume) stocks in t-1,
p2 - 50% biggest (by volume) stocks in t-1

In [9]:
def portfolio_build_q1(stock_volume_data, stock_returns_data):
    """
    Returns a dataframe with portfolio average returns. Portfolio components are established based on the stock volume in previous period.
    p1 - contains the average return of the stocks whose volume was at or below the cross-sectional median
    p2 - contains the average return of the stocks whose volume was above the cross-sectional median
    p1_comp - contains the ticker names of companies in the p1 portfolio
    p2_comp - contains the ticker names of companies in the p2 portfolio
    Parameters:
    -------------
    stock_volume_data (df) - stock volume for tickers (one column per ticker)
    stock_returns_data (df) - returns for tickers; must use the same column names
                              and the same index labels as stock_volume_data
    Returns:
    -------------
    pd.DataFrame with columns date, p1, p2, p1_comp, p2_comp and one row for every
    row of stock_returns_data except the first (which has no previous period).
    Raises:
    -------------
    IndexError - when the previous return date has no row in stock_volume_data
    """
    # Use the columns of the frame that was passed in, not a notebook-level global.
    columns = list(stock_volume_data.columns)

    def _average_return(returns_row, members):
        # Plain mean (NaN propagates); an empty portfolio yields NaN.
        if not members:
            return np.nan
        return float(np.mean(returns_row[members].to_numpy(dtype=float)))

    # Step 1: split the tickers around the median volume for every date.
    # NOTE: np.median returns NaN when a volume is missing; every comparison is
    # then False and all tickers end up in the "over" group.
    groups_by_date = {}
    for date, volumes in stock_volume_data.iterrows():
        median_volume = np.median(volumes.values)
        below, over = [], []
        for ticker_name in columns:
            (below if volumes[ticker_name] <= median_volume else over).append(ticker_name)
        # setdefault: the first row wins if a date label appears twice
        groups_by_date.setdefault(str(date), (below, over))

    # Step 2: average each return row over the groups formed one row earlier.
    # Rows are collected in a list and turned into a frame once, because
    # DataFrame.append no longer exists in pandas 2.x.
    records = []
    previous_date = None
    for position, (date, returns_row) in enumerate(stock_returns_data.iterrows()):
        if position > 0:
            key = str(previous_date)
            if key not in groups_by_date:
                raise IndexError("no volume data for previous period " + key)
            below, over = groups_by_date[key]
            records.append({
                "date": str(date),
                "p1": _average_return(returns_row, below),
                "p2": _average_return(returns_row, over),
                "p1_comp": list(below),
                "p2_comp": list(over),
            })
        previous_date = date
    return pd.DataFrame(records, columns=['date', 'p1', 'p2', 'p1_comp', 'p2_comp'])
        

In [10]:
# NOTE: stock_returns holds the raw percentage returns (stocks_price.pct_change()),
# not the excess returns stored in stock_returns_full.
q1 = portfolio_build_q1(stock_volume, stock_returns)
q1

Unnamed: 0,date,p1,p2,p1_comp,p2_comp
0,2001-01,-0.010790,0.168091,"[BRK-A, DIS, ED, GE, GS]","[AAPL, IBM, KO, LLOY.L, SBUX]"
1,2001-02,-0.016532,-0.091792,"[BRK-A, DIS, ED, GE, GS]","[AAPL, IBM, KO, LLOY.L, SBUX]"
2,2001-03,-0.062189,-0.006228,"[BRK-A, DIS, ED, GE, GS]","[AAPL, IBM, KO, LLOY.L, SBUX]"
3,2001-04,0.066971,0.068791,"[BRK-A, DIS, ED, GE, GS]","[AAPL, IBM, KO, LLOY.L, SBUX]"
4,2001-05,0.031138,-0.045283,"[BRK-A, DIS, ED, GE, GS]","[AAPL, IBM, KO, LLOY.L, SBUX]"
...,...,...,...,...,...
235,2020-08,0.037152,0.091986,"[BRK-A, ED, GS, IBM, SBUX]","[AAPL, DIS, GE, KO, LLOY.L]"
236,2020-09,0.010461,-0.039480,"[BRK-A, ED, GS, IBM, SBUX]","[AAPL, DIS, GE, KO, LLOY.L]"
237,2020-10,-0.035070,0.027602,"[BRK-A, ED, GS, IBM, SBUX]","[AAPL, DIS, GE, KO, LLOY.L]"
238,2020-11,0.130856,0.187485,"[BRK-A, DIS, ED, GS, IBM]","[AAPL, GE, KO, LLOY.L, SBUX]"


QUESTION 2 - portfolios sorted based on the average past size of the company over the previous n periods.
p1 - 50% smallest stocks (by average volume over the n periods ending in t-1),
p2 - 50% biggest stocks (by average volume over the n periods ending in t-1)

In [11]:
def portfolio_build_q2(stock_volume_data, stock_returns_data, number_of_periods):
    """
    Returns a dataframe with portfolio average returns. Portfolio components are established
    based on the average stock volume over the previous number_of_periods periods.
    p1 - contains the average return of the 50% stocks with lowest average volume
    p2 - contains the average return of the 50% stocks with highest average volume
    p1_comp - contains the ticker names of companies in the p1 portfolio
    p2_comp - contains the ticker names of companies in the p2 portfolio
    Parameters:
    -------------
    stock_volume_data (df) - stock volume for tickers (one column per ticker)
    stock_returns_data (df) - returns for tickers; must use the same column names
                              and the same index labels as stock_volume_data
    number_of_periods (int) - how many periods prior should be taken into consideration while creating portfolios
                              (fewer are used at the start of the sample, where not enough history exists)
    Returns:
    -------------
    pd.DataFrame with columns date, p1, p2, p1_comp, p2_comp and one row for every
    row of stock_returns_data except the first (which has no previous period).
    Raises:
    -------------
    IndexError - when the previous return date has no row in stock_volume_data
    """
    # Use the frame that was passed in, not a notebook-level global.
    columns = list(stock_volume_data.columns)
    # Size of the "below" half; equals 5 for ten tickers, rounds up for odd counts.
    half = (len(columns) + 1) // 2

    def _average_return(returns_row, members):
        # Plain mean (NaN propagates); an empty portfolio yields NaN.
        if not members:
            return np.nan
        return float(np.mean(returns_row[members].to_numpy(dtype=float)))

    def _rank_by_average_volume(position):
        # Tickers ordered by mean volume (missing values skipped) over the window
        # that ends at `position`; ties keep the column order (stable sort).
        first = max(position - number_of_periods + 1, 0)
        window_mean = stock_volume_data.iloc[first:position + 1].mean()
        return list(window_mean.sort_values(kind='mergesort').index)

    # Step 1: ranking and split for every date of the volume data.
    groups_by_date = {}
    for position, date in enumerate(stock_volume_data.index):
        ranked = _rank_by_average_volume(position)
        # setdefault: the first row wins if a date label appears twice
        groups_by_date.setdefault(str(date), (ranked[:half], ranked[half:]))

    # Step 2: average each return row over the groups formed one row earlier.
    # Rows are collected in a list and turned into a frame once, because
    # DataFrame.append no longer exists in pandas 2.x.
    records = []
    previous_date = None
    for position, (date, returns_row) in enumerate(stock_returns_data.iterrows()):
        if position > 0:
            key = str(previous_date)
            if key not in groups_by_date:
                raise IndexError("no volume data for previous period " + key)
            below, over = groups_by_date[key]
            records.append({
                "date": str(date),
                "p1": _average_return(returns_row, below),
                "p2": _average_return(returns_row, over),
                "p1_comp": list(below),
                "p2_comp": list(over),
            })
        previous_date = date
    return pd.DataFrame(records, columns=['date', 'p1', 'p2', 'p1_comp', 'p2_comp'])
    

In [12]:
# NOTE: stock_returns holds the raw percentage returns (stocks_price.pct_change()),
# not the excess returns stored in stock_returns_full.
q2 = portfolio_build_q2(
    stock_volume_data=stock_volume,
    stock_returns_data=stock_returns,
    number_of_periods=2,
)

In [13]:
# Display the Question 2 portfolio table.
q2

Unnamed: 0,date,p1,p2,p1_comp,p2_comp
0,2001-01,-0.010790,0.168091,"[BRK-A, ED, GE, GS, DIS]","[KO, LLOY.L, IBM, SBUX, AAPL]"
1,2001-02,-0.016532,-0.091792,"[BRK-A, ED, GE, GS, DIS]","[KO, IBM, SBUX, LLOY.L, AAPL]"
2,2001-03,-0.062189,-0.006228,"[BRK-A, ED, GE, GS, DIS]","[KO, IBM, SBUX, LLOY.L, AAPL]"
3,2001-04,0.066971,0.068791,"[BRK-A, ED, GE, DIS, GS]","[KO, IBM, SBUX, LLOY.L, AAPL]"
4,2001-05,0.031138,-0.045283,"[BRK-A, ED, GS, GE, DIS]","[IBM, KO, LLOY.L, SBUX, AAPL]"
...,...,...,...,...,...
235,2020-08,0.037152,0.091986,"[BRK-A, GS, ED, IBM, SBUX]","[DIS, GE, KO, AAPL, LLOY.L]"
236,2020-09,0.010461,-0.039480,"[BRK-A, GS, ED, IBM, SBUX]","[DIS, KO, GE, AAPL, LLOY.L]"
237,2020-10,-0.035070,0.027602,"[BRK-A, ED, GS, IBM, SBUX]","[DIS, GE, KO, AAPL, LLOY.L]"
238,2020-11,0.112152,0.206188,"[BRK-A, ED, GS, IBM, SBUX]","[DIS, GE, KO, LLOY.L, AAPL]"


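QUESTION 3 - portfolios sorted based on the average past size of the company over the previous n periods; portfolio components are recomputed only every REB periods.
p1 - 50% smallest stocks (by average volume over the n periods ending at the latest rebalancing date),
p2 - 50% biggest stocks (by average volume over the n periods ending at the latest rebalancing date)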

In [14]:
def portfolio_build_q3(stock_volume_data, stock_returns_data, number_of_periods, REB):
    """
    Returns a dataframe with portfolio average returns. Portfolio components are established
    based on the average stock volume over the previous number_of_periods periods and are
    recomputed only every REB periods.
    p1 - contains the average return of the 50% stocks with lowest average volume
    p2 - contains the average return of the 50% stocks with highest average volume
    p1_comp - contains the ticker names of companies in the p1 portfolio
    p2_comp - contains the ticker names of companies in the p2 portfolio
    Parameters:
    -------------
    stock_volume_data (df) - stock volume for tickers (one column per ticker)
    stock_returns_data (df) - returns for tickers; must use the same column names
                              and the same index labels as stock_volume_data
    number_of_periods (int) - how many periods prior should be taken into consideration while creating portfolios
                              (fewer are used at the start of the sample, where not enough history exists)
    REB (int) - how often (in periods) portfolio should be recalculated; must not be 0
    Returns:
    -------------
    pd.DataFrame with columns date, p1, p2, p1_comp, p2_comp and one row for every
    row of stock_returns_data except the first (which has no previous period).
    Raises:
    -------------
    IndexError - when the previous return date has no row in stock_volume_data
    ZeroDivisionError - when REB is 0
    """
    # Use the frame that was passed in, not a notebook-level global.
    columns = list(stock_volume_data.columns)
    # Size of the "below" half; equals 5 for ten tickers, rounds up for odd counts.
    half = (len(columns) + 1) // 2

    def _average_return(returns_row, members):
        # Plain mean (NaN propagates); an empty portfolio yields NaN.
        if not members:
            return np.nan
        return float(np.mean(returns_row[members].to_numpy(dtype=float)))

    def _rank_by_average_volume(position):
        # Tickers ordered by mean volume (missing values skipped) over the window
        # that ends at `position`; ties keep the column order (stable sort).
        first = max(position - number_of_periods + 1, 0)
        window_mean = stock_volume_data.iloc[first:position + 1].mean()
        return list(window_mean.sort_values(kind='mergesort').index)

    # Step 1: ranking for every date; it is refreshed on rows 0, REB, 2*REB, ...
    # and carried forward unchanged on the rows in between.
    groups_by_date = {}
    ranked = []
    for position, date in enumerate(stock_volume_data.index):
        if position % REB == 0:
            ranked = _rank_by_average_volume(position)
        # setdefault: the first row wins if a date label appears twice
        groups_by_date.setdefault(str(date), (ranked[:half], ranked[half:]))

    # Step 2: average each return row over the groups assigned one row earlier.
    # Rows are collected in a list and turned into a frame once, because
    # DataFrame.append no longer exists in pandas 2.x.
    records = []
    previous_date = None
    for position, (date, returns_row) in enumerate(stock_returns_data.iterrows()):
        if position > 0:
            key = str(previous_date)
            if key not in groups_by_date:
                raise IndexError("no volume data for previous period " + key)
            below, over = groups_by_date[key]
            records.append({
                "date": str(date),
                "p1": _average_return(returns_row, below),
                "p2": _average_return(returns_row, over),
                "p1_comp": list(below),
                "p2_comp": list(over),
            })
        previous_date = date
    return pd.DataFrame(records, columns=['date', 'p1', 'p2', 'p1_comp', 'p2_comp'])
    

In [15]:
# NOTE: stock_returns holds the raw percentage returns (stocks_price.pct_change()),
# not the excess returns stored in stock_returns_full.
q3 = portfolio_build_q3(
    stock_volume_data=stock_volume,
    stock_returns_data=stock_returns,
    number_of_periods=2,
    REB=3,
)
q3

Unnamed: 0,date,p1,p2,p1_comp,p2_comp
0,2001-01,-0.010790,0.168091,"[BRK-A, ED, GE, GS, DIS]","[KO, LLOY.L, IBM, SBUX, AAPL]"
1,2001-02,-0.016532,-0.091792,"[BRK-A, ED, GE, GS, DIS]","[KO, LLOY.L, IBM, SBUX, AAPL]"
2,2001-03,-0.062189,-0.006228,"[BRK-A, ED, GE, GS, DIS]","[KO, LLOY.L, IBM, SBUX, AAPL]"
3,2001-04,0.066971,0.068791,"[BRK-A, ED, GE, DIS, GS]","[KO, IBM, SBUX, LLOY.L, AAPL]"
4,2001-05,0.031138,-0.045283,"[BRK-A, ED, GE, DIS, GS]","[KO, IBM, SBUX, LLOY.L, AAPL]"
...,...,...,...,...,...
235,2020-08,0.037152,0.091986,"[BRK-A, ED, GS, IBM, SBUX]","[GE, DIS, KO, AAPL, LLOY.L]"
236,2020-09,0.010461,-0.039480,"[BRK-A, ED, GS, IBM, SBUX]","[GE, DIS, KO, AAPL, LLOY.L]"
237,2020-10,-0.035070,0.027602,"[BRK-A, ED, GS, IBM, SBUX]","[DIS, GE, KO, AAPL, LLOY.L]"
238,2020-11,0.112152,0.206188,"[BRK-A, ED, GS, IBM, SBUX]","[DIS, GE, KO, AAPL, LLOY.L]"


In [16]:
def portfolio_build_q4(stock_volume_data, stock_returns_data, number_of_periods, REB,PER1 ,PER2):
    """
    Returns a dataframe with portfolio average returns. Portfolio components are established
    based on the average stock volume over the previous number_of_periods periods and are
    recomputed only every REB periods.
    p1 - contains the average return of the PER1% stocks with lowest average volume
    p2 - contains the average return of all remaining stocks (highest average volume)
    p1_comp - contains the ticker names of companies in the p1 portfolio
    p2_comp - contains the ticker names of companies in the p2 portfolio
    Parameters:
    -------------
    stock_volume_data (df) - stock volume for tickers (one column per ticker)
    stock_returns_data (df) - returns for tickers; must use the same column names
                              and the same index labels as stock_volume_data
    number_of_periods (int) - how many periods prior should be taken into consideration while creating portfolios
                              (fewer are used at the start of the sample, where not enough history exists)
    REB (int) - how often (in periods) portfolio should be recalculated; must not be 0
    PER1 (int) - how many % of companies should be in p1 portfolio (expected range 0-100);
                 the resulting count is rounded up to a whole number of stocks
    PER2 (int) - how many % of companies should be in p2 portfolio. NOTE: currently not
                 used - p2 always receives every stock that is not in p1.
    Returns:
    -------------
    pd.DataFrame with columns date, p1, p2, p1_comp, p2_comp and one row for every
    row of stock_returns_data except the first (which has no previous period).
    Raises:
    -------------
    IndexError - when the previous return date has no row in stock_volume_data
    ZeroDivisionError - when REB is 0
    """
    # Use the frame that was passed in, not a notebook-level global.
    columns = list(stock_volume_data.columns)
    # Number of stocks in p1. Multiply before dividing: `PER1 * (n / 100)` suffers from
    # float error (30 * 0.1 == 3.0000000000000004), which ceil() turned into one stock too many.
    cutoff = math.ceil(PER1 * len(columns) / 100)

    def _average_return(returns_row, members):
        # Plain mean (NaN propagates); an empty portfolio yields NaN.
        if not members:
            return np.nan
        return float(np.mean(returns_row[members].to_numpy(dtype=float)))

    def _rank_by_average_volume(position):
        # Tickers ordered by mean volume (missing values skipped) over the window
        # that ends at `position`; ties keep the column order (stable sort).
        first = max(position - number_of_periods + 1, 0)
        window_mean = stock_volume_data.iloc[first:position + 1].mean()
        return list(window_mean.sort_values(kind='mergesort').index)

    # Step 1: ranking for every date; it is refreshed on rows 0, REB, 2*REB, ...
    # and carried forward unchanged on the rows in between.
    groups_by_date = {}
    ranked = []
    for position, date in enumerate(stock_volume_data.index):
        if position % REB == 0:
            ranked = _rank_by_average_volume(position)
        # setdefault: the first row wins if a date label appears twice
        groups_by_date.setdefault(str(date), (ranked[:cutoff], ranked[cutoff:]))

    # Step 2: average each return row over the groups assigned one row earlier.
    # Rows are collected in a list and turned into a frame once, because
    # DataFrame.append no longer exists in pandas 2.x.
    records = []
    previous_date = None
    for position, (date, returns_row) in enumerate(stock_returns_data.iterrows()):
        if position > 0:
            key = str(previous_date)
            if key not in groups_by_date:
                raise IndexError("no volume data for previous period " + key)
            below, over = groups_by_date[key]
            records.append({
                "date": str(date),
                "p1": _average_return(returns_row, below),
                "p2": _average_return(returns_row, over),
                "p1_comp": list(below),
                "p2_comp": list(over),
            })
        previous_date = date
    return pd.DataFrame(records, columns=['date', 'p1', 'p2', 'p1_comp', 'p2_comp'])
    

In [17]:
# NOTE: stock_returns holds the raw percentage returns (stocks_price.pct_change()),
# not the excess returns stored in stock_returns_full.
q4 = portfolio_build_q4(
    stock_volume_data=stock_volume,
    stock_returns_data=stock_returns,
    number_of_periods=2,
    REB=3,
    PER1=13,
    PER2=87,
)
q4

Unnamed: 0,date,p1,p2,p1_comp,p2_comp
0,2001-01,-0.064544,0.114449,"[BRK-A, ED]","[GE, GS, DIS, KO, LLOY.L, IBM, SBUX, AAPL]"
1,2001-02,0.041508,-0.078080,"[BRK-A, ED]","[GE, GS, DIS, KO, LLOY.L, IBM, SBUX, AAPL]"
2,2001-03,-0.031376,-0.034916,"[BRK-A, ED]","[GE, GS, DIS, KO, LLOY.L, IBM, SBUX, AAPL]"
3,2001-04,0.023658,0.078936,"[BRK-A, ED]","[GE, DIS, GS, KO, IBM, SBUX, LLOY.L, AAPL]"
4,2001-05,0.028403,-0.015942,"[BRK-A, ED]","[GE, DIS, GS, KO, IBM, SBUX, LLOY.L, AAPL]"
...,...,...,...,...,...
235,2020-08,0.022047,0.075199,"[BRK-A, ED]","[GS, IBM, SBUX, GE, DIS, KO, AAPL, LLOY.L]"
236,2020-09,0.033738,-0.026571,"[BRK-A, ED]","[GS, IBM, SBUX, GE, DIS, KO, AAPL, LLOY.L]"
237,2020-10,-0.022911,0.001060,"[BRK-A, ED]","[GS, IBM, SBUX, DIS, GE, KO, AAPL, LLOY.L]"
238,2020-11,0.053810,0.185510,"[BRK-A, ED]","[GS, IBM, SBUX, DIS, GE, KO, AAPL, LLOY.L]"
