In [1]:
# import packages
import os

import numpy as np
import pandas as pd

"""
Here we load 14 different FF datasets:
1. FF3 Daily
2. FF3 Monthly
3. 25 Size/BM Daily and Monthly
4. 25 Size/OP Daily
5. 25 Size/Inv Daily
6. FF3 Developed ex US Daily
7. FF5 Developed ex US Monthly
8. 25 Size/BM Developed ex US Daily and Monthly
9. 25 Size/OP Developed ex US Daily and Monthly
10. 25 Size/Inv Developed ex US Daily and Monthly
"""

'\nHere we load 14 different FF datasets:\n1. FF3 Daily\n2. FF3 Monthly\n3. 25 Size/BM Daily and Monthly\n4. 25 Size/OP Daily\n5. 25 Size/Inv Daily\n6. FF3 Developed ex US Daily\n7. FF5 Developed ex US Monthly\n8. 25 Size/BM Developed ex US Daily and Monthly\n9. 25 Size/OP Developed ex US Daily and Monthly\n10. 25 Size/Inv Developed ex US Daily and Monthly\n'

In [2]:
# set default directory
# Root folder of the project. The loading cells build their paths as
# maindir + "data/<file>", so the value must end with a path separator.
# Set the FF_MAINDIR environment variable to run the notebook on another
# machine without editing this cell; the original location stays the default.
maindir = os.environ.get(
    "FF_MAINDIR",
    "C:/Users/phils/OneDrive/Dokumente/Studium/4_PhD/3_Courses/CrossSectional_AssetPricing/Presentations/code/python/"
)
# Guard the string concatenation used by the loading cells.
if not maindir.endswith(("/", "\\")):
    maindir += "/"

In [3]:
# FF analysis
## load fama french data
### 1. Daily FF3
# Loads the daily factor file and publishes two names for the daily portfolio
# cells: `rfdatesd` (the date index) and `rfd` (the RF column divided by 100).
try:
    data = pd.read_csv(
        filepath_or_buffer = maindir + "data/F-F_Research_Data_Factors_daily.txt",
        sep = r'\s+',  # raw string: '\s' is an invalid escape in a plain literal
        header = 1,
        names = ['Mkt-RF', 'SMB', 'HML', 'RF'],
        index_col = 0,
        # NOTE(review): 'date' is not one of `names`, so that entry probably has
        # no effect on the index column -- confirm.
        dtype = {'date': str, 'Mkt-RF': np.float64, 'SMB': np.float64, 'HML': np.float64, 'RF': np.float64},
        skiprows = 2,
        skipfooter = 1,
        encoding = "utf-8",
        engine = 'python'
    )
    #### set date
    data.index = pd.to_datetime(data.index, format="%Y%m%d")
except FileNotFoundError:
    print("File not found. Terminating script.")
    raise SystemExit
except Exception as e:
    print(f"An error occurred: {str(e)}. Terminating script.")
    raise SystemExit
else:
    # Only reached when the load succeeded. A `finally` guarded by
    # "'data' in locals()" would also fire on a failed load whenever an earlier
    # cell had left a `data` frame in the kernel, publishing the wrong series.
    rfdatesd = data.index
    rfd = data["RF"] / 100

In [4]:
### 2. Monthly FF3
# Loads the monthly factor file and publishes two names for the monthly
# portfolio cells: `rfdatesm` (the date index) and `rfm` (RF divided by 100).
try:
    data = pd.read_csv(
        filepath_or_buffer = maindir + "data/F-F_Research_Data_Factors.txt",
        sep = r'\s+',  # raw string: '\s' is an invalid escape in a plain literal
        header = 1,
        names = ['Mkt-RF', 'SMB', 'HML', 'RF'],
        index_col = 0,
        # NOTE(review): 'date' is not one of `names`, so that entry probably has
        # no effect on the index column -- confirm.
        dtype = {'date': str, 'Mkt-RF': np.float64, 'SMB': np.float64, 'HML': np.float64, 'RF': np.float64},
        skiprows = 1,
        encoding = "utf-8",
        engine = 'python',
        # Hard-coded row count; presumably the length of the monthly section of
        # the file -- re-check whenever the data file is refreshed.
        nrows=1122
    )
    #### set date
    data.index = pd.to_datetime(data.index, format="%Y%m")
except FileNotFoundError:
    print("File not found. Terminating script.")
    raise SystemExit
except Exception as e:
    print(f"An error occurred: {str(e)}. Terminating script.")
    raise SystemExit
else:
    # Only reached when the load succeeded, so a `data` frame left in the kernel
    # by an earlier cell can never be published as the monthly risk-free series.
    rfdatesm = data.index
    rfm = data["RF"] / 100

In [5]:
### 3. 25 Size/BM Daily
# Loads the daily 5x5 portfolio file, aligns it with the daily risk-free series
# (`rfdatesd` / `rfd` from the factor cell) and writes retd / datesd / rfd to
# data/25_Size_BM.npz.
try:
    data = pd.read_csv(
        filepath_or_buffer = maindir + "data/25_Portfolios_5x5_Daily.txt",
        sep = r'\s+',  # raw string: '\s' is an invalid escape in a plain literal
        header = 1,
        names = [['Small-', '2-', '3-', '4-', 'Big-'][j] + elem for j in range(5) for elem in ['Low', '2', '3', '4', 'High']],
        index_col = 0,
        # NOTE(review): the keys 'column0'..'column25' match none of `names`, so
        # this mapping probably has no effect -- confirm.
        dtype = {f'column{i}': (str if i == 0 else np.float64) for i in range(0, 26)},
        skiprows = 18,
        encoding = "utf-8",
        engine = 'python',
        # Hard-coded row count; re-check whenever the data file is refreshed.
        nrows=24664
    )
    #### set date
    data.index = pd.to_datetime(data.index, format="%Y%m%d")
except FileNotFoundError:
    print("File not found. Terminating script.")
    raise SystemExit
except Exception as e:
    print(f"An error occurred: {str(e)}. Terminating script.")
    raise SystemExit
else:
    # Only reached when the load succeeded. Under `finally`, a failed load would
    # still process whatever `data` an earlier cell left behind and save it here.

    #### Copy data from factors
    rfdatesd2 = rfdatesd.copy()
    rfd2 = rfd.copy()

    #### Extract the returns data and handle missing values
    # Values below -99 are treated as missing; `data` is rebound to the cleaned frame.
    data = data.mask(data < -99)
    retd = data / 100

    #### Extract the dates data and intersect with rfdates
    intersection = data.index.intersection(rfdatesd2)
    datesd = intersection
    retd = retd.loc[intersection]

    #### Filter rfdates to match the common dates
    # Same label-based selection as for retd, so the saved arrays are row-aligned.
    rfd2 = rfd2.loc[intersection]

    #### Save the processed data to a file
    result = {'retd': retd.values, 'datesd': datesd.values, 'rfd': rfd2.values}
    np.savez(maindir + 'data/25_Size_BM.npz', **result)

In [6]:
### 3. 25 Size/BM Monthly
# Loads the monthly 5x5 portfolio file, aligns it with the monthly risk-free
# series (`rfdatesm` / `rfm` from the factor cell) and writes retm / datesm /
# rfm to data/25_Size_BM_monthly.npz.
try:
    data = pd.read_csv(
        filepath_or_buffer = maindir + "data/25_Portfolios_5x5.txt",
        sep = r'\s+',  # raw string: '\s' is an invalid escape in a plain literal
        header = 1,
        names = [['Small-', '2-', '3-', '4-', 'Big-'][j] + elem for j in range(5) for elem in ['Lo BM', '2', '3', '4', 'Hi BM']],
        index_col = 0,
        # NOTE(review): the keys 'column0'..'column25' match none of `names`, so
        # this mapping probably has no effect -- confirm.
        dtype = {f'column{i}': (str if i == 0 else np.float64) for i in range(0, 26)},
        skiprows = 15,
        encoding = "utf-8",
        engine = 'python',
        # Hard-coded row count; re-check whenever the data file is refreshed.
        nrows=1122
    )
    #### set date
    data.index = pd.to_datetime(data.index, format="%Y%m")
except FileNotFoundError:
    print("File not found. Terminating script.")
    raise SystemExit
except Exception as e:
    print(f"An error occurred: {str(e)}. Terminating script.")
    raise SystemExit
else:
    # Only reached when the load succeeded. Under `finally`, a failed load would
    # still process whatever `data` an earlier cell left behind and save it here.

    #### Copy data from factors
    rfdatesm2 = rfdatesm.copy()
    rfm2 = rfm.copy()

    #### Extract the returns data and handle missing values
    # Values below -99 are treated as missing; `data` is rebound to the cleaned frame.
    data = data.mask(data < -99)
    retm = data / 100

    #### Extract the dates data and intersect with rfdates
    intersection = data.index.intersection(rfdatesm2)
    datesm = intersection
    retm = retm.loc[intersection]

    #### Filter rfdates to match the common dates
    # Same label-based selection as for retm, so the saved arrays are row-aligned.
    rfm2 = rfm2.loc[intersection]

    #### Save the processed data to a file
    result = {'retm': retm.values, 'datesm': datesm.values, 'rfm': rfm2.values}
    np.savez(maindir + 'data/25_Size_BM_monthly.npz', **result)

In [7]:
### 4. 25 Size/OP Daily
# Loads the daily ME/OP 5x5 portfolio file, aligns it with the daily risk-free
# series (`rfdatesd` / `rfd`) and writes retd / datesd / rfd to
# data/25_Size_OP.npz.
try:
    data = pd.read_csv(
        filepath_or_buffer = maindir + "data/25_Portfolios_ME_OP_5x5_Daily.txt",
        sep = r'\s+',  # raw string: '\s' is an invalid escape in a plain literal
        header = 1,
        names = [['Small-', '2-', '3-', '4-', 'Big-'][j] + elem for j in range(5) for elem in ['Low OP', '2', '3', '4', 'High OP']],
        index_col = 0,
        # NOTE(review): the keys 'column0'..'column25' match none of `names`, so
        # this mapping probably has no effect -- confirm.
        dtype = {f'column{i}': (str if i == 0 else np.float64) for i in range(0, 26)},
        skiprows = 16,
        encoding = "utf-8",
        engine = 'python',
        # Hard-coded row count; re-check whenever the data file is refreshed.
        nrows=14265
    )
    #### set date
    data.index = pd.to_datetime(data.index, format="%Y%m%d")
except FileNotFoundError:
    print("File not found. Terminating script.")
    raise SystemExit
except Exception as e:
    print(f"An error occurred: {str(e)}. Terminating script.")
    raise SystemExit
else:
    # Only reached when the load succeeded. Under `finally`, a failed load would
    # still process whatever `data` an earlier cell left behind and save it here.

    #### Copy data from factors
    rfdatesd2 = rfdatesd.copy()
    rfd2 = rfd.copy()

    #### Extract the returns data and handle missing values
    # Values below -99 are treated as missing; `data` is rebound to the cleaned frame.
    data = data.mask(data < -99)
    retd = data / 100

    #### Extract the dates data and intersect with rfdates
    intersection = data.index.intersection(rfdatesd2)
    datesd = intersection
    retd = retd.loc[intersection]

    #### Filter rfdates to match the common dates
    # Same label-based selection as for retd, so the saved arrays are row-aligned.
    rfd2 = rfd2.loc[intersection]

    #### Save the processed data to a file
    result = {'retd': retd.values, 'datesd': datesd.values, 'rfd': rfd2.values}
    np.savez(maindir + 'data/25_Size_OP.npz', **result)

In [8]:
### 5. 25 Size/Inv Daily
# Loads the daily ME/INV 5x5 portfolio file, aligns it with the daily risk-free
# series (`rfdatesd` / `rfd`) and writes retd / datesd / rfd to
# data/25_Size_Inv.npz.
try:
    data = pd.read_csv(
        filepath_or_buffer = maindir + "data/25_Portfolios_ME_INV_5x5_Daily.txt",
        sep = r'\s+',  # raw string: '\s' is an invalid escape in a plain literal
        header = 1,
        names = [['Small-', '2-', '3-', '4-', 'Big-'][j] + elem for j in range(5) for elem in ['LowINV', '2', '3', '4', 'HighINV']],
        index_col = 0,
        # NOTE(review): the keys 'column0'..'column25' match none of `names`, so
        # this mapping probably has no effect -- confirm.
        dtype = {f'column{i}': (str if i == 0 else np.float64) for i in range(0, 26)},
        skiprows = 16,
        encoding = "utf-8",
        engine = 'python',
        # Hard-coded row count; re-check whenever the data file is refreshed.
        nrows=14265
    )
    #### set date
    data.index = pd.to_datetime(data.index, format="%Y%m%d")
except FileNotFoundError:
    print("File not found. Terminating script.")
    raise SystemExit
except Exception as e:
    print(f"An error occurred: {str(e)}. Terminating script.")
    raise SystemExit
else:
    # Only reached when the load succeeded. Under `finally`, a failed load would
    # still process whatever `data` an earlier cell left behind and save it here.

    #### Copy data from factors
    rfdatesd2 = rfdatesd.copy()
    rfd2 = rfd.copy()

    #### Extract the returns data and handle missing values
    # Values below -99 are treated as missing; `data` is rebound to the cleaned frame.
    data = data.mask(data < -99)
    retd = data / 100

    #### Extract the dates data and intersect with rfdates
    intersection = data.index.intersection(rfdatesd2)
    datesd = intersection
    retd = retd.loc[intersection]

    #### Filter rfdates to match the common dates
    # Same label-based selection as for retd, so the saved arrays are row-aligned.
    rfd2 = rfd2.loc[intersection]

    #### Save the processed data to a file
    result = {'retd': retd.values, 'datesd': datesd.values, 'rfd': rfd2.values}
    np.savez(maindir + 'data/25_Size_Inv.npz', **result)

In [9]:
### 6. FF3 Developed ex US Daily
# Loads the daily Developed ex US factor file and rebinds `rfdatesd` / `rfd`.
# The US daily factor cell sets the same two names, so every daily portfolio
# cell executed after this one is aligned against the Developed ex US series.
try:
    data = pd.read_csv(
        filepath_or_buffer = maindir + "data/Developed_ex_US_3_Factors_Daily.txt",
        sep = r'\s+',  # raw string: '\s' is an invalid escape in a plain literal
        header = 1,
        names = ['Mkt-RF', 'SMB', 'HML', 'RF'],
        index_col = 0,
        # NOTE(review): 'date' is not one of `names`, so that entry probably has
        # no effect on the index column -- confirm.
        dtype = {'date': str, 'Mkt-RF': np.float64, 'SMB': np.float64, 'HML': np.float64, 'RF': np.float64},
        skiprows = 2,
        # NOTE(review): unlike the US daily factor cell, neither skipfooter nor
        # nrows is given -- confirm this file has no trailing non-data lines.
        encoding = "utf-8",
        engine = 'python'
    )
    #### set date
    data.index = pd.to_datetime(data.index, format="%Y%m%d")
except FileNotFoundError:
    print("File not found. Terminating script.")
    raise SystemExit
except Exception as e:
    print(f"An error occurred: {str(e)}. Terminating script.")
    raise SystemExit
else:
    # Only reached when the load succeeded, so a `data` frame left in the kernel
    # by an earlier cell can never be published as the daily risk-free series.
    rfdatesd = data.index
    rfd = data["RF"] / 100


In [10]:
### 7. FF5 Developed ex US Monthly
# Loads the monthly Developed ex US 5-factor file and rebinds `rfdatesm` / `rfm`.
# The US monthly factor cell sets the same two names, so every monthly portfolio
# cell executed after this one is aligned against the Developed ex US series.
try:
    data = pd.read_csv(
        filepath_or_buffer = maindir + "data/Developed_ex_US_5_Factors.txt",
        sep = r'\s+',  # raw string: '\s' is an invalid escape in a plain literal
        header = 1,
        names = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA','RF'],
        index_col = 0,
        # NOTE(review): 'date' is not one of `names`, so that entry probably has
        # no effect on the index column -- confirm.
        dtype = {
            'date': str,
            'Mkt-RF': np.float64,
            'SMB': np.float64,
            'HML': np.float64,
            'RMW': np.float64,
            'CMA': np.float64,
            'RF': np.float64
        },
        skiprows = 2,
        encoding = "utf-8",
        engine = 'python',
        # Hard-coded row count; re-check whenever the data file is refreshed.
        nrows=354
    )
    #### set date
    data.index = pd.to_datetime(data.index, format="%Y%m")
except FileNotFoundError:
    print("File not found. Terminating script.")
    raise SystemExit
except Exception as e:
    print(f"An error occurred: {str(e)}. Terminating script.")
    raise SystemExit
else:
    # Only reached when the load succeeded, so a `data` frame left in the kernel
    # by an earlier cell can never be published as the monthly risk-free series.
    rfdatesm = data.index
    rfm = data["RF"] / 100

In [11]:
### 8. 25 Size/BM Developed ex US Daily
# Loads the daily Developed ex US ME/BE-ME portfolio file, aligns it with the
# current daily risk-free series (`rfdatesd` / `rfd`) and writes retd / datesd /
# rfd to data/25_Size_BM_DevExUS.npz.
try:
    data = pd.read_csv(
        filepath_or_buffer = maindir + "data/Developed_ex_US_25_Portfolios_ME_BE-ME_Daily.txt",
        sep = r'\s+',  # raw string: '\s' is an invalid escape in a plain literal
        header = 1,
        names = [['Small-', '2-', '3-', '4-', 'Big-'][j] + elem for j in range(5) for elem in ['Low', '2', '3', '4', 'High']],
        index_col = 0,
        # NOTE(review): the keys 'column0'..'column25' match none of `names`, so
        # this mapping probably has no effect -- confirm.
        dtype = {f'column{i}': (str if i == 0 else np.float64) for i in range(0, 26)},
        skiprows = 18,
        encoding = "utf-8",
        engine = 'python',
        # Hard-coded row count; re-check whenever the data file is refreshed.
        nrows=7700
    )
    #### set date
    data.index = pd.to_datetime(data.index, format="%Y%m%d")
except FileNotFoundError:
    print("File not found. Terminating script.")
    raise SystemExit
except Exception as e:
    print(f"An error occurred: {str(e)}. Terminating script.")
    raise SystemExit
else:
    # Only reached when the load succeeded. Under `finally`, a failed load would
    # still process whatever `data` an earlier cell left behind and save it here.

    #### Copy data from factors
    rfdatesd2 = rfdatesd.copy()
    rfd2 = rfd.copy()

    #### Extract the returns data and handle missing values
    # Values below -99 are treated as missing; `data` is rebound to the cleaned frame.
    data = data.mask(data < -99)
    retd = data / 100

    #### Extract the dates data and intersect with rfdates
    intersection = data.index.intersection(rfdatesd2)
    datesd = intersection
    retd = retd.loc[intersection]

    #### Filter rfdates to match the common dates
    # Same label-based selection as for retd, so the saved arrays are row-aligned.
    rfd2 = rfd2.loc[intersection]

    #### Save the processed data to a file
    result = {'retd': retd.values, 'datesd': datesd.values, 'rfd': rfd2.values}
    np.savez(maindir + 'data/25_Size_BM_DevExUS.npz', **result)

In [12]:
### 8. 25 Size/BM Developed ex US Monthly
# Loads the monthly Developed ex US ME/BE-ME portfolio file, aligns it with the
# current monthly risk-free series (`rfdatesm` / `rfm`) and writes retm /
# datesm / rfm to data/25_Size_BM_DevExUS_monthly.npz.
try:
    data = pd.read_csv(
        filepath_or_buffer = maindir + "data/Developed_ex_US_25_Portfolios_ME_BE-ME.txt",
        sep = r'\s+',  # raw string: '\s' is an invalid escape in a plain literal
        header = 1,
        names = [['Small-', '2-', '3-', '4-', 'Big-'][j] + elem for j in range(5) for elem in ['Lo BM', '2', '3', '4', 'Hi BM']],
        index_col = 0,
        # NOTE(review): the keys 'column0'..'column25' match none of `names`, so
        # this mapping probably has no effect -- confirm.
        dtype = {f'column{i}': (str if i == 0 else np.float64) for i in range(0, 26)},
        skiprows = 20,
        encoding = "utf-8",
        engine = 'python',
        # Hard-coded row count; re-check whenever the data file is refreshed.
        nrows=354
    )
    #### set date
    data.index = pd.to_datetime(data.index, format="%Y%m")
except FileNotFoundError:
    print("File not found. Terminating script.")
    raise SystemExit
except Exception as e:
    print(f"An error occurred: {str(e)}. Terminating script.")
    raise SystemExit
else:
    # Only reached when the load succeeded. Under `finally`, a failed load would
    # still process whatever `data` an earlier cell left behind and save it here.

    #### Copy data from factors
    rfdatesm2 = rfdatesm.copy()
    rfm2 = rfm.copy()

    #### Extract the returns data and handle missing values
    # Values below -99 are treated as missing; `data` is rebound to the cleaned frame.
    data = data.mask(data < -99)
    retm = data / 100

    #### Extract the dates data and intersect with rfdates
    intersection = data.index.intersection(rfdatesm2)
    datesm = intersection
    retm = retm.loc[intersection]

    #### Filter rfdates to match the common dates
    # Same label-based selection as for retm, so the saved arrays are row-aligned.
    rfm2 = rfm2.loc[intersection]

    #### Save the processed data to a file
    result = {'retm': retm.values, 'datesm': datesm.values, 'rfm': rfm2.values}
    np.savez(maindir + 'data/25_Size_BM_DevExUS_monthly.npz', **result)

In [13]:
### 9. 25 Size/OP Developed ex US Daily
# Loads the daily Developed ex US ME/OP portfolio file, aligns it with the
# current daily risk-free series (`rfdatesd` / `rfd`) and writes retd / datesd /
# rfd to data/25_Size_OP_DevExUS.npz.
try:
    data = pd.read_csv(
        filepath_or_buffer = maindir + "data/Developed_ex_US_25_Portfolios_ME_OP_Daily.txt",
        sep = r'\s+',  # raw string: '\s' is an invalid escape in a plain literal
        header = 1,
        names = [['Small-', '2-', '3-', '4-', 'Big-'][j] + elem for j in range(5) for elem in ['Low', '2', '3', '4', 'High']],
        index_col = 0,
        # NOTE(review): the keys 'column0'..'column25' match none of `names`, so
        # this mapping probably has no effect -- confirm.
        dtype = {f'column{i}': (str if i == 0 else np.float64) for i in range(0, 26)},
        skiprows = 18,
        encoding = "utf-8",
        engine = 'python',
        # Hard-coded row count; re-check whenever the data file is refreshed.
        nrows=7700
    )
    #### set date
    data.index = pd.to_datetime(data.index, format="%Y%m%d")
except FileNotFoundError:
    print("File not found. Terminating script.")
    raise SystemExit
except Exception as e:
    print(f"An error occurred: {str(e)}. Terminating script.")
    raise SystemExit
else:
    # Only reached when the load succeeded. Under `finally`, a failed load would
    # still process whatever `data` an earlier cell left behind and save it here.

    #### Copy data from factors
    rfdatesd2 = rfdatesd.copy()
    rfd2 = rfd.copy()

    #### Extract the returns data and handle missing values
    # Values below -99 are treated as missing; `data` is rebound to the cleaned frame.
    data = data.mask(data < -99)
    retd = data / 100

    #### Extract the dates data and intersect with rfdates
    intersection = data.index.intersection(rfdatesd2)
    datesd = intersection
    retd = retd.loc[intersection]

    #### Filter rfdates to match the common dates
    # Same label-based selection as for retd, so the saved arrays are row-aligned.
    rfd2 = rfd2.loc[intersection]

    #### Save the processed data to a file
    result = {'retd': retd.values, 'datesd': datesd.values, 'rfd': rfd2.values}
    np.savez(maindir + 'data/25_Size_OP_DevExUS.npz', **result)

In [14]:
### 9. 25 Size/OP Developed ex US Monthly
# Loads the monthly Developed ex US ME/OP portfolio file, aligns it with the
# current monthly risk-free series (`rfdatesm` / `rfm`) and writes retm /
# datesm / rfm to data/25_Size_OP_DevExUS_monthly.npz.
try:
    data = pd.read_csv(
        filepath_or_buffer = maindir + "data/Developed_ex_US_25_Portfolios_ME_OP.txt",
        sep = r'\s+',  # raw string: '\s' is an invalid escape in a plain literal
        header = 1,
        # Column labels name the OP sort; they were previously the 'Lo BM' /
        # 'Hi BM' labels of the Size/BM cell. Labels are not written to the .npz.
        names = [['Small-', '2-', '3-', '4-', 'Big-'][j] + elem for j in range(5) for elem in ['Lo OP', '2', '3', '4', 'Hi OP']],
        index_col = 0,
        # NOTE(review): the keys 'column0'..'column25' match none of `names`, so
        # this mapping probably has no effect -- confirm.
        dtype = {f'column{i}': (str if i == 0 else np.float64) for i in range(0, 26)},
        skiprows = 20,
        encoding = "utf-8",
        engine = 'python',
        # Hard-coded row count; re-check whenever the data file is refreshed.
        nrows=354
    )
    #### set date
    data.index = pd.to_datetime(data.index, format="%Y%m")
except FileNotFoundError:
    print("File not found. Terminating script.")
    raise SystemExit
except Exception as e:
    print(f"An error occurred: {str(e)}. Terminating script.")
    raise SystemExit
else:
    # Only reached when the load succeeded. Under `finally`, a failed load would
    # still process whatever `data` an earlier cell left behind and save it here.

    #### Copy data from factors
    rfdatesm2 = rfdatesm.copy()
    rfm2 = rfm.copy()

    #### Extract the returns data and handle missing values
    # Values below -99 are treated as missing; `data` is rebound to the cleaned frame.
    data = data.mask(data < -99)
    retm = data / 100

    #### Extract the dates data and intersect with rfdates
    intersection = data.index.intersection(rfdatesm2)
    datesm = intersection
    retm = retm.loc[intersection]

    #### Filter rfdates to match the common dates
    # Same label-based selection as for retm, so the saved arrays are row-aligned.
    rfm2 = rfm2.loc[intersection]

    #### Save the processed data to a file
    result = {'retm': retm.values, 'datesm': datesm.values, 'rfm': rfm2.values}
    np.savez(maindir + 'data/25_Size_OP_DevExUS_monthly.npz', **result)

In [15]:
### 10. 25 Size/Inv Developed ex US Daily
# Loads the daily Developed ex US ME/INV portfolio file, aligns it with the
# current daily risk-free series (`rfdatesd` / `rfd`) and writes retd / datesd /
# rfd to data/25_Size_Inv_DevExUS.npz.
try:
    data = pd.read_csv(
        filepath_or_buffer = maindir + "data/Developed_ex_US_25_Portfolios_ME_INV_Daily.txt",
        sep = r'\s+',  # raw string: '\s' is an invalid escape in a plain literal
        header = 1,
        # Column labels name the INV sort; they were previously the 'Low OP' /
        # 'High OP' labels of the Size/OP cell. Labels are not written to the .npz.
        names = [['Small-', '2-', '3-', '4-', 'Big-'][j] + elem for j in range(5) for elem in ['LowINV', '2', '3', '4', 'HighINV']],
        index_col = 0,
        # NOTE(review): the keys 'column0'..'column25' match none of `names`, so
        # this mapping probably has no effect -- confirm.
        dtype = {f'column{i}': (str if i == 0 else np.float64) for i in range(0, 26)},
        skiprows = 18,
        encoding = "utf-8",
        engine = 'python',
        # Hard-coded row count; re-check whenever the data file is refreshed.
        nrows=7700
    )
    #### set date
    data.index = pd.to_datetime(data.index, format="%Y%m%d")
except FileNotFoundError:
    print("File not found. Terminating script.")
    raise SystemExit
except Exception as e:
    print(f"An error occurred: {str(e)}. Terminating script.")
    raise SystemExit
else:
    # Only reached when the load succeeded. Under `finally`, a failed load would
    # still process whatever `data` an earlier cell left behind and save it here.

    #### Copy data from factors
    rfdatesd2 = rfdatesd.copy()
    rfd2 = rfd.copy()

    #### Extract the returns data and handle missing values
    # Values below -99 are treated as missing; `data` is rebound to the cleaned frame.
    data = data.mask(data < -99)
    retd = data / 100

    #### Extract the dates data and intersect with rfdates
    intersection = data.index.intersection(rfdatesd2)
    datesd = intersection
    retd = retd.loc[intersection]

    #### Filter rfdates to match the common dates
    # Same label-based selection as for retd, so the saved arrays are row-aligned.
    rfd2 = rfd2.loc[intersection]

    #### Save the processed data to a file
    result = {'retd': retd.values, 'datesd': datesd.values, 'rfd': rfd2.values}
    np.savez(maindir + 'data/25_Size_Inv_DevExUS.npz', **result)

In [16]:
### 10. 25 Size/Inv Developed ex US Monthly
# Loads the monthly Developed ex US ME/INV portfolio file, aligns it with the
# current monthly risk-free series (`rfdatesm` / `rfm`) and writes retm /
# datesm / rfm to data/25_Size_Inv_DevExUS_monthly.npz.
try:
    data = pd.read_csv(
        filepath_or_buffer = maindir + "data/Developed_ex_US_25_Portfolios_ME_INV.txt",
        sep = r'\s+',  # raw string: '\s' is an invalid escape in a plain literal
        header = 1,
        # Column labels name the INV sort; they were previously the 'Lo BM' /
        # 'Hi BM' labels of the Size/BM cell. Labels are not written to the .npz.
        names = [['Small-', '2-', '3-', '4-', 'Big-'][j] + elem for j in range(5) for elem in ['Lo INV', '2', '3', '4', 'Hi INV']],
        index_col = 0,
        # NOTE(review): the keys 'column0'..'column25' match none of `names`, so
        # this mapping probably has no effect -- confirm.
        dtype = {f'column{i}': (str if i == 0 else np.float64) for i in range(0, 26)},
        # NOTE(review): the other monthly Developed ex US portfolio cells use
        # skiprows = 20 -- confirm 18 is the right offset for this file.
        skiprows = 18,
        encoding = "utf-8",
        engine = 'python',
        # Hard-coded row count; re-check whenever the data file is refreshed.
        nrows=354
    )
    #### set date
    data.index = pd.to_datetime(data.index, format="%Y%m")
except FileNotFoundError:
    print("File not found. Terminating script.")
    raise SystemExit
except Exception as e:
    print(f"An error occurred: {str(e)}. Terminating script.")
    raise SystemExit
else:
    # Only reached when the load succeeded. Under `finally`, a failed load would
    # still process whatever `data` an earlier cell left behind and save it here.

    #### Copy data from factors
    rfdatesm2 = rfdatesm.copy()
    rfm2 = rfm.copy()

    #### Extract the returns data and handle missing values
    # Values below -99 are treated as missing; `data` is rebound to the cleaned frame.
    data = data.mask(data < -99)
    retm = data / 100

    #### Extract the dates data and intersect with rfdates
    intersection = data.index.intersection(rfdatesm2)
    datesm = intersection
    retm = retm.loc[intersection]

    #### Filter rfdates to match the common dates
    # Same label-based selection as for retm, so the saved arrays are row-aligned.
    rfm2 = rfm2.loc[intersection]

    #### Save the processed data to a file
    result = {'retm': retm.values, 'datesm': datesm.values, 'rfm': rfm2.values}
    np.savez(maindir + 'data/25_Size_Inv_DevExUS_monthly.npz', **result)