In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas.tseries.offsets import DateOffset

# Data Reading

First, we have to create dataframes from the given datasets. We have to sort them, clean them, and combine the relevant data.

In [2]:
# Monthly per-company panel: one row per (ISIN, month).
monthly_dataset = pd.read_csv("NL_FR_BE_data_monthly.csv")
monthly_dataset = monthly_dataset.assign(
    mdate=pd.to_datetime(monthly_dataset["mdate"], format="%Y%m"),   # YYYYMM -> first day of that month
    ISIN=monthly_dataset["ISIN"].astype(str),
)
monthly_dataset = (
    monthly_dataset
    .sort_values(["ISIN", "mdate"])   # Group each security together, oldest month first
    .reset_index(drop=True)           # Fresh 0..n-1 index without keeping the old one as a column
    .dropna()                         # Discard any row with a missing value (index labels may have gaps afterwards)
)
print(monthly_dataset.head(3).to_markdown(), "\n")

# Annual per-company dataset: the given annual data enriched with the June rows of the monthly panel.
# First we read the given annual data
annual_dataset = pd.read_csv("NL_FR_BE_data_annual.csv")
# Fiscal year Y is parsed as 1 January Y and then moved 5 months forward to 1 June Y,
# so that it carries the same timestamp as the June rows of the monthly data.
# NOTE(review): this aligns fiscal year Y with June of the same year Y - confirm that the
# fiscal-year-Y figures are really known at that point (otherwise this is look-ahead).
annual_dataset["fyear"] = pd.to_datetime(annual_dataset["fyear"], format="%Y") + DateOffset(months=5)
annual_dataset["ISIN"] = annual_dataset["ISIN"].astype(str)
annual_dataset = (
    annual_dataset
    .sort_values(["ISIN", "fyear"])   # Sort by security and date, for easier access
    .reset_index(drop=True)
)

# Next we get the relevant data for the annual dataframe from the monthly one.
# The derived columns are built with assign() on the filtered frame, which returns a new
# DataFrame; assigning columns onto a .loc[mask] slice is what triggers pandas'
# SettingWithCopyWarning.
june_rows = monthly_dataset.loc[monthly_dataset["mdate"].dt.month == 6]
monthly_dataset_annual = (
    june_rows
    .assign(
        RET10=june_rows["RET11"] - june_rows["RET"],   # Remove the last month's return to avoid short-term mean reversion
        fyear=june_rows["mdate"],                      # Same key name as in the annual data
    )
    .drop(columns=["RET11", "RET", "mdate"])           # Replaced by RET10 / fyear above
)

# merge() without `on=` performs an inner join on every column name the two frames share.
# NOTE(review): presumably the shared columns are just ISIN and fyear - confirm against the CSV header.
annual_dataset = annual_dataset.merge(monthly_dataset_annual)       # Final dataset on which we can trade
annual_dataset = annual_dataset.dropna()                            # Just to be sure
print(annual_dataset.head(3).to_markdown(), "\n")

# Market-level monthly dataframe (one row per month, read from the FF factor file).
monthly_fixed_params = pd.read_csv("Europe_FF_Factors.csv")
monthly_fixed_params["mdate"] = pd.to_datetime(monthly_fixed_params["mdate"], format="%Y%m")
monthly_fixed_params = (
    monthly_fixed_params
    .sort_values("mdate")      # Chronological order (this frame is sorted by date only)
    .reset_index(drop=True)    # Fresh 0..n-1 index without keeping the old one as a column
)
# Placeholder columns for the per-month signal thresholds; they start at 0.0 and are filled in later.
for border_column in ("b_border", "h_border", "s_border", "ivol_border"):
    monthly_fixed_params[border_column] = np.zeros(len(monthly_fixed_params))
print(monthly_fixed_params.head(3).to_markdown(), "\n")

|    | ISIN         | mdate               |     RET |   RET11 |      ME |      b |       h |       s |   ivol |
|---:|:-------------|:--------------------|--------:|--------:|--------:|-------:|--------:|--------:|-------:|
|  0 | ANN4327C1220 | 1991-06-01 00:00:00 | -0.0256 | -0.2089 | 712.452 | 1.2541 | -0.9141 | -0.1884 | 0.0074 |
|  1 | ANN4327C1220 | 1991-07-01 00:00:00 |  0.024  |  0.1856 | 729.56  | 1.1767 | -1.2208 | -0.2632 | 0.0075 |
|  2 | ANN4327C1220 | 1991-08-01 00:00:00 | -0.0799 |  0.2478 | 671.284 | 0.9473 | -1.7583 | -0.5612 | 0.0189 | 

|    | ISIN         | fyear               |   BEME |     OP |     INV |      ME |      b |       h |       s |   ivol |   RET10 |
|---:|:-------------|:--------------------|-------:|-------:|--------:|--------:|-------:|--------:|--------:|-------:|--------:|
|  0 | ANN4327C1220 | 1991-06-01 00:00:00 | 0.589  | 0.5048 |  0.115  | 712.452 | 1.2541 | -0.9141 | -0.1884 | 0.0074 | -0.1833 |
|  1 | ANN4327C1220 | 1992-06-01 00:00:00 | 0.72

Possibilities for Trades:
1) We are scored on outperformance relative to FF-3. We are given the FF-3 factors together with WML (more on it here: https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/Data_Library/f-f_developed_mom.html).
2) With given data we can find FF-5, because Investment(INV) and Operating Profitability (OP) are given (However, only annually)
3) Also, with given data we can easily find momentum, because Trailing Returns are given (Ret11, Ret10)
4) Moreover, implied volatility of returns is given, which means we can create volatility filter too. Combine with some Risk Free rate etc.
5) Currently we have implemented a simple strategy which trades on the given FF-3 factors. There is a lot of room for improvement here. It also takes around 10 minutes to run overall.

# Strategy
We trade on the given FF-3 factors and implied volatility. If a company is above that month's cross-sectional median on all 3 residuals and also has above-median volatility of returns, we buy it.

In [3]:
# Date range covered by the stock data (kept for reference / later cells).
start_date = monthly_dataset["mdate"].min()
end_date = monthly_dataset["mdate"].max()
datelist = pd.date_range(start_date, end_date, freq="MS").tolist()

# First we have to calculate the borders of the FF-3 parameters to trade on them.
# The border is a cross-sectional quantile of all stocks in a month; 0.5 means "upper half".
BORDER_QUANTILE = 0.5
factor_columns = ["b", "h", "s", "ivol"]

# One row per month, one column per factor: the quantile across all stocks of that month.
# This replaces re-filtering the whole panel once per month.
monthly_borders = monthly_dataset.groupby("mdate")[factor_columns].quantile(BORDER_QUANTILE)

# Attach the borders to the market frame by matching on the month itself rather than on
# row position, so the result stays correct even if the factor file starts on a different
# month or covers more/fewer months than the stock data. Months without any stock data
# get NaN as border.
for factor in factor_columns:
    monthly_fixed_params[f"{factor}_border"] = monthly_fixed_params["mdate"].map(monthly_borders[factor])

print(monthly_fixed_params.head(3).to_markdown(), "\n")

|    | mdate               |   MktRF |     SMB |     HML |     RF |    WML |   b_border |   h_border |   s_border |   ivol_border |
|---:|:--------------------|--------:|--------:|--------:|-------:|-------:|-----------:|-----------:|-----------:|--------------:|
|  0 | 1991-06-01 00:00:00 | -0.0741 |  0.0061 | -0.0069 | 0.0042 | 0.0054 |    1.07925 |    -0.083  |     0.3317 |       0.0132  |
|  1 | 1991-07-01 00:00:00 |  0.053  | -0.0323 | -0.0035 | 0.0049 | 0.0506 |    1.08765 |    -0.1328 |     0.3626 |       0.01275 |
|  2 | 1991-08-01 00:00:00 |  0.0142 | -0.0093 | -0.0008 | 0.0046 | 0.0377 |    1.0084  |    -0.0946 |     0.3488 |       0.0134  | 



Now we can set up the trades. We go through all companies and check, for each month, whether their residuals are higher than that month's border.

In [4]:
# Trade status per stock-month (1: long, -1: short, 0: none), stored as float.
#
# A stock is marked long in a month when each of b, h, s and ivol is strictly above that
# month's border. The comparison is done for all rows at once: the per-row loop performed
# four full boolean-mask lookups into monthly_fixed_params for every stock-month, which is
# what made this cell slow.
border_columns = {"b": "b_border", "h": "h_border", "s": "s_border", "ivol": "ivol_border"}
borders_by_month = monthly_fixed_params.set_index("mdate")

long_signal = pd.Series(True, index=monthly_dataset.index)
for factor, border_column in border_columns.items():
    # Border of the row's own month; NaN when that month is missing from monthly_fixed_params,
    # in which case the comparison below is False and no trade is signalled.
    border = monthly_dataset["mdate"].map(borders_by_month[border_column])
    long_signal &= monthly_dataset[factor] > border

# If the signal is set we will enter the trade at the start of the next month.
monthly_dataset["Trade_Status"] = long_signal.astype(float)

# Shorting (all four values below their border -> Trade_Status = -1) does not improve
# the strategy in our case, so only long signals are generated.

print(monthly_dataset.head(3).to_markdown(), "\n")

|    | ISIN         | mdate               |     RET |   RET11 |      ME |      b |       h |       s |   ivol |   Trade_Status |
|---:|:-------------|:--------------------|--------:|--------:|--------:|-------:|--------:|--------:|-------:|---------------:|
|  0 | ANN4327C1220 | 1991-06-01 00:00:00 | -0.0256 | -0.2089 | 712.452 | 1.2541 | -0.9141 | -0.1884 | 0.0074 |              0 |
|  1 | ANN4327C1220 | 1991-07-01 00:00:00 |  0.024  |  0.1856 | 729.56  | 1.1767 | -1.2208 | -0.2632 | 0.0075 |              0 |
|  2 | ANN4327C1220 | 1991-08-01 00:00:00 | -0.0799 |  0.2478 | 671.284 | 0.9473 | -1.7583 | -0.5612 | 0.0189 |              0 | 



In [5]:
# Dataframe with the final returns: per month, the summed return of all open long positions
# and the number of positions (used later to size positions equally).
#
# A position is held in month t when the same stock was marked long (Trade_Status == 1)
# in month t-1, i.e. we enter at the start of the month following the signal.

# Sort by stock and month so that "previous row within a stock" is the previous observation.
ordered = monthly_dataset.sort_values(["ISIN", "mdate"])

# Running month counter, so that consecutive calendar months differ by exactly 1.
month_number = ordered["mdate"].dt.year * 12 + ordered["mdate"].dt.month
previous_status = ordered.groupby("ISIN", sort=False)["Trade_Status"].shift(1)
previous_month_number = month_number.groupby(ordered["ISIN"], sort=False).shift(1)

# Require the previous observation to be the directly preceding month: if months are
# missing for a stock, an old signal must not be credited with a return months later.
is_held = (previous_status == 1) & ((month_number - previous_month_number) == 1)

# Sum of returns and number of positions per month.
realised = ordered.loc[is_held].groupby("mdate")["RET"].agg(["sum", "size"])

# Match on real timestamps. Comparing datetime.date objects with Timestamps is deprecated
# in pandas (it emits a FutureWarning) and stops matching in newer versions, which would
# leave every return at zero.
monthly_returns = pd.DataFrame({"mdate": monthly_fixed_params["mdate"]})
monthly_returns["returns"] = monthly_returns["mdate"].map(realised["sum"]).fillna(0.0).astype(float)
monthly_returns["positions"] = monthly_returns["mdate"].map(realised["size"]).fillna(0.0).astype(float)
# Plain dates for display only, after all matching is done.
monthly_returns["mdate"] = monthly_returns["mdate"].dt.date

# Shorting does not improve the strategy in our case, so positions with Trade_Status == -1
# (which would contribute -RET) are not included.

  result = libops.scalar_compare(x.ravel(), y, op)


In [6]:
# Full month-by-month table, followed by the summary figures.
print(monthly_returns.to_markdown())
print("\nFinal Results:\n")

# Equal weights are assumed: the month's summed return is divided by the number of positions.
# Months without positions give 0/0 = NaN, which sum() and std() skip by default.
equal_weighted_returns = monthly_returns["returns"].div(monthly_returns["positions"])

print("Strategy Return (Only): ", equal_weighted_returns.sum())
print("Strategy STD (Only): ", equal_weighted_returns.std())
# NOTE(review): MktRF is presumably the market return in excess of RF - if so it is not
# directly comparable with the raw strategy return above; confirm.
print("Market Return: ", monthly_fixed_params["MktRF"].sum())
print("Risk Free Return: ", monthly_fixed_params["RF"].sum())

|     | mdate      |   returns |   positions |
|----:|:-----------|----------:|------------:|
|   0 | 1991-06-01 |    0      |           0 |
|   1 | 1991-07-01 |    0.143  |          33 |
|   2 | 1991-08-01 |    0.6178 |          29 |
|   3 | 1991-09-01 |    0.6388 |          28 |
|   4 | 1991-10-01 |    0.1283 |          26 |
|   5 | 1991-11-01 |   -1.4079 |          35 |
|   6 | 1991-12-01 |    1.4461 |          30 |
|   7 | 1992-01-01 |    0.5508 |          36 |
|   8 | 1992-02-01 |   -0.0379 |          35 |
|   9 | 1992-03-01 |    1.0551 |          31 |
|  10 | 1992-04-01 |    0.0465 |          36 |
|  11 | 1992-05-01 |    2.2394 |          27 |
|  12 | 1992-06-01 |   -0.0213 |          34 |
|  13 | 1992-07-01 |   -0.3428 |          31 |
|  14 | 1992-08-01 |    0.3707 |          23 |
|  15 | 1992-09-01 |   -2.4584 |          39 |
|  16 | 1992-10-01 |   -3.9396 |          31 |
|  17 | 1992-11-01 |   -0.6676 |          36 |
|  18 | 1992-12-01 |   -1.6845 |          46 |
|  19 | 1993-