## 2 The Data

In [1]:

import wrds
import pandas as pd
import datetime
import numpy as np
import scipy.stats as stats
import statsmodels.api as sm




In [2]:
# Connect to WRDS
db = wrds.Connection()
db.create_pgpass_file()

WRDS recommends setting up a .pgpass file.
You can create this file yourself at any time with the create_pgpass_file() function.
Loading library list...
Done


## Start date modified from 2002-02-01 to 2002-04-01

In [3]:
#---------------------------------------------
# WRDS Monthly World Indices
#---------------------------------------------

df = db.raw_sql("""
    SELECT * 
    FROM wrdsapps_windices.mwcountryreturns
    WHERE date BETWEEN '2002-04-01' AND '2024-12-31'
    AND country IN ('AUSTRALIA', 'SWITZERLAND', 'GERMANY', 'FRANCE', 'JAPAN', 'UNITED KINGDOM')
""")

df = df[["date", "country", "mportret", "currency"]]


#---------------------------------------------
# Risk Free Rate 
#---------------------------------------------
rf=db.raw_sql("""select  mcaldt,tmytm 
           from crsp.tfz_mth_rf            
            where kytreasnox = 2000001 
           and mcaldt>='2002-04-01'
            and mcaldt<='2024-12-31'""", date_cols=['mcaldt'])
rf['tmytm']= np.exp(rf['tmytm']/12/100)-1
rf=rf.rename(columns={ "mcaldt": "date","tmytm": "rf"})

display(rf)

#---------------------------------------------
# Value Weighted Index Returns US
#---------------------------------------------
rm=db.raw_sql("""select  date,vwretd from crsp.msi 
                where date>='2002-04-01' and date<='2024-12-31'
                """,date_cols=['date'])
rm = rm.rename(columns={'vwretd':'rm'})


#---------------------------------------------
# Convert date formats
#---------------------------------------------
rm['date'] = pd.to_datetime(rm['date'], format='%Y-%m-%d')
rf['date'] = pd.to_datetime(rf['date'], format='%Y-%m-%d')
rm['date'] = pd.to_datetime(rm['date']).dt.to_period('M').dt.to_timestamp()
rf['date'] = pd.to_datetime(rf['date']).dt.to_period('M').dt.to_timestamp()

#---------------------------------------------
# FX Rates
#---------------------------------------------
JPUS = pd.read_csv('EXJPUS.csv', sep=',')
SZUS = pd.read_csv('EXSZUS.csv', sep=',')
USEU = pd.read_csv('EXUSEU.csv', sep=',')
USAL = pd.read_csv('EXUSAL.csv', sep=',')
USUK = pd.read_csv('EXUSUK.csv', sep=',')

megre = pd.merge(SZUS, JPUS, on='observation_date', how='outer')
megre = pd.merge(megre, USEU, on='observation_date', how='outer')
megre = pd.merge(megre, USAL, on='observation_date', how='outer')
fx = pd.merge(megre, USUK, on='observation_date', how='outer')
fx = fx[(fx['observation_date'] >= '2002-04-01') & (fx['observation_date'] <= '2024-12-31')]
fx['EXSZUS'] = 1/fx['EXSZUS']
fx['EXJPUS'] = 1/fx['EXJPUS']
fx = fx.rename(columns={
    'observation_date': 'date',
    'EXSZUS': 'CHF',
    'EXJPUS': 'JPY',
    'EXUSEU': 'EUR',
    'EXUSAL': 'AUD',
    'EXUSUK': 'GBP'
})

fx['date'] = pd.to_datetime(fx['date'], format='%Y-%m-%d')
fx['date'] += datetime.timedelta(days=-1) # Adjusting to the last day of the month

# ---------------------------------------------
# Interbank Rates 3M
# ---------------------------------------------
rate_au = pd.read_csv("IR3TIB01AUM156N.csv", sep=',')
rate_ch = pd.read_csv("IR3TIB01CHM156N.csv", sep=',')
rate_de = pd.read_csv("IR3TIB01EZM156N.csv", sep=',')
rate_fr = pd.read_csv("IR3TIB01EZM156N.csv", sep=',')
rate_jp = pd.read_csv("IR3TIB01JPM156N.csv", sep=',')
rate_uk = pd.read_csv("IR3TIB01GBM156N.csv", sep=',')
rate_us = pd.read_csv("IR3TIB01USM156N.csv", sep=',')

rates = pd.merge(rate_au, rate_ch, on='observation_date', how='outer')
rates = pd.merge(rates, rate_de, on='observation_date', how='outer')
rates = pd.merge(rates, rate_fr, on='observation_date', how='outer')
rates = pd.merge(rates, rate_jp, on='observation_date', how='outer')
rates = pd.merge(rates, rate_uk, on='observation_date', how='outer')
rates = pd.merge(rates, rate_us, on='observation_date', how='outer')
rates = rates[(rates['observation_date'] >= '2002-04-01') & (rates['observation_date'] <= '2024-12-31')]
rates = rates.rename(columns={
    'observation_date': 'date',
    'IR3TIB01AUM156N': 'AUSTRALIA',
    'IR3TIB01CHM156N': 'SWITZERLAND',
    'IR3TIB01EZM156N_x': 'GERMANY',
    'IR3TIB01EZM156N_y': 'FRANCE',
    'IR3TIB01JPM156N': 'JAPAN',
    'IR3TIB01GBM156N': 'UNITED KINGDOM',
    'IR3TIB01USM156N': 'UNITED STATES'
})



#divide by 100 to get the rates in percentage
rates[['AUSTRALIA', 'SWITZERLAND', 'GERMANY', 'FRANCE', 'JAPAN', 'UNITED KINGDOM', 'UNITED STATES']] = rates[['AUSTRALIA', 'SWITZERLAND', 'GERMANY', 'FRANCE', 'JAPAN', 'UNITED KINGDOM', 'UNITED STATES']] / 100 /12

rates['date'] = pd.to_datetime(rates['date'], format='%Y-%m-%d')
rates['date'] += datetime.timedelta(days=-1)
print(rates.head())



#---------------------------------------------
# Adjusting the dates
#---------------------------------------------
df['date'] = pd.to_datetime(df['date']).dt.to_period('M').dt.to_timestamp()
fx['date'] = pd.to_datetime(fx['date']).dt.to_period('M').dt.to_timestamp()
rates['date'] = pd.to_datetime(rates['date']).dt.to_period('M').dt.to_timestamp()

df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')
fx['date'] = pd.to_datetime(fx['date'], format='%Y-%m-%d')
rates['date'] = pd.to_datetime(rates['date'], format='%Y-%m-%d')

df = pd.merge(df, fx, on='date',  how='left')
df['fx'] = df.apply(lambda row: row[row['currency']], axis=1)
df = df.drop(columns=['CHF', 'JPY', 'EUR', 'AUD', 'GBP'])

df = pd.merge(df, rf, on='date', how='left')
df = pd.merge(df, rm, on='date', how='left')

df = pd.merge(df, rates, on='date',  how='left')
df['rates'] = df.apply(lambda row: row[row['country']], axis=1)
df = df.drop(columns=['AUSTRALIA', 'SWITZERLAND', 'GERMANY', 'FRANCE', 'JAPAN', 'UNITED KINGDOM', 'UNITED STATES'])



Unnamed: 0,date,rf
0,2002-04-30,0.001462
1,2002-05-31,0.001409
2,2002-06-28,0.00139
3,2002-07-31,0.001417
4,2002-08-30,0.001381
...,...,...
268,2024-08-30,0.00439
269,2024-09-30,0.003834
270,2024-10-31,0.003759
271,2024-11-29,0.003481


          date  AUSTRALIA  SWITZERLAND   GERMANY    FRANCE     JAPAN  \
454 2002-03-31   0.003825     0.001217  0.002839  0.002839  0.000083   
455 2002-04-30   0.004033     0.000970  0.002889  0.002889  0.000067   
456 2002-05-31   0.004225     0.000958  0.002887  0.002887  0.000075   
457 2002-06-30   0.004150     0.000836  0.002842  0.002842  0.000075   
458 2002-07-31   0.004133     0.000557  0.002793  0.002793  0.000058   

     UNITED KINGDOM  UNITED STATES  
454        0.003478       0.001558  
455        0.003461       0.001517  
456        0.003481       0.001508  
457        0.003384       0.001492  
458        0.003322       0.001442  


## Data Cleaning (Handling)

In [4]:
# Handling NaN


# 1. Handle mportret NaN values - replace with country-specific mean
for country in df['country'].unique():
    country_mask = df['country'] == country
    country_mean = df.loc[country_mask, 'mportret'].mean()
    nan_count = df.loc[country_mask, 'mportret'].isna().sum()
    
    if nan_count > 0:
        print(f"   {country}: Replacing {nan_count} NaN values with mean {country_mean:.6f}")
        df.loc[country_mask, 'mportret'] = df.loc[country_mask, 'mportret'].fillna(country_mean)

# 2. Handle fx NaN values - replace with country-specific mean
for country in df['country'].unique():
    country_mask = df['country'] == country
    country_fx_mean = df.loc[country_mask, 'fx'].mean()
    nan_count = df.loc[country_mask, 'fx'].isna().sum()
    
    if nan_count > 0:
        print(f"   {country}: Replacing {nan_count} FX NaN values with mean {country_fx_mean:.6f}")
        df.loc[country_mask, 'fx'] = df.loc[country_mask, 'fx'].fillna(country_fx_mean)

# 3. Handle rf (US risk-free rate) NaN values - forward fill then backward fill
rf_nan_count = df['rf'].isna().sum()
if rf_nan_count > 0:
    print(f"   Replacing {rf_nan_count} rf NaN values using forward/backward fill")
    df['rf'] = df['rf'].fillna(method='ffill').fillna(method='bfill')

# 4. Handle rm (US market return) NaN values - forward fill then backward fill
rm_nan_count = df['rm'].isna().sum()
if rm_nan_count > 0:
    print(f"   Replacing {rm_nan_count} rm NaN values using forward/backward fill")
    df['rm'] = df['rm'].fillna(method='ffill').fillna(method='bfill')

# 5. Handle rates NaN values - replace with country-specific mean
for country in df['country'].unique():
    country_mask = df['country'] == country
    country_rates_mean = df.loc[country_mask, 'rates'].mean()
    nan_count = df.loc[country_mask, 'rates'].isna().sum()
    
    if nan_count > 0:
        print(f"   {country}: Replacing {nan_count} rates NaN values with mean {country_rates_mean:.6f}")
        df.loc[country_mask, 'rates'] = df.loc[country_mask, 'rates'].fillna(country_rates_mean)


   AUSTRALIA: Replacing 1 FX NaN values with mean 0.784682
   SWITZERLAND: Replacing 1 FX NaN values with mean 0.978099
   GERMANY: Replacing 1 FX NaN values with mean 1.218350
   FRANCE: Replacing 1 FX NaN values with mean 1.218350
   UNITED KINGDOM: Replacing 1 FX NaN values with mean 1.532098
   JAPAN: Replacing 1 FX NaN values with mean 0.009296
   AUSTRALIA: Replacing 1 rates NaN values with mean 0.002965
   SWITZERLAND: Replacing 1 rates NaN values with mean 0.000179
   GERMANY: Replacing 1 rates NaN values with mean 0.001115
   FRANCE: Replacing 1 rates NaN values with mean 0.001115
   UNITED KINGDOM: Replacing 1 rates NaN values with mean 0.001918
   JAPAN: Replacing 1 rates NaN values with mean 0.000175


## EX 3 A

In [5]:
df['fx_t+1'] = df['fx'].shift(-1)
df['returns_USD'] = (1 + df['mportret']) * (1 + (df['fx_t+1'] - df['fx'])/df['fx']) - 1
df['rates_USD'] = (1 + df['rates']) * (1 + (df['fx_t+1'] - df['fx'])/df['fx']) - 1
display(df)


Unnamed: 0,date,country,mportret,currency,fx,rf,rm,rates,fx_t+1,returns_USD,rates_USD
0,2002-04-01,AUSTRALIA,-0.014302,AUD,0.549800,0.001462,-0.0496,0.004033,0.568200,0.018686,0.037635
1,2002-05-01,AUSTRALIA,-0.000204,AUD,0.568200,0.001409,-0.01051,0.004225,0.553800,-0.025542,-0.021225
2,2002-06-01,AUSTRALIA,-0.026756,AUD,0.553800,0.00139,-0.070259,0.004150,0.541300,-0.048724,-0.018515
3,2002-07-01,AUSTRALIA,-0.037816,AUD,0.541300,0.001417,-0.081125,0.004133,0.546500,-0.028572,0.013780
4,2002-08-01,AUSTRALIA,0.016208,AUD,0.546500,0.001381,0.007949,0.004100,0.550200,0.023088,0.010898
...,...,...,...,...,...,...,...,...,...,...,...
1633,2024-08-01,JAPAN,-0.024404,JPY,0.006995,0.00439,0.021572,0.000213,0.006672,-0.069555,-0.046076
1634,2024-09-01,JAPAN,-0.014245,JPY,0.006672,0.003834,0.020969,0.000213,0.006506,-0.038753,-0.024655
1635,2024-10-01,JAPAN,0.016975,JPY,0.006506,0.003759,-0.008298,0.000302,0.006501,0.016302,-0.000360
1636,2024-11-01,JAPAN,-0.007304,JPY,0.006501,0.003481,0.064855,0.000322,0.009296,0.419463,0.430367


## 3 B

In [6]:
# Calculate currency-hedged component
df['X'] = (df['fx_t+1'] / df['fx']) * (1 + df['rates']) - (1 + df['rf'])

# Calculate hedged index return
df['hedged_return'] = df['returns_USD'] - df['X']
display(df)



Unnamed: 0,date,country,mportret,currency,fx,rf,rm,rates,fx_t+1,returns_USD,rates_USD,X,hedged_return
0,2002-04-01,AUSTRALIA,-0.014302,AUD,0.549800,0.001462,-0.0496,0.004033,0.568200,0.018686,0.037635,0.036173,-0.017487
1,2002-05-01,AUSTRALIA,-0.000204,AUD,0.568200,0.001409,-0.01051,0.004225,0.553800,-0.025542,-0.021225,-0.022634,-0.002908
2,2002-06-01,AUSTRALIA,-0.026756,AUD,0.553800,0.00139,-0.070259,0.004150,0.541300,-0.048724,-0.018515,-0.019905,-0.028819
3,2002-07-01,AUSTRALIA,-0.037816,AUD,0.541300,0.001417,-0.081125,0.004133,0.546500,-0.028572,0.013780,0.012362,-0.040935
4,2002-08-01,AUSTRALIA,0.016208,AUD,0.546500,0.001381,0.007949,0.004100,0.550200,0.023088,0.010898,0.009517,0.013571
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1633,2024-08-01,JAPAN,-0.024404,JPY,0.006995,0.00439,0.021572,0.000213,0.006672,-0.069555,-0.046076,-0.050466,-0.019089
1634,2024-09-01,JAPAN,-0.014245,JPY,0.006672,0.003834,0.020969,0.000213,0.006506,-0.038753,-0.024655,-0.028488,-0.010265
1635,2024-10-01,JAPAN,0.016975,JPY,0.006506,0.003759,-0.008298,0.000302,0.006501,0.016302,-0.000360,-0.004119,0.020421
1636,2024-11-01,JAPAN,-0.007304,JPY,0.006501,0.003481,0.064855,0.000322,0.009296,0.419463,0.430367,0.426886,-0.007423


## 3C

In [7]:
#-----------------------------------------------
# Unhedged Index Returns
#-----------------------------------------------

# Creating another df with US data incorporated with the rest

us_only = df[df["country"] == "AUSTRALIA"].copy()
us_only["country"] = "USA"
us_only["returns_USD"] = us_only["rm"]
us_only["mportret"] = us_only["rm"]
us_only["fx"] = np.ones(len(us_only["fx"]))
us_only["currency"] = ["USD"] * len(us_only["fx"])
us_only["rates"] = np.nan   
us_only["rates_USD"] = np.nan
us_only["fx_t+1"] = np.ones(len(us_only["fx"]))
us_only["hedged_return"] = us_only["rm"]  #not hedged but used for next part
us_only["X"] = np.zeros(len(us_only["fx"]))
us_only.loc[us_only.index[-1], "fx_t+1"] = np.nan
df_with_us = pd.concat([df, us_only], ignore_index=True)   #adding the US df to the main one

display(df_with_us)


#-----------------------------------------------
#equaly weighted index returns
#-----------------------------------------------
results_eq = df_with_us.groupby('date').returns_USD.mean()
display(results_eq)
mean_eq = results_eq.mean()*12
std_eq = results_eq.std()*np.sqrt(12)
sharpe_eq = (mean_eq - 12*df_with_us['rf'].mean())/std_eq

#-----------------------------------------------
# Risk parity index returns
#-----------------------------------------------
result_rp = df_with_us.groupby('country').returns_USD.rolling(60).std().reset_index().rename(columns={'returns_USD': 'std'})
result_rp['date'] = df_with_us['date']
result_rp['weights'] = 1 / result_rp['std']
result_rp['weights'] = result_rp['weights'] / (pd.concat([result_rp.groupby('date').weights.sum().reset_index()['weights']] * 6, ignore_index=True))
result_rp.drop(columns=['level_1', 'std'], inplace=True)
merged_rp = pd.merge(df_with_us, result_rp, on=['date', 'country'])
merged_rp['weighted_return'] = merged_rp['returns_USD'] * merged_rp['weights']
results_rp = merged_rp.groupby('date').weighted_return.sum()
mean_rp = results_rp.mean()*12
std_rp = results_rp.std()*np.sqrt(12)
sharpe_rp = (mean_rp - 12*df_with_us['rf'].mean())/std_rp

#-----------------------------------------------
# Mean variance portfolio returns
#-----------------------------------------------
gamma = 1.0
pivoted_returns = df_with_us.pivot(index='date', columns='country', values='returns_USD')
countries = list(pivoted_returns.columns)
rolling_cov_matrices = pivoted_returns.rolling(window=60).cov().dropna()
rolling_returns_USD = df_with_us.groupby("country").returns_USD.rolling(window=60).mean().reset_index()
rolling_returns_USD['date'] = df_with_us['date']

# Extract covariance matrices as a dictionary
cov_matrices_dict = {
    date: rolling_cov_matrices.loc[date] 
    for date in rolling_cov_matrices.index.get_level_values(0).unique()
}

#for date, cov_matrix in cov_matrices_dict.items():
#    display(cov_matrix.values)

inverted_cov_matrices = {
    date: np.linalg.pinv(cov_matrix.values) for date, cov_matrix in cov_matrices_dict.items()
}

a = {
    date: inv_cov_mat @ (rolling_returns_USD[rolling_returns_USD['date']==date].returns_USD.values - df_with_us[df_with_us['date']==date].rf.values) for date, inv_cov_mat in inverted_cov_matrices.items()
}

weights_mv = []
for date, arr in a.items():
    for idx, val in enumerate(arr):
        weights_mv.append({'date': date, 'weights': val, 'country': countries[idx]})

weights_mv = pd.DataFrame(weights_mv)

merged_mv = pd.merge(df_with_us, weights_mv, on=['date', 'country'])
merged_mv['weighted_return'] = merged_mv['returns_USD'] * merged_mv['weights']
results_mv = merged_mv.groupby('date').weighted_return.sum()
mean_mv = results_mv.mean()*12
std_mv = results_mv.std()*np.sqrt(12)
sharpe_mv = (mean_mv - 12*df_with_us['rf'].mean())/std_mv

summary_unhedged = pd.DataFrame({
    'Portfolio': ['Equal Weight', 'Risk Parity', 'Mean-Variance'],
    'Mean (Annualized)': [mean_eq, mean_rp, mean_mv],
    'Std Dev (Annualized)': [std_eq, std_rp, std_mv],
    'Sharpe Ratio': [sharpe_eq, sharpe_rp, sharpe_mv]
})

display(summary_unhedged)

Unnamed: 0,date,country,mportret,currency,fx,rf,rm,rates,fx_t+1,returns_USD,rates_USD,X,hedged_return
0,2002-04-01,AUSTRALIA,-0.014302,AUD,0.5498,0.001462,-0.0496,0.004033,0.5682,0.018686,0.037635,0.036173,-0.017487
1,2002-05-01,AUSTRALIA,-0.000204,AUD,0.5682,0.001409,-0.01051,0.004225,0.5538,-0.025542,-0.021225,-0.022634,-0.002908
2,2002-06-01,AUSTRALIA,-0.026756,AUD,0.5538,0.00139,-0.070259,0.004150,0.5413,-0.048724,-0.018515,-0.019905,-0.028819
3,2002-07-01,AUSTRALIA,-0.037816,AUD,0.5413,0.001417,-0.081125,0.004133,0.5465,-0.028572,0.013780,0.012362,-0.040935
4,2002-08-01,AUSTRALIA,0.016208,AUD,0.5465,0.001381,0.007949,0.004100,0.5502,0.023088,0.010898,0.009517,0.013571
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1906,2024-08-01,USA,0.021572,USD,1.0000,0.00439,0.021572,,1.0000,0.021572,,0.0,0.021572
1907,2024-09-01,USA,0.020969,USD,1.0000,0.003834,0.020969,,1.0000,0.020969,,0.0,0.020969
1908,2024-10-01,USA,-0.008298,USD,1.0000,0.003759,-0.008298,,1.0000,-0.008298,,0.0,-0.008298
1909,2024-11-01,USA,0.064855,USD,1.0000,0.003481,0.064855,,1.0000,0.064855,,0.0,0.064855


date
2002-04-01    0.005435
2002-05-01    0.022302
2002-06-01   -0.082963
2002-07-01   -0.085715
2002-08-01   -0.001898
                ...   
2024-08-01   -0.010428
2024-09-01   -0.014347
2024-10-01   -0.028468
2024-11-01     0.17251
2024-12-01   -0.220471
Name: returns_USD, Length: 273, dtype: Float64

Unnamed: 0,Portfolio,Mean (Annualized),Std Dev (Annualized),Sharpe Ratio
0,Equal Weight,0.07994,0.154138,0.420906
1,Risk Parity,0.036091,0.113847,0.184708
2,Mean-Variance,2.120682,1.459012,1.443182


In [8]:
#-----------------------------------------------
# Equal weighted hedged index returns 
#-----------------------------------------------
results_eqh = df_with_us.groupby('date').hedged_return.mean()
mean_eqh = results_eqh.mean()*12
std_eqh = results_eqh.std()*np.sqrt(12)
sharpe_eqh = (mean_eqh - 12*df_with_us['rf'].mean())/std_eqh

#-----------------------------------------------
# Risk parity hedged index returns 
#-----------------------------------------------
result_rph = df_with_us.groupby('country').hedged_return.rolling(60).std().reset_index().rename(columns={'hedged_return': 'std'})
result_rph['date'] = df_with_us['date']
result_rph['weights'] = 1 / result_rph['std']

# Normalize weights to sum to 1 for each date 
weights_sum_by_date = result_rph.groupby('date')['weights'].transform('sum')
result_rph['weights'] = result_rph['weights'] / weights_sum_by_date

result_rph.drop(columns=['level_1', 'std'], inplace=True)
merged_rph = pd.merge(df_with_us, result_rph, on=['date', 'country'])
merged_rph['weighted_return'] = merged_rph['hedged_return'] * merged_rph['weights']
results_rph = merged_rph.groupby('date').weighted_return.sum()
mean_rph = results_rph.mean()*12
std_rph = results_rph.std()*np.sqrt(12)
sharpe_rph = (mean_rph - 12*df_with_us['rf'].mean())/std_rph

#-----------------------------------------------
# Mean variance hedged portfolio returns 
#-----------------------------------------------
gamma = 1.0
pivoted_returns_hedged = df_with_us.pivot(index='date', columns='country', values='hedged_return')
countries_hedged = list(pivoted_returns_hedged.columns)
rolling_cov_matrices_hedged = pivoted_returns_hedged.rolling(window=60).cov().dropna()
rolling_hedged_return = df_with_us.groupby("country").hedged_return.rolling(window=60).mean().reset_index()
rolling_hedged_return['date'] = df_with_us['date']

# Extract covariance matrices as a dictionary
cov_matrices_dict_hedged = {
    date: rolling_cov_matrices_hedged.loc[date] 
    for date in rolling_cov_matrices_hedged.index.get_level_values(0).unique()
}

inverted_cov_matrices_hedged = {
    date: np.linalg.pinv(cov_matrix.values) for date, cov_matrix in cov_matrices_dict_hedged.items()
}

a_hedged = {
    date: inv_cov_mat @ (rolling_hedged_return[rolling_hedged_return['date']==date].hedged_return.values - df_with_us[df_with_us['date']==date].rf.values) 
    for date, inv_cov_mat in inverted_cov_matrices_hedged.items()
}

weights_mvh = []
for date, arr in a_hedged.items():
    for idx, val in enumerate(arr):
        weights_mvh.append({'date': date, 'weights': val, 'country': countries_hedged[idx]})

weights_mvh = pd.DataFrame(weights_mvh)

merged_mvh = pd.merge(df_with_us, weights_mvh, on=['date', 'country'])
merged_mvh['weighted_return'] = merged_mvh['hedged_return'] * merged_mvh['weights']
results_mvh = merged_mvh.groupby('date').weighted_return.sum()
mean_mvh = results_mvh.mean()*12
std_mvh = results_mvh.std()*np.sqrt(12)
sharpe_mvh = (mean_mvh - 12*df_with_us['rf'].mean())/std_mvh


summary_hedged = pd.DataFrame({
    'Portfolio': ['Equal Weight', 'Risk Parity', 'Mean-Variance'],
    'Mean (Annualized)': [mean_eqh, mean_rph, mean_mvh],
    'Std Dev (Annualized)': [std_eqh, std_rph, std_mvh],
    'Sharpe Ratio': [sharpe_eqh, sharpe_rph, sharpe_mvh]
})


print("UNHEDGED")
display(summary_unhedged)
print("HEDGED")
display(summary_hedged)

UNHEDGED


Unnamed: 0,Portfolio,Mean (Annualized),Std Dev (Annualized),Sharpe Ratio
0,Equal Weight,0.07994,0.154138,0.420906
1,Risk Parity,0.036091,0.113847,0.184708
2,Mean-Variance,2.120682,1.459012,1.443182


HEDGED


Unnamed: 0,Portfolio,Mean (Annualized),Std Dev (Annualized),Sharpe Ratio
0,Equal Weight,0.07611,0.133418,0.457563
1,Risk Parity,0.053238,0.119034,0.320706
2,Mean-Variance,2.059455,1.501813,1.361282


## EX 4 A

In [31]:
#---------------------------------------------
# Equity Index Momentum Strategy (MOM)
# Construct long-short momentum strategy
#---------------------------------------------

# Calculate 11-month cumulative returns (t-12 to t-1)
df_mom = df_with_us.copy()
df_mom = df_mom.sort_values(['country', 'date'])

# Calculate 11-month rolling return (excluding current month)
df_mom['cum_return_11m'] = df_mom.groupby('country')['hedged_return'].transform(
    lambda x: x.shift(1).rolling(window=11, min_periods=11).apply(lambda y: (1 + y).prod() - 1)
)

# Remove rows with NaN cumulative returns (first 11 months for each country)
df_mom = df_mom.dropna(subset=['cum_return_11m'])

# For each date, rank countries based on their 11-month lagged returns
df_mom['rank'] = df_mom.groupby('date')['cum_return_11m'].rank(method='min')

# Get number of countries for each date
N_by_date = df_mom.groupby('date')['country'].count()        #to make sure we don't have a problem in case a NaN slipped through, we compute the N every time 
df_mom = df_mom.merge(N_by_date.rename('N'), left_on='date', right_index=True)

# Calculate momentum weights: w_i,t = Z * (Rank_i,t - (N+1)/2)
df_mom['raw_weight'] = df_mom['rank'] - (df_mom['N'] + 1) / 2

# Calculate Z factor to ensure long positions sum to +1 and short positions sum to -1
def calculate_z_factor(raw_weights):
    positive_weights = raw_weights[raw_weights > 0]
    negative_weights = raw_weights[raw_weights < 0]
    
    if len(positive_weights) > 0 and len(negative_weights) > 0:
        sum_positive = positive_weights.sum()
        sum_negative = abs(negative_weights.sum())
        z_factor = 2 / (sum_positive + sum_negative)
    else:
        z_factor = 0  # Handle edge case where all weights have same sign
    
    return z_factor

z_factors = df_mom.groupby('date')['raw_weight'].apply(calculate_z_factor)
df_mom = df_mom.merge(z_factors.rename('Z'), left_on='date', right_index=True)

# Calculate final momentum weights
df_mom['momentum_weight'] = df_mom['Z'] * df_mom['raw_weight']


# Get unique dates and check weights for first 5 dates
unique_dates = df_mom['date'].unique()[:5]

for date in unique_dates:
    date_weights = df_mom[df_mom['date'] == date]['momentum_weight']
    long_sum = date_weights[date_weights > 0].sum()
    short_sum = date_weights[date_weights < 0].sum()
    total_sum = date_weights.sum()
    
    # Handle different date formats (for robustness)
    if hasattr(date, 'strftime'):
        date_str = date.strftime('%Y-%m')
    else:
        date_str = str(date)[:7] 
    

# Calculate momentum strategy returns
df_mom['momentum_return'] = df_mom['hedged_return'] * df_mom['momentum_weight']
mom_returns = df_mom.groupby('date')['momentum_return'].sum()

display(df_mom[['date', 'country', 'cum_return_11m', 'rank', 'momentum_weight']])

Unnamed: 0,date,country,cum_return_11m,rank,momentum_weight
11,2003-03-01,AUSTRALIA,-0.190394,7.0,0.500000
12,2003-04-01,AUSTRALIA,-0.143743,7.0,0.500000
13,2003-05-01,AUSTRALIA,-0.103470,7.0,0.500000
14,2003-06-01,AUSTRALIA,-0.074914,6.0,0.333333
15,2003-07-01,AUSTRALIA,-0.021027,6.0,0.333333
...,...,...,...,...,...
1906,2024-08-01,USA,0.219583,6.0,0.333333
1907,2024-09-01,USA,0.307874,7.0,0.500000
1908,2024-10-01,USA,0.374987,7.0,0.500000
1909,2024-11-01,USA,0.248637,7.0,0.500000


## 4 B

In [None]:
#---------------------------------------------
# Calculate Long and Short Leg Returns Separately
#---------------------------------------------

# Separate long and short positions
df_mom_long = df_mom[df_mom['momentum_weight'] > 0].copy()
df_mom_short = df_mom[df_mom['momentum_weight'] < 0].copy()

# Calculate long leg returns (sum of all long position returns)     (of the 3 Countries)
long_leg_returns = df_mom_long.groupby('date')['momentum_return'].sum()

# Calculate short leg returns (sum of all short position returns)   (of the 3 Countries)
short_leg_returns = df_mom_short.groupby('date')['momentum_return'].sum()

# Total momentum strategy returns (should match what you calculated before)
total_mom_returns = mom_returns  # We already computed it before but we rename it for clarity purposes

# We ensure all series have the same dates
common_dates = long_leg_returns.index.intersection(short_leg_returns.index)
long_leg_returns = long_leg_returns.reindex(common_dates)
short_leg_returns = short_leg_returns.reindex(common_dates)
total_mom_returns = total_mom_returns.reindex(common_dates)


#---------------------------------------------
# Calculate Performance Statistics
#---------------------------------------------

# Function to calculate all statistics at once
def calculate_stats(returns_series, name):
    mean_monthly = returns_series.mean()
    std_monthly = returns_series.std()
    
    mean_annual = mean_monthly * 12
    std_annual = std_monthly * np.sqrt(12)
    sharpe_ratio = mean_annual / std_annual if std_annual != 0 else 0
    
    # T-statistic for testing if mean is significantly different from zero
    n_obs = len(returns_series.dropna())
    t_stat = mean_monthly / (std_monthly / np.sqrt(n_obs)) if std_monthly != 0 else 0
    p_value = 2 * (1 - stats.t.cdf(abs(t_stat), n_obs - 1))  # Two-tailed test
    
    return {
        'Strategy': name,
        'Mean (Annualized)': mean_annual,
        'Std Dev (Annualized)': std_annual,
        'Sharpe Ratio': sharpe_ratio,
        'T-Statistic': t_stat,
        'P-Value': p_value,
        'Significant at 5%': 'Yes' if p_value < 0.05 else 'No'
    }

# Calculate statistics for each component
long_stats = calculate_stats(long_leg_returns, 'Long Leg')
short_stats = calculate_stats(short_leg_returns, 'Short Leg') 
total_stats = calculate_stats(total_mom_returns, 'Total MOM Strategy')

momentum_summary = pd.DataFrame([long_stats, short_stats, total_stats])
display(momentum_summary)



Unnamed: 0,Strategy,Mean (Annualized),Std Dev (Annualized),Sharpe Ratio,T-Statistic,P-Value,Significant at 5%
0,Long Leg,0.08102,0.137518,0.589158,2.752908,0.006321,Yes
1,Short Leg,-0.098825,0.138215,-0.715006,-3.340948,0.000957,Yes
2,Total MOM Strategy,-0.017805,0.079346,-0.224396,-1.048514,0.295372,No


## 4 C

In [33]:
# Regression Analysis: MOM vs DIV


# Get DIV strategy returns (currency hedged risk parity from Ex 3)
div_returns = results_rph  

# Ensure both series have the same dates
common_dates = total_mom_returns.index.intersection(div_returns.index)
mom_returns = total_mom_returns.reindex(common_dates)
div_returns = div_returns.reindex(common_dates)

# Remove any remaining NaN values (In case of any problems in date intersection)
valid_mask = ~(mom_returns.isna() | div_returns.isna())
mom_returns = mom_returns[valid_mask]
div_returns = div_returns[valid_mask]


# Run the regression: MOM = alpha + beta * DIV + epsilon
# Force conversion to native NumPy float arrays since problem with Float64 instead of float64
X = sm.add_constant(div_returns.astype(float).values)
y = mom_returns.astype(float).values
model = sm.OLS(y, X).fit()

# Calculate correlation
correlation = np.corrcoef(mom_returns, div_returns)[0, 1]

# Create results dataframe
results_MOM_DIV_df = pd.DataFrame({
    'Coefficient': ['Alpha', 'Beta', 'R²', 'Correlation'],
    'Value': [
        model.params[0],  # const
        model.params[1],  # beta
        model.rsquared,
        correlation
    ],
    'Statistical_Significance': [
        'Significant' if model.pvalues[0] < 0.05 else 'Not Significant',
        'Significant' if model.pvalues[1] < 0.05 else 'Not Significant',
        'N/A',
        'N/A'
    ],
    'P_Value': [
        model.pvalues[0],
        model.pvalues[1],
        np.nan,
        np.nan
    ]
})

display(results_MOM_DIV_df)

Unnamed: 0,Coefficient,Value,Statistical_Significance,P_Value
0,Alpha,-0.001543,Not Significant,0.281581
1,Beta,0.012782,Not Significant,0.752569
2,R²,0.000383,,
3,Correlation,0.019568,,


## EX 5 A

In [35]:
#---------------------------------------------
# Equity Index Long Term Reversal Strategy (REV)
#---------------------------------------------

# Calculate 5-year cumulative returns with 12-month lag (t-60 to t-12)
df_rev = df_with_us.copy()
df_rev = df_rev.sort_values(['country', 'date'])

# Calculate 48-month rolling return (5 years = 60 months, but t-60 to t-12 = 48 months)
# with 12-month lag (shift by 12)
df_rev['cum_return_5y'] = df_rev.groupby('country')['hedged_return'].transform(
    lambda x: x.shift(12).rolling(window=48, min_periods=48).apply(lambda y: (1 + y).prod() - 1)
)

# Remove rows with NaN cumulative returns (first 60 months for each country)
df_rev = df_rev.dropna(subset=['cum_return_5y'])

# For each date, rank countries based on their 5-year lagged returns
df_rev['rank'] = df_rev.groupby('date')['cum_return_5y'].rank(method='min')

# Get number of countries for each date
N_by_date = df_rev.groupby('date')['country'].count()    #to make sure we don't have a problem in case a NaN slipped through, we compute the N every time 
df_rev = df_rev.merge(N_by_date.rename('N'), left_on='date', right_index=True)

# Calculate reversal weights: w_i,t = Z * ((N+1)/2 - Rank_i,t)
df_rev['raw_weight'] = (df_rev['N'] + 1) / 2 - df_rev['rank']

# Calculate Z factor to ensure long positions sum to +1 and short positions sum to -1
def calculate_z_factor(raw_weights):
    positive_weights = raw_weights[raw_weights > 0]
    negative_weights = raw_weights[raw_weights < 0]
    
    if len(positive_weights) > 0 and len(negative_weights) > 0:
        sum_positive = positive_weights.sum()
        sum_negative = abs(negative_weights.sum())
        z_factor = 2 / (sum_positive + sum_negative)
    else:
        z_factor = 0  # Handle edge case where all weights have same sign
    
    return z_factor

z_factors = df_rev.groupby('date')['raw_weight'].apply(calculate_z_factor)
df_rev = df_rev.merge(z_factors.rename('Z'), left_on='date', right_index=True)

# Calculate final reversal weights
df_rev['reversal_weight'] = df_rev['Z'] * df_rev['raw_weight']



# Get unique dates and check weights for first 5 dates
unique_dates = df_rev['date'].unique()[:5]

for date in unique_dates:
    date_weights = df_rev[df_rev['date'] == date]['reversal_weight']
    long_sum = date_weights[date_weights > 0].sum()
    short_sum = date_weights[date_weights < 0].sum()
    total_sum = date_weights.sum()
    
    # Handle different date formats
    if hasattr(date, 'strftime'):
        date_str = date.strftime('%Y-%m')
    else:
        date_str = str(date)[:7]  # Fallback to string slicing
    

# Calculate reversal strategy returns
df_rev['reversal_return'] = df_rev['hedged_return'] * df_rev['reversal_weight']
rev_returns = df_rev.groupby('date')['reversal_return'].sum()

display(df_rev[['date', 'country', 'cum_return_5y', 'rank', 'reversal_weight']])

Unnamed: 0,date,country,cum_return_5y,rank,reversal_weight
59,2007-03-01,AUSTRALIA,0.542413,6.0,-0.333333
60,2007-04-01,AUSTRALIA,0.610443,6.0,-0.333333
61,2007-05-01,AUSTRALIA,0.541457,6.0,-0.333333
62,2007-06-01,AUSTRALIA,0.614339,6.0,-0.333333
63,2007-07-01,AUSTRALIA,0.661300,4.0,0.000000
...,...,...,...,...,...
1906,2024-08-01,USA,0.542028,5.0,-0.166667
1907,2024-09-01,USA,0.445770,5.0,-0.166667
1908,2024-10-01,USA,0.377511,5.0,-0.166667
1909,2024-11-01,USA,0.453446,6.0,-0.333333


## 5 B

In [36]:
# REV Strategy Performance Analysis

#---------------------------------------------
# Calculate Long and Short Leg Returns Separately for REV Strategy
#---------------------------------------------

# Separate long and short positions
df_rev_long = df_rev[df_rev['reversal_weight'] > 0].copy()
df_rev_short = df_rev[df_rev['reversal_weight'] < 0].copy()

# Calculate long leg returns (sum of all long position returns)
long_leg_returns_rev = df_rev_long.groupby('date')['reversal_return'].sum()     #(of the 3 Countries)

# Calculate short leg returns (sum of all short position returns) 
short_leg_returns_rev = df_rev_short.groupby('date')['reversal_return'].sum()   #(of the 3 Countries)

# Total reversal strategy returns
total_rev_returns = rev_returns  # We already computed it before but we rename it for clarity purposes

# Ensure all series have the same dates
common_dates_rev = long_leg_returns_rev.index.intersection(short_leg_returns_rev.index)
long_leg_returns_rev = long_leg_returns_rev.reindex(common_dates_rev)
short_leg_returns_rev = short_leg_returns_rev.reindex(common_dates_rev)
total_rev_returns = total_rev_returns.reindex(common_dates_rev)

#---------------------------------------------
# Calculate Performance Statistics for REV Strategy
#---------------------------------------------

# Function to calculate all statistics at once (same as used for MOM)
def calculate_stats(returns_series, name):
    mean_monthly = returns_series.mean()
    std_monthly = returns_series.std()
    
    mean_annual = mean_monthly * 12
    std_annual = std_monthly * np.sqrt(12)
    sharpe_ratio = mean_annual / std_annual if std_annual != 0 else 0
    
    # T-statistic for testing if mean is significantly different from zero
    n_obs = len(returns_series.dropna())
    t_stat = mean_monthly / (std_monthly / np.sqrt(n_obs)) if std_monthly != 0 else 0
    p_value = 2 * (1 - stats.t.cdf(abs(t_stat), n_obs - 1))  # Two-tailed test
    
    return {
        'Strategy': name,
        'Mean (Annualized)': mean_annual,
        'Std Dev (Annualized)': std_annual,
        'Sharpe Ratio': sharpe_ratio,
        'T-Statistic': t_stat,
        'P-Value': p_value,
        'Significant at 5%': 'Yes' if p_value < 0.05 else 'No'
    }

# Calculate statistics for each component of REV strategy
long_stats_rev = calculate_stats(long_leg_returns_rev, 'Long Leg')
short_stats_rev = calculate_stats(short_leg_returns_rev, 'Short Leg') 
total_stats_rev = calculate_stats(total_rev_returns, 'Total REV Strategy')

reversal_summary = pd.DataFrame([long_stats_rev, short_stats_rev, total_stats_rev])

print("REV STRATEGY PERFORMANCE SUMMARY:")
display(reversal_summary)

REV STRATEGY PERFORMANCE SUMMARY:


Unnamed: 0,Strategy,Mean (Annualized),Std Dev (Annualized),Sharpe Ratio,T-Statistic,P-Value,Significant at 5%
0,Long Leg,0.065411,0.136928,0.477703,2.017317,0.044917,Yes
1,Short Leg,-0.070804,0.145209,-0.487601,-2.059115,0.0407,Yes
2,Total REV Strategy,-0.005394,0.072952,-0.073933,-0.312214,0.755183,No


## 5 C

In [38]:

## Regression Analysis: REV vs DIV

# Get DIV strategy returns (currency hedged risk parity from Ex 3)
div_returns = results_rph  

# Ensure both series have the same dates
common_dates_rev_div = total_rev_returns.index.intersection(div_returns.index)
rev_returns = total_rev_returns.reindex(common_dates_rev_div)
div_returns = div_returns.reindex(common_dates_rev_div)

# Remove any remaining NaN values (In case of any problems in date intersection)
valid_mask_rev = ~(rev_returns.isna() | div_returns.isna())
rev_returns = rev_returns[valid_mask_rev]
div_returns = div_returns[valid_mask_rev]

# Run the regression: REV = alpha + beta * DIV + epsilon
# Force conversion to native NumPy float arrays since problem with Float64 instead of float64
X_rev = sm.add_constant(div_returns.astype(float).values)
y_rev = rev_returns.astype(float).values
model_rev = sm.OLS(y_rev, X_rev).fit()

# Calculate correlation
correlation_rev = np.corrcoef(rev_returns, div_returns)[0, 1]

# Create results dataframe
results_REV_DIV_df = pd.DataFrame({
    'Coefficient': ['Alpha', 'Beta', 'R²', 'Correlation'],
    'Value': [
        model_rev.params[0],  # const (alpha)
        model_rev.params[1],  # beta
        model_rev.rsquared,
        correlation_rev
    ],
    'Statistical_Significance': [
        'Significant' if model_rev.pvalues[0] < 0.05 else 'Not Significant',
        'Significant' if model_rev.pvalues[1] < 0.05 else 'Not Significant',
        'N/A',
        'N/A'
    ],
    'P_Value': [
        model_rev.pvalues[0],
        model_rev.pvalues[1],
        np.nan,
        np.nan
    ]
})

display(results_REV_DIV_df)


Unnamed: 0,Coefficient,Value,Statistical_Significance,P_Value
0,Alpha,-1.3e-05,Not Significant,0.992659
1,Beta,-0.077066,Significant,0.03824
2,R²,0.020099,,
3,Correlation,-0.141772,,
