In [2]:
# Standard Library Imports (if any)

# Third-party Library Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from IPython.display import display, HTML

# Local Module Imports
from utils import *
from data import *
from data.processors import DataFrameCPIS


In [3]:
data = DataManager(
    raw_dir = "../data/raw",
    save_dir = "../data/clean"
)
# data.clean_and_save_data()

# Access datasets
cpis = data.get_dataset("cpis")
ds = data.get_dataset("ds")
fed = data.get_dataset("fed")
wb = data.get_dataset("wb")
wfe = data.get_dataset("wfe")

In [4]:
offshore = [CPIS.COUNTRY_TO_CODE[country] for country in CPIS.OFFSHORE_CENTERS]
sample = DS.CODES.copy()
offshore_weights = cpis.calculate_offshore_weights(DS.CODES.copy(), offshore)
offshore_investments = cpis.calculate_offshore_investments(DS.CODES.copy(), offshore)
offshore_distribution = cpis.distribute_offshore_holdings(offshore_investments, offshore_weights)  
domestic_investments = cpis.calculate_domestic_investments(DS.CODES.copy(), wb)
cpis_redis = (cpis + offshore_distribution + domestic_investments).get_data(issuers=sample, holders=sample)
cpis_redis_tot_per_country = cpis_redis.groupby("Country Name").sum()
cpis_redis_weight = cpis_redis / cpis_redis_tot_per_country
first_2001_column = ds.columns[ds.columns.year == 2001].min()
index_of_first_2001 = ds.columns.get_loc(first_2001_column)
ds.loc["ID"].iloc[index_of_first_2001:index_of_first_2001+49] = list(DS.ID_MODIFICATIONS.values())
ds_2004 = ds.loc[:, (ds.columns >= pd.Timestamp("2003-12-01")) & (ds.columns <= pd.Timestamp("2005-01-01"))]
ds_2004 = ds_2004.rename(columns=DS.COUNTRY_TO_CODE, index=DS.COUNTRY_TO_CODE)
cov_2004 = ds_2004.T.cov()
cov_2004
weights = cpis_redis_weight.get_data(holders=sample, periods=2004)
weights_np = weights.unstack().to_numpy()
weights_np
ds_2004_np = ds_2004.pct_change(axis=1).iloc[:, 1:].to_numpy()
cov_2004 = np.cov(ds_2004_np)
cov_2004[0,:] @ weights_np[0,:]
np.diagonal(np.dot(weights_np,cov_2004.T))
var_cov_2004 = pd.DataFrame({"var":ds_2004_np.var(axis=1), "cov":np.diagonal(np.dot(weights_np,cov_2004.T))}, index=ds_2004.index)
var_cov_2004

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  ds.loc["ID"].iloc[index_of_first_2001:index_of_first_2001+49] = list(DS.ID_MODIFICATIONS.values())


Unnamed: 0,var,cov
AR,0.006566,0.005986
AT,0.001338,0.001229
BE,0.00128,0.001193
BR,0.003941,0.004276
CA,0.001443,0.001341
CL,0.001912,0.00192
CO,0.005889,0.006292
CZ,0.003982,0.003952
DK,0.002144,0.001925
FI,0.00669,0.005864


In [5]:
cpis = DataFrameCPIS(cpis)
S = cpis.calculate_cov_index_portfolio(wb, fed, ds, (2004, 2004), False)
W = cpis.calculate_weight_matrix(wb, sample_period=(2004, 2004))
K = cpis.calculate_cov_matrix_returns(fed, ds, (2004, 2004), False)
R = cpis.calculate_excess_returns_matrix(fed, ds, (2004, 2004), False)
S

  returns = ds.pct_change(axis=1, fill_method="ffill")
  returns = ds.pct_change(axis=1, fill_method="ffill")
  returns = ds.pct_change(axis=1, fill_method="ffill")


array([0.00596221, 0.0012134 , 0.0011767 , 0.00424636, 0.00132807,
       0.00190209, 0.00627665, 0.00392763, 0.0019139 , 0.005845  ,
       0.00090699, 0.00128684, 0.00257882, 0.00200536, 0.00171179,
       0.01123421, 0.00236704, 0.00115902, 0.00312089, 0.00448588,
       0.00152762, 0.00161847, 0.00087711, 0.00191538, 0.0017555 ,
       0.00186235, 0.00117803, 0.00050466, 0.00464363, 0.00137279,
       0.00145481, 0.00081364, 0.00280099, 0.00984111, 0.00049935,
       0.00681234])

In [6]:
var_cov_2004 = pd.DataFrame({"var":R.var(axis=1), "cov":np.diagonal(np.dot(weights_np,cov_2004.T))}, index=R.index)
var_cov_2004

Unnamed: 0,var,cov
AR,0.007138,0.005986
AT,0.001444,0.001229
BE,0.001379,0.001193
BR,0.004269,0.004276
CA,0.001561,0.001341
CL,0.002067,0.00192
CO,0.006409,0.006292
CZ,0.004318,0.003952
DK,0.002328,0.001925
FI,0.007278,0.005864


In [7]:
ds = data.get_dataset("ds")
fed = data.get_dataset("fed")

start, end = (2004, 2004)
ds = ds.loc[:, (ds.columns >= pd.Timestamp(f"{start-1}-12-01")) & (ds.columns <= pd.Timestamp(f"{end+1}-01-01"))]

# Align the risk-free rate data with the returns data
fed = fed.reindex(columns=ds.columns)
fed = annual_to_monthly_return(fed)

# Calculate returns and risk-free rate
returns = ds.pct_change(axis=1, fill_method="ffill")
risk_free_rate = fed.loc["FEDFUNDS"]

# Simple excess returns
excess_returns = returns.subtract(risk_free_rate, axis=1)
mean_excess = excess_returns.mean(axis=1)
var_excess = excess_returns.var(axis=1)

# Log excess returns
log_returns = np.log(1 + returns)
log_risk_free = np.log(1 + risk_free_rate)
log_excess_returns = log_returns.subtract(log_risk_free, axis=1)
mean_log_excess = log_excess_returns.mean(axis=1)
var_log_excess = log_excess_returns.var(axis=1)

var_excess

  returns = ds.pct_change(axis=1, fill_method="ffill")


AR    0.007138
AT    0.001444
BE    0.001379
BR    0.004269
CA    0.001561
CL    0.002067
CO    0.006409
CZ    0.004318
DK    0.002328
FI    0.007278
FR    0.000927
DE    0.001465
GR    0.002651
HK    0.002225
HU    0.001754
ID    0.011239
IL    0.002445
IT    0.001338
JP    0.003409
KR    0.004578
MY    0.001532
MX    0.001640
NL    0.001119
NO    0.002627
PH    0.001768
PL    0.001870
PT    0.001255
SG    0.000555
ZA    0.004763
ES    0.001417
SE    0.001865
CH    0.000868
TH    0.002815
TR    0.009853
US    0.000489
VE    0.006864
dtype: float64