In [1]:
# Standard Library Imports (if any)

# Third-party Library Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from IPython.display import display, HTML

# Local Module Imports
from utils import *
from data import *


In [2]:
data = DataManager(
    raw_dir = "../data/raw",
    save_dir = "../data/clean"
)
data.clean_and_save_data()

# Access datasets
cpis = data.get_dataset("cpis")
ds = data.get_dataset("ds")
fed = data.get_dataset("fed")
wb = data.get_dataset("wb")
wfe = data.get_dataset("wfe")

In [3]:
offshore = [CPIS.COUNTRY_TO_CODE[country] for country in CPIS.OFFSHORE_CENTERS]
sample = DS.CODES.copy()
offshore_weights = cpis.calculate_offshore_weights(DS.CODES.copy(), offshore)
offshore_investments = cpis.calculate_offshore_investments(DS.CODES.copy(), offshore)
offshore_distribution = cpis.distribute_offshore_holdings(offshore_investments, offshore_weights)  
domestic_investments = cpis.calculate_domestic_investments(DS.CODES.copy(), wb)
cpis_redis = (cpis + offshore_distribution + domestic_investments).get_data(issuers=sample, holders=sample)
cpis_redis_tot_per_country = cpis_redis.groupby("Country Name").sum()
cpis_redis_weight = cpis_redis / cpis_redis_tot_per_country

In [4]:
first_2001_column = ds.columns[ds.columns.year == 2001].min()
index_of_first_2001 = ds.columns.get_loc(first_2001_column)
ds.loc["ID"].iloc[index_of_first_2001:index_of_first_2001+49] = list(DS.ID_MODIFICATIONS.values())

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  ds.loc["ID"].iloc[index_of_first_2001:index_of_first_2001+49] = list(DS.ID_MODIFICATIONS.values())


In [5]:
ds_2004 = ds.loc[:, (ds.columns >= pd.Timestamp("2003-12-01")) & (ds.columns <= pd.Timestamp("2005-01-01"))]
ds_2004 = ds_2004.rename(columns=DS.COUNTRY_TO_CODE, index=DS.COUNTRY_TO_CODE)
cov_2004 = ds_2004.T.cov()
cov_2004
weights = cpis_redis_weight.get_data(holders=sample, periods=2004)
weights_np = weights.unstack().to_numpy()
weights_np

array([[8.21572469e-01, 1.09059796e-04, 6.47926194e-04, ...,
        9.17556720e-05, 1.43370323e-01, 2.96975230e-06],
       [3.05990248e-04, 5.44498392e-01, 8.89396862e-03, ...,
        1.10317942e-03, 1.35698471e-01, 3.19764341e-05],
       [1.68515306e-04, 1.74188451e-03, 5.64759901e-01, ...,
        9.89835716e-04, 9.82246941e-02, 4.33494114e-05],
       ...,
       [4.15008821e-07, 1.06319765e-06, 6.11353123e-06, ...,
        9.98751557e-01, 2.77261027e-04, 2.85448396e-08],
       [1.34777158e-04, 5.60740787e-04, 1.11127505e-03, ...,
        3.37834258e-04, 8.95208927e-01, 6.24689014e-05],
       [6.06396144e-06, 0.00000000e+00, 5.40244476e-06, ...,
        0.00000000e+00, 7.05190390e-03, 9.92736682e-01]])

In [6]:
ds_2004_np = ds_2004.pct_change(axis=1).iloc[:, 1:].to_numpy()
cov_2004 = np.cov(ds_2004_np)
cov_2004[0,:] @ weights_np[0,:]

np.float64(0.0059856257422611)

In [7]:
np.diagonal(np.dot(weights_np,cov_2004.T))

array([0.00598563, 0.00122867, 0.00119302, 0.00427647, 0.00134072,
       0.00192014, 0.0062921 , 0.00395225, 0.00192548, 0.00586378,
       0.0009214 , 0.00130442, 0.00260431, 0.00201679, 0.00172822,
       0.01126052, 0.00238204, 0.00117792, 0.00312581, 0.00450447,
       0.00153166, 0.00163051, 0.00089116, 0.00192842, 0.00175879,
       0.00188552, 0.00118907, 0.00051135, 0.00466712, 0.00139272,
       0.00146916, 0.00082664, 0.0028236 , 0.00987267, 0.0005076 ,
       0.00680031])

In [8]:
var_cov_2004 = pd.DataFrame({"var":ds_2004_np.var(axis=1), "cov":np.diagonal(np.dot(weights_np,cov_2004.T))}, index=ds_2004.index)
var_cov_2004

Unnamed: 0,var,cov
AR,0.006566,0.005986
AT,0.001338,0.001229
BE,0.00128,0.001193
BR,0.003941,0.004276
CA,0.001443,0.001341
CL,0.001912,0.00192
CO,0.005889,0.006292
CZ,0.003982,0.003952
DK,0.002144,0.001925
FI,0.00669,0.005864
