In [17]:
import pandas as pd
import numpy as np

The input file (`country_weights.csv`) was filled in manually from MSCI factsheets and other research documents, most notably:

https://www.researchgate.net/figure/Composition-of-the-MSCI-All-Country-World-Index-1987-2012-7_tbl1_325426617

In [11]:
# The CSV holds one country per row and one year per column, so the index
# column contains country names. `parse_dates=True` would try (and fail) to
# parse those names as dates and only produce a warning, so it is omitted.
country_weights = pd.read_csv('country_weights.csv', index_col=0)
# Transpose so that each row is a year and each column is a country.
df_weights = country_weights.T
# The year labels arrive as strings; convert them to a real DatetimeIndex so
# rows can later be selected with year strings such as '2021'.
df_weights.index = pd.DatetimeIndex(df_weights.index)
df_weights

  country_weights = pd.read_csv('country_weights.csv', index_col=0, parse_dates=True)


Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,...,Peru,Hungary,Czechia,Egypt,Colombia,Pakistan,Jordan,Sri Lanka,Venezuela,Marocco
1998-01-01,46.56,11.26,10.4,2.29,4.1,3.4,4.2,1.22,1.5,1.2,...,0.08,0.08,0.06,0.0,0.06,0.05,0.01,0.01,0.1,0.0
1999-01-01,,,,,,,,,,,...,,,,0.0,,0.04,0.01,0.01,0.08,0.0
2000-01-01,,,,,,,,,,,...,,,,0.0,,0.03,0.01,0.01,0.06,0.0
2001-01-01,,,,,,,,,,,...,,,,0.01,,0.03,0.01,0.0,0.04,0.01
2002-01-01,,,,,,,,,,,...,,,,0.01,,0.02,0.01,0.0,0.02,0.01
2003-01-01,53.99,8.44,11.5,2.2,3.1,3.1,2.6,1.88,1.6,1.2,...,0.02,0.05,0.02,0.01,0.0,0.01,0.01,0.0,0.01,0.01
2004-01-01,,,,,,,,,,,...,,,,0.02,,0.01,0.01,0.0,0.01,0.01
2005-01-01,,,,,,,,,,,...,,,,0.03,,0.01,0.01,0.0,0.01,0.01
2006-01-01,,,,,,,,,,,...,,,,0.05,,0.02,0.01,0.0,0.0,0.02
2007-01-01,,,,,,,,,,,...,,,,0.07,,0.02,0.01,0.0,0.0,0.02


Countries whose weights I filled in manually for every year (to preserve the years in which they were included in or excluded from the indexes) have no missing values and are therefore skipped here.

In [13]:
# Columns (countries) that still contain at least one missing value.
has_missing_values = df_weights.isna().any(axis=0)
countries_to_fill = df_weights.columns[has_missing_values]
countries_to_fill

Index(['United States', 'Japan', 'United Kingdom', 'Canada', 'France',
       'Switzerland', 'Germany', 'Australia', 'Netherlands', 'Sweden', 'Spain',
       'Hong Kong', 'Italy', 'Singapore', 'Denmark', 'Finland', 'Belgium',
       'Norway', 'Israel', 'Ireland', 'New Zealand', 'Austria', 'Portugal',
       'China', 'Taiwan', 'India', 'Korea', 'Brazil', 'Russia', 'South Africa',
       'Mexico', 'Malaysia', 'Thailand', 'Indonesia', 'Turkiye', 'Chile',
       'Poland', 'Greece', 'Philippines', 'Peru', 'Hungary', 'Czechia',
       'Colombia'],
      dtype='object')

Given the country weights for some anchor years, fill the years in between with linearly interpolated values.

In [36]:
def fill_with_linspace_values(start_year, end_year, weights=None, countries=None):
    """Linearly interpolate country weights between two anchor years, in place.

    For every selected country, the rows from ``start_year`` through
    ``end_year`` (inclusive) are overwritten with evenly spaced values running
    from the weight at ``start_year`` to the weight at ``end_year``.

    Parameters
    ----------
    start_year, end_year : str
        Anchor years, e.g. ``'2021'`` and ``'2023'``. Both must be convertible
        with ``int()``, and the frame must hold exactly one row for each year
        in the range (otherwise the assignment fails with a length mismatch).
    weights : pandas.DataFrame, optional
        Frame to modify in place. Defaults to the notebook-level ``df_weights``.
    countries : iterable of str, optional
        Columns to interpolate. Defaults to the notebook-level
        ``countries_to_fill``.

    Returns
    -------
    None
        The frame is modified in place.

    Notes
    -----
    A country whose weight is missing at either anchor year is skipped with a
    warning. Interpolating from a NaN anchor would produce only NaNs and would
    overwrite the valid anchor at the other end of the range.
    """
    import warnings

    if weights is None:
        weights = df_weights
    if countries is None:
        countries = countries_to_fill

    # Number of yearly rows in the inclusive range; identical for every country.
    period_to_cover = int(end_year) - int(start_year) + 1

    for country in countries:
        # Selecting a row with a year string on a DatetimeIndex can return a
        # one-element Series instead of a scalar, so flatten before indexing.
        start_value = np.asarray(weights.loc[start_year, country], dtype=float).ravel()[0]
        end_value = np.asarray(weights.loc[end_year, country], dtype=float).ravel()[-1]

        if np.isnan(start_value) or np.isnan(end_value):
            warnings.warn(
                f"Skipping {country!r}: missing anchor weight for "
                f"{start_year} or {end_year}."
            )
            continue

        weights.loc[start_year:end_year, country] = np.linspace(
            start_value, end_value, period_to_cover)

In [42]:
# Interpolate the span between the 2021 and 2023 anchor years.
# NOTE(review): the output below shows every year filled, yet only this one
# span is requested here; presumably the cell was re-run with other year
# pairs. Confirm, and list all spans so a fresh "Run All" reproduces the table.
fill_with_linspace_values(start_year='2021', end_year='2023')
df_weights

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,...,Peru,Hungary,Czechia,Egypt,Colombia,Pakistan,Jordan,Sri Lanka,Venezuela,Marocco
1998-01-01,46.56,11.26,10.4,2.29,4.1,3.4,4.2,1.22,1.5,1.2,...,0.08,0.08,0.06,0.0,0.06,0.05,0.01,0.01,0.1,0.0
1999-01-01,48.046,10.696,10.62,2.272,3.9,3.34,3.88,1.352,1.52,1.2,...,0.068,0.074,0.052,0.0,0.048,0.04,0.01,0.01,0.08,0.0
2000-01-01,49.532,10.132,10.84,2.254,3.7,3.28,3.56,1.484,1.54,1.2,...,0.056,0.068,0.044,0.0,0.036,0.03,0.01,0.01,0.06,0.0
2001-01-01,51.018,9.568,11.06,2.236,3.5,3.22,3.24,1.616,1.56,1.2,...,0.044,0.062,0.036,0.01,0.024,0.03,0.01,0.0,0.04,0.01
2002-01-01,52.504,9.004,11.28,2.218,3.3,3.16,2.92,1.748,1.58,1.2,...,0.032,0.056,0.028,0.01,0.012,0.02,0.01,0.0,0.02,0.01
2003-01-01,53.99,8.44,11.5,2.2,3.1,3.1,2.6,1.88,1.6,1.2,...,0.02,0.05,0.02,0.01,0.0,0.01,0.01,0.0,0.01,0.01
2004-01-01,51.552,8.472,11.12,2.494,3.32,3.3,2.82,2.062,1.54,1.22,...,0.03,0.058,0.034,0.02,0.006,0.01,0.01,0.0,0.01,0.01
2005-01-01,49.114,8.504,10.74,2.788,3.54,3.5,3.04,2.244,1.48,1.24,...,0.04,0.066,0.048,0.03,0.012,0.01,0.01,0.0,0.01,0.01
2006-01-01,46.676,8.536,10.36,3.082,3.76,3.7,3.26,2.426,1.42,1.26,...,0.05,0.074,0.062,0.05,0.018,0.02,0.01,0.0,0.0,0.02
2007-01-01,44.238,8.568,9.98,3.376,3.98,3.9,3.48,2.608,1.36,1.28,...,0.06,0.082,0.076,0.07,0.024,0.02,0.01,0.0,0.0,0.02


In [78]:
# Manual fixes: overwrite individual (year, country) weights by hand.
manual_overrides = {
    ('2022', 'Russia'): 0,  # presumably Russia's removal from the index; confirm
    ('2003', 'Colombia'): 0.01,
}
for (fix_year, fix_country), fix_weight in manual_overrides.items():
    df_weights.loc[fix_year, fix_country] = fix_weight

Normalize each year's weights so that they sum to 100

In [72]:
# Total weight per year before normalisation (one value per row).
df_weights.sum(axis='columns')

1998-01-01    99.63
1999-01-01    99.63
2000-01-01    99.64
2001-01-01    99.73
2002-01-01    99.78
2003-01-01    99.87
2004-01-01    99.81
2005-01-01    99.76
2006-01-01    99.74
2007-01-01    99.70
2008-01-01    99.67
2009-01-01    99.61
2010-01-01    99.59
2011-01-01    99.59
2012-01-01    99.61
2013-01-01    99.61
2014-01-01    99.70
2015-01-01    99.68
2016-01-01    99.63
2017-01-01    99.60
2018-01-01    99.57
2019-01-01    99.87
2020-01-01    99.91
2021-01-01    99.94
2022-01-01    99.72
2023-01-01    99.87
2024-01-01    99.99
dtype: float64

In [73]:
# Rescale every row so that its weights add up to 100.
yearly_totals = df_weights.sum(axis=1)
scale_to_100 = 100 / yearly_totals
df_weights_norm = df_weights.mul(scale_to_100, axis=0)

In [74]:
# Keep two decimal places; rounding can move a row's total slightly off 100.
df_weights_norm = df_weights_norm.round(decimals=2)

In [75]:
# Total weight per year after normalisation and rounding.
df_weights_norm.sum(axis='columns')

1998-01-01     99.96
1999-01-01     99.96
2000-01-01     99.97
2001-01-01     99.97
2002-01-01     99.97
2003-01-01     99.96
2004-01-01     99.98
2005-01-01     99.98
2006-01-01     99.96
2007-01-01     99.95
2008-01-01     99.95
2009-01-01     99.98
2010-01-01     99.98
2011-01-01     99.97
2012-01-01     99.95
2013-01-01     99.96
2014-01-01     99.96
2015-01-01     99.95
2016-01-01     99.95
2017-01-01     99.94
2018-01-01     99.96
2019-01-01     99.96
2020-01-01     99.97
2021-01-01     99.98
2022-01-01     99.99
2023-01-01     99.97
2024-01-01    100.00
dtype: float64

In [80]:
# Keep only the rows from 1999 onwards (label-based slice on the date index).
df_weights_norm = df_weights_norm.loc['1999':]
df_weights_norm

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,...,Peru,Hungary,Czechia,Egypt,Colombia,Pakistan,Jordan,Sri Lanka,Venezuela,Marocco
1999-01-01,48.23,10.74,10.66,2.28,3.91,3.35,3.89,1.36,1.53,1.2,...,0.07,0.07,0.05,0.0,0.05,0.04,0.01,0.01,0.08,0.0
2000-01-01,49.71,10.17,10.88,2.26,3.71,3.29,3.57,1.49,1.55,1.2,...,0.06,0.07,0.04,0.0,0.04,0.03,0.01,0.01,0.06,0.0
2001-01-01,51.16,9.6,11.09,2.25,3.51,3.23,3.25,1.62,1.56,1.2,...,0.04,0.06,0.04,0.01,0.02,0.03,0.01,0.0,0.04,0.01
2002-01-01,52.62,9.02,11.3,2.22,3.31,3.17,2.93,1.75,1.58,1.2,...,0.03,0.06,0.03,0.01,0.01,0.02,0.01,0.0,0.02,0.01
2003-01-01,54.06,8.45,11.51,2.2,3.1,3.1,2.6,1.88,1.6,1.2,...,0.02,0.05,0.02,0.01,0.01,0.01,0.01,0.0,0.01,0.01
2004-01-01,51.65,8.49,11.14,2.49,3.33,3.31,2.83,2.06,1.54,1.22,...,0.03,0.06,0.03,0.02,0.01,0.01,0.01,0.0,0.01,0.01
2005-01-01,49.23,8.52,10.77,2.8,3.55,3.51,3.05,2.25,1.48,1.24,...,0.04,0.07,0.05,0.03,0.01,0.01,0.01,0.0,0.01,0.01
2006-01-01,46.8,8.56,10.39,3.09,3.77,3.71,3.27,2.44,1.42,1.26,...,0.05,0.07,0.06,0.05,0.02,0.02,0.01,0.0,0.0,0.02
2007-01-01,44.37,8.6,10.01,3.39,3.99,3.91,3.49,2.62,1.36,1.28,...,0.06,0.08,0.08,0.07,0.02,0.02,0.01,0.0,0.0,0.02
2008-01-01,41.94,8.63,9.63,3.68,4.21,4.11,3.71,2.8,1.3,1.3,...,0.07,0.09,0.09,0.09,0.03,0.02,0.01,0.0,0.0,0.03


Save to CSV

In [81]:
# Write the normalised weights to disk.
# NOTE(review): the file name spelling ('coutry') is kept as-is because other
# code may read this exact path; confirm before renaming.
df_weights_norm.to_csv(path_or_buf='acwi_coutry_weights.csv')