# Preparing Key Macroeconomic Indicators

Preparing six key economic indicators. Includes examining NAs, combining data sources, filling missing values, getting the latest values and calculating composite indicators. Described in chapters 3.2.2 and 3.6.

In [1]:
import pandas as pd
# Show every column when a DataFrame is displayed (the country tables are wide).
pd.set_option('display.max_columns', None)
import numpy as np

# Load variables from a local .env file into the process environment.
# NOTE(review): presumably DataProvider reads its connection settings from
# these variables, which is why this runs before the project imports - confirm.
from dotenv import load_dotenv
load_dotenv()

# Project-local data access layer; a single shared instance is used by all cells below.
from DataProvider import DataProvider
data_provider = DataProvider()

# Project-local import; not referenced in the cells shown in this section.
from SqlAlquemySelectDataHandler import SqlAlquemySelectDataHandler

### GDP Annual Growth Rate

In [46]:
# Indicator examined in this section; the following cells reuse `indicator_name`.
indicator_name = 'GDP Annual Growth Rate'
# Values from the 'Investing' source.
# NOTE(review): 'QS' looks like the pandas quarter-start alias; the result is
# displayed on a monthly index with each quarterly value repeated - confirm
# against DataProvider.get_indicator_values.
df_investing_indicator = data_provider.get_indicator_values(indicator_name, 'Investing', 'QS')
df_investing_indicator

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
1999-01-01,,,,,,,,,,,,,,,,6.92,,,,,,,,,,,
1999-02-01,,,,,,,,,,,,,,,,6.92,,,,,,,,,,,
1999-03-01,,,,,,,,,,,,,,,,6.92,,,,,,,,,,,
1999-04-01,,,,,,,,,,,,,,,,4.30,,,,,,,,,,,
1999-05-01,,,,,,,,,,,,,,,,4.30,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-01,,-2.9,0.3,0.47,0.6,0.3,-0.4,2.1,-0.8,-1.4,4.1,1.8,0.1,1.1,-0.3,-1.20,,,4.9,2.32,,1.4,2.0,5.1,-0.7,3.3,3.3
2023-09-01,,-2.9,0.3,0.47,0.6,0.3,-0.4,2.1,-0.8,-1.4,4.1,1.8,0.1,1.1,-0.3,-1.20,,,4.9,2.32,,1.4,2.0,5.1,-0.7,3.3,3.3
2023-10-01,,-0.4,-0.2,,0.7,,-0.2,,-0.5,0.0,4.3,2.0,0.5,2.2,3.1,-0.80,,,5.2,5.12,,2.2,,,,2.5,3.0
2023-11-01,,-0.4,-0.2,,0.7,,-0.2,,-0.5,0.0,4.3,2.0,0.5,2.2,3.1,-0.80,,,5.2,5.12,,2.2,,,,2.5,3.0


In [47]:
# Same indicator (`indicator_name` is set in the previous cell), this time from the 'OECD' source.
df_oecd_indicator = data_provider.get_indicator_values(indicator_name, 'OECD', 'QS')
df_oecd_indicator

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
1999-01-01,4.82,-0.84,2.34,4.19,2.78,1.03,0.79,5.27,4.21,4.22,,4.24,0.86,,2.31,5.93,1.90,0.87,8.9,,6.20,6.58,0.67,,0.98,2.29,
1999-02-01,4.82,-0.84,2.34,4.19,2.78,1.03,0.79,5.27,4.21,4.22,,4.24,0.86,,2.31,5.93,1.90,0.87,8.9,,6.20,6.58,0.67,,0.98,2.29,
1999-03-01,4.82,-0.84,2.34,4.19,2.78,1.03,0.79,5.27,4.21,4.22,,4.24,0.86,,2.31,5.93,1.90,0.87,8.9,,6.20,6.58,0.67,,0.98,2.29,
1999-04-01,4.72,-0.03,2.17,5.01,2.61,0.58,1.22,4.61,4.62,3.52,,4.30,0.64,,3.62,4.39,2.96,-0.03,7.9,,7.53,12.16,-0.44,,1.64,2.30,
1999-05-01,4.72,-0.03,2.17,5.01,2.61,0.58,1.22,4.61,4.62,3.52,,4.30,0.64,,3.62,4.39,2.96,-0.03,7.9,,7.53,12.16,-0.44,,1.64,2.30,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-01,2.93,1.63,0.27,0.47,0.63,0.43,-0.28,2.05,-0.56,-1.33,,1.91,0.06,,-0.32,-1.03,1.38,-1.49,4.9,,7.02,1.25,2.47,,-0.48,2.96,
2023-09-01,2.93,1.63,0.27,0.47,0.63,0.43,-0.28,2.05,-0.56,-1.33,,1.91,0.06,,-0.32,-1.03,1.38,-1.49,4.9,,7.02,1.25,2.47,,-0.48,2.96,
2023-10-01,3.11,,,0.98,0.66,,-0.16,,,0.02,,2.04,0.50,,,,1.56,,5.2,,,2.20,,,,2.37,
2023-11-01,3.11,,,0.98,0.66,,-0.16,,,0.02,,2.04,0.50,,,,1.56,,5.2,,,2.20,,,,2.37,


In [48]:
# Merge the two sources cell by cell: Investing values take precedence and
# OECD values are used only where Investing has no value (combine_first semantics).
df_combined = df_investing_indicator.combine_first(df_oecd_indicator)
df_combined

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
1999-01-01,4.82,-0.84,2.34,4.19,2.78,1.03,0.79,5.27,4.21,4.22,,4.24,0.86,,2.31,6.92,1.90,0.87,8.9,,6.20,6.58,0.67,,0.98,2.29,
1999-02-01,4.82,-0.84,2.34,4.19,2.78,1.03,0.79,5.27,4.21,4.22,,4.24,0.86,,2.31,6.92,1.90,0.87,8.9,,6.20,6.58,0.67,,0.98,2.29,
1999-03-01,4.82,-0.84,2.34,4.19,2.78,1.03,0.79,5.27,4.21,4.22,,4.24,0.86,,2.31,6.92,1.90,0.87,8.9,,6.20,6.58,0.67,,0.98,2.29,
1999-04-01,4.72,-0.03,2.17,5.01,2.61,0.58,1.22,4.61,4.62,3.52,,4.30,0.64,,3.62,4.30,2.96,-0.03,7.9,,7.53,12.16,-0.44,,1.64,2.30,
1999-05-01,4.72,-0.03,2.17,5.01,2.61,0.58,1.22,4.61,4.62,3.52,,4.30,0.64,,3.62,4.30,2.96,-0.03,7.9,,7.53,12.16,-0.44,,1.64,2.30,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-01,2.93,-2.90,0.30,0.47,0.60,0.30,-0.40,2.10,-0.80,-1.40,4.1,1.80,0.10,1.1,-0.30,-1.20,1.38,-1.49,4.9,2.32,7.02,1.40,2.00,5.1,-0.70,3.30,3.3
2023-09-01,2.93,-2.90,0.30,0.47,0.60,0.30,-0.40,2.10,-0.80,-1.40,4.1,1.80,0.10,1.1,-0.30,-1.20,1.38,-1.49,4.9,2.32,7.02,1.40,2.00,5.1,-0.70,3.30,3.3
2023-10-01,3.11,-0.40,-0.20,0.98,0.70,,-0.20,,-0.50,0.00,4.3,2.00,0.50,2.2,3.10,-0.80,1.56,,5.2,5.12,,2.20,,,,2.50,3.0
2023-11-01,3.11,-0.40,-0.20,0.98,0.70,,-0.20,,-0.50,0.00,4.3,2.00,0.50,2.2,3.10,-0.80,1.56,,5.2,5.12,,2.20,,,,2.50,3.0


In [49]:
# Per-country count of missing observations for each source and for the
# merged series, indexed by the provider's selected countries.
df_missing_data = pd.DataFrame(
    {
        f'{indicator_name} (Investing)': df_investing_indicator.isna().sum(),
        f'{indicator_name} (OECD)': df_oecd_indicator.isna().sum(),
        f'{indicator_name} (Combined)': df_combined.isna().sum(),
    },
    index=data_provider.selected_countries,
)
df_missing_data

Unnamed: 0,GDP Annual Growth Rate (Investing),GDP Annual Growth Rate (OECD),GDP Annual Growth Rate (Combined)
United States,300,0,0
Japan,174,3,0
United Kingdom,156,3,0
Canada,225,0,0
France,225,0,0
Switzerland,171,3,3
Germany,156,0,0
Australia,171,3,3
Netherlands,141,3,0
Sweden,177,0,0


### GDP Growth Rate

In [50]:
indicator_name = 'GDP Growth Rate'

# Fetch the series from both providers: Investing first, then OECD.
df_investing_indicator, df_oecd_indicator = (
    data_provider.get_indicator_values(indicator_name, source, 'QS')
    for source in ('Investing', 'OECD')
)

# Investing values win; OECD only fills the cells Investing leaves empty.
df_combined = df_investing_indicator.combine_first(df_oecd_indicator)
df_combined

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
1999-01-01,0.94,-1.37,0.69,1.81,0.66,0.10,1.16,0.74,1.67,1.09,,1.00,0.63,,0.91,1.65,1.04,1.02,,,1.62,3.03,0.31,,0.96,0.86,
1999-02-01,0.94,-1.37,0.69,1.81,0.66,0.10,1.16,0.74,1.67,1.09,,1.00,0.63,,0.91,1.65,1.04,1.02,,,1.62,3.03,0.31,,0.96,0.86,
1999-03-01,0.94,-1.37,0.69,1.81,0.66,0.10,1.16,0.74,1.67,1.09,,1.00,0.63,,0.91,1.65,1.04,1.02,,,1.62,3.03,0.31,,0.96,0.86,
1999-04-01,0.83,0.38,0.47,0.84,0.62,0.65,0.00,0.34,1.11,0.68,,1.19,0.38,,0.52,0.53,1.37,0.04,,,2.58,4.38,0.32,,0.80,0.75,
1999-05-01,0.83,0.38,0.47,0.84,0.62,0.65,0.00,0.34,1.11,0.68,,1.19,0.38,,0.52,0.53,1.37,0.04,,,2.58,4.38,0.32,,0.80,0.75,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-01,4.90,-0.70,-0.10,-1.10,-0.10,0.30,-0.10,0.20,-0.30,-0.30,0.1,0.30,0.10,1.4,-0.70,-0.90,0.40,-0.50,1.3,,1.74,0.60,0.10,,-0.20,1.10,
2023-09-01,4.90,-0.70,-0.10,-1.10,-0.10,0.30,-0.10,0.20,-0.30,-0.30,0.1,0.30,0.10,1.4,-0.70,-0.90,0.40,-0.50,1.3,,1.74,0.60,0.10,,-0.20,1.10,
2023-10-01,3.30,-0.10,-0.30,0.30,0.00,,-0.30,,0.30,0.10,0.5,0.60,0.20,1.2,2.00,,0.40,1.50,1.0,,,0.60,,,,0.10,
2023-11-01,3.30,-0.10,-0.30,0.30,0.00,,-0.30,,0.30,0.10,0.5,0.60,0.20,1.2,2.00,,0.40,1.50,1.0,,,0.60,,,,0.10,


In [51]:
# Count missing observations per country for each source and for the merged series.
df_missing_data[f'{indicator_name} (Investing)'] = df_investing_indicator.isna().sum()
df_missing_data[f'{indicator_name} (OECD)'] = df_oecd_indicator.isna().sum()
df_missing_data[f'{indicator_name} (Combined)'] = df_combined.isna().sum()

# Select this indicator's columns by label. A positional `iloc[:, -3:]` shows a
# different indicator if this cell is re-run after later cells added columns.
df_missing_data[[f'{indicator_name} ({source})' for source in ('Investing', 'OECD', 'Combined')]]

Unnamed: 0,GDP Growth Rate (Investing),GDP Growth Rate (OECD),GDP Growth Rate (Combined)
United States,105,0,0
Japan,105,3,0
United Kingdom,105,3,0
Canada,174,0,0
France,105,0,0
Switzerland,108,3,3
Germany,105,0,0
Australia,108,3,3
Netherlands,183,3,0
Sweden,138,0,0


### Unemployment Rate

In [52]:
indicator_name = 'Unemployment Rate'

# Fetch the series from both providers: Investing first, then OECD.
df_investing_indicator, df_oecd_indicator = (
    data_provider.get_indicator_values(indicator_name, source, 'MS')
    for source in ('Investing', 'OECD')
)

# Investing values win; OECD only fills the cells Investing leaves empty.
df_combined = df_investing_indicator.combine_first(df_oecd_indicator)
df_combined

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
1999-01-01,4.4,4.5,6.2,7.9,12.0,3.1,10.7,7.1,4.6,7.3,6.4,14.80,11.1,,6.1,10.6,8.9,2.7,,2.92,,7.6,,14.2,,2.6,
1999-02-01,4.4,4.6,6.2,7.9,12.0,3.0,10.7,7.1,4.5,7.0,6.4,14.30,11.1,,5.9,10.5,9.0,2.7,,2.88,,7.4,,14.6,,3.1,
1999-03-01,4.2,4.7,6.2,7.9,11.8,3.0,10.6,7.0,4.4,7.0,6.3,13.90,11.1,,5.9,10.5,9.0,2.9,,2.84,,7.2,,14.1,,2.7,
1999-04-01,4.3,4.7,6.2,8.2,11.7,2.9,10.6,7.0,4.4,6.9,6.2,13.60,11.1,,5.9,10.5,9.0,2.9,,2.97,,7.0,,13.4,,2.8,
1999-05-01,4.2,4.7,6.1,7.9,11.5,2.8,10.5,7.0,4.3,6.7,6.2,13.50,11.1,,5.8,10.4,9.0,2.9,,2.98,,6.8,,12.7,,2.6,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-01,3.8,2.7,4.2,5.5,7.4,2.1,5.7,3.7,3.6,7.7,2.8,11.90,7.3,,2.5,7.2,5.3,3.6,5.2,3.42,,2.4,7.9,3.0,,2.7,3.4
2023-09-01,3.9,2.6,4.2,5.5,7.3,2.1,5.7,3.6,3.7,7.7,2.8,11.90,7.4,,2.5,7.3,5.4,3.5,5.0,3.48,,2.6,7.8,3.0,,2.7,3.4
2023-10-01,3.7,2.5,4.2,5.7,7.5,2.1,5.8,3.7,3.6,7.4,2.9,11.76,7.8,2.0,2.5,7.5,5.6,3.6,5.0,3.41,,2.5,7.7,2.9,32.1,2.6,3.4
2023-11-01,3.7,2.5,3.7,5.8,7.3,2.1,5.9,3.9,3.5,7.1,2.9,11.80,7.6,,2.3,7.6,5.6,3.7,5.0,3.37,,2.8,7.6,2.9,,2.8,3.4


In [53]:
# Count missing observations per country for each source and for the merged series.
df_missing_data[f'{indicator_name} (Investing)'] = df_investing_indicator.isna().sum()
df_missing_data[f'{indicator_name} (OECD)'] = df_oecd_indicator.isna().sum()
df_missing_data[f'{indicator_name} (Combined)'] = df_combined.isna().sum()

# Select this indicator's columns by label. A positional `iloc[:, -3:]` shows a
# different indicator if this cell is re-run after later cells added columns.
df_missing_data[[f'{indicator_name} ({source})' for source in ('Investing', 'OECD', 'Combined')]]

Unnamed: 0,Unemployment Rate (Investing),Unemployment Rate (OECD),Unemployment Rate (Combined)
United States,0,0,0
Japan,5,0,0
United Kingdom,2,2,0
Canada,0,0,0
France,245,0,0
Switzerland,0,300,0
Germany,9,0,0
Australia,0,0,0
Netherlands,50,0,0
Sweden,26,0,0


### Inflation Rate

In [54]:
indicator_name = 'Inflation Rate'

# Fetch the series from both providers: Investing first, then OECD.
df_investing_indicator, df_oecd_indicator = (
    data_provider.get_indicator_values(indicator_name, source, 'MS')
    for source in ('Investing', 'OECD')
)

# Investing values win; OECD only fills the cells Investing leaves empty.
df_combined = df_investing_indicator.combine_first(df_oecd_indicator)
df_combined

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
1999-01-01,1.6,0.2,1.6,0.7,0.24,0.1,0.2,,2.2,-0.3,-1.1,1.5,1.4,-0.92,1.7,0.50,0.95,2.3,-1.2,0.40,9.38,1.5,1.65,96.9,8.84,19.02,5.2
1999-02-01,1.7,-0.1,1.4,0.7,0.18,0.3,0.1,1.19,2.1,-0.1,-1.7,1.8,1.3,-0.73,2.0,0.76,0.98,2.1,-1.3,2.09,8.64,0.2,2.24,103.2,8.58,18.54,3.8
1999-03-01,1.6,-0.4,1.7,1.0,0.35,0.5,0.2,,2.2,0.1,-2.6,2.2,1.3,-0.52,2.3,0.79,1.21,2.3,-1.8,-0.47,8.95,0.5,3.02,107.5,7.83,18.26,3.0
1999-04-01,1.7,-0.1,1.5,1.6,0.42,0.6,0.6,,2.1,0.1,-3.8,2.4,1.4,-0.32,2.3,1.06,1.18,2.5,-2.2,-0.10,8.36,0.4,3.35,112.9,7.57,18.23,2.9
1999-05-01,2.3,-0.4,1.3,1.5,0.41,0.6,0.5,1.04,2.3,0.1,-4.0,2.2,1.4,0.15,2.0,1.37,0.83,2.5,-2.2,0.49,7.71,0.8,3.14,116.6,7.08,18.01,2.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-01,3.7,3.2,6.7,4.0,4.80,1.6,6.1,5.37,3.0,7.5,1.8,2.6,5.4,4.00,2.4,5.60,4.09,4.8,0.1,2.52,6.83,3.4,4.61,5.2,4.80,4.64,2.0
2023-09-01,3.2,3.0,6.7,3.8,4.90,1.7,4.5,,0.2,6.5,2.0,3.5,5.3,4.10,0.9,5.50,0.36,3.3,0.0,2.93,5.02,3.7,5.19,6.0,5.40,4.45,1.9
2023-10-01,3.1,3.3,4.6,3.1,4.00,1.7,3.8,4.10,-0.4,6.5,2.7,3.5,1.7,4.70,0.1,4.90,0.36,4.0,-0.2,3.05,4.87,3.8,4.82,6.7,5.90,4.26,1.8
2023-11-01,3.4,2.8,3.9,3.1,3.50,1.4,3.2,4.05,1.6,5.8,2.6,3.2,0.7,3.60,0.6,3.30,0.76,4.8,-0.5,2.90,5.55,3.3,4.68,7.5,5.50,4.32,1.5


In [55]:
# Count missing observations per country for each source and for the merged series.
df_missing_data[f'{indicator_name} (Investing)'] = df_investing_indicator.isna().sum()
df_missing_data[f'{indicator_name} (OECD)'] = df_oecd_indicator.isna().sum()
df_missing_data[f'{indicator_name} (Combined)'] = df_combined.isna().sum()

# Select this indicator's columns by label. A positional `iloc[:, -3:]` shows a
# different indicator if this cell is re-run after later cells added columns.
df_missing_data[[f'{indicator_name} ({source})' for source in ('Investing', 'OECD', 'Combined')]]

Unnamed: 0,Inflation Rate (Investing),Inflation Rate (OECD),Inflation Rate (Combined)
United States,0,0,0
Japan,6,30,0
United Kingdom,1,0,0
Canada,2,0,0
France,230,0,0
Switzerland,0,0,0
Germany,0,0,0
Australia,257,200,157
Netherlands,0,0,0
Sweden,6,0,0


### Inflation Rate MoM

In [56]:
indicator_name = 'Inflation Rate MoM'

# Fetch the series from both providers: Investing first, then OECD.
df_investing_indicator, df_oecd_indicator = (
    data_provider.get_indicator_values(indicator_name, source, 'MS')
    for source in ('Investing', 'OECD')
)

# Investing values win; OECD only fills the cells Investing leaves empty.
df_combined = df_investing_indicator.combine_first(df_oecd_indicator)
df_combined

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
1999-01-01,0.2,-0.5,-0.6,0.2,-0.4,0.2,-0.1,,0.06,0.0,,0.4,0.1,,-0.10,-0.21,0.33,0.4,0.2,0.16,-2.10,-0.1,0.70,8.3,0.75,2.53,
1999-02-01,0.0,-0.4,0.2,0.1,0.3,0.3,0.1,,0.50,0.0,,0.1,0.1,,0.77,0.24,0.23,0.2,1.3,-0.03,-1.19,0.4,1.05,4.2,0.00,1.34,
1999-03-01,0.1,0.1,0.5,0.4,0.4,0.1,0.0,,0.87,0.4,,0.4,0.3,,0.48,0.24,0.07,0.7,-0.8,-0.47,-0.24,0.2,1.10,2.8,0.00,0.93,
1999-04-01,0.7,0.5,0.4,0.5,0.3,0.2,0.5,,0.22,0.2,,0.4,0.3,,0.28,0.73,0.41,0.3,-1.0,-0.15,0.24,0.3,0.56,3.0,0.21,0.92,
1999-05-01,0.1,0.0,0.3,0.2,0.0,-0.2,0.0,,0.03,0.2,,0.0,0.1,,0.19,0.25,0.16,-0.2,-1.3,0.18,0.96,-0.2,0.30,2.1,-0.11,0.60,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-01,0.6,0.2,0.3,0.4,1.0,0.2,0.3,,0.40,0.1,0.0,0.5,0.3,0.9,-0.70,-0.70,0.76,-0.8,0.3,0.18,-0.36,1.0,0.23,0.3,0.30,0.55,0.2
2023-09-01,0.4,0.3,0.5,-0.1,-0.5,-0.1,0.3,,-0.40,0.5,0.4,0.2,0.2,0.5,-0.30,0.70,0.34,-0.1,0.2,0.34,-1.22,0.6,0.26,0.9,0.60,0.44,0.1
2023-10-01,0.0,,0.0,0.1,0.1,0.1,0.0,,0.40,0.2,1.0,0.3,-0.2,0.2,0.30,0.30,0.34,1.0,-0.1,0.33,0.65,0.3,0.24,0.8,0.90,0.38,0.1
2023-11-01,0.1,,-0.2,0.1,-0.2,-0.2,-0.4,,-1.00,0.3,0.0,-0.3,-0.5,-0.2,-0.30,-0.30,0.17,0.5,-0.5,-0.07,0.51,-0.6,0.28,1.1,-0.10,0.64,


In [57]:
# Count missing observations per country for each source and for the merged series.
df_missing_data[f'{indicator_name} (Investing)'] = df_investing_indicator.isna().sum()
df_missing_data[f'{indicator_name} (OECD)'] = df_oecd_indicator.isna().sum()
df_missing_data[f'{indicator_name} (Combined)'] = df_combined.isna().sum()

# Select this indicator's columns by label. A positional `iloc[:, -3:]` shows a
# different indicator if this cell is re-run after later cells added columns.
df_missing_data[[f'{indicator_name} ({source})' for source in ('Investing', 'OECD', 'Combined')]]

Unnamed: 0,Inflation Rate MoM (Investing),Inflation Rate MoM (OECD),Inflation Rate MoM (Combined)
United States,1,0,0
Japan,233,30,5
United Kingdom,1,0,0
Canada,4,0,0
France,0,0,0
Switzerland,1,0,0
Germany,1,0,0
Australia,300,300,300
Netherlands,292,0,0
Sweden,0,0,0


### Manufacturing PMI

In [58]:
# Manufacturing PMI is fetched from the 'Investing' source only in this section;
# no OECD series is loaded or combined for it.
indicator_name = 'Manufacturing PMI'
df_investing_indicator = data_provider.get_indicator_values(indicator_name, 'Investing', 'MS')
df_investing_indicator

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
1999-01-01,,,,,,46.5,,,,48.0,,,,,,,,,,,,,,,,,
1999-02-01,,,,,,48.0,,,,48.6,,,,,,,,,,,,,,,,,
1999-03-01,,,,,,48.7,,,,51.3,,,,,,,,,,,,,,,,,
1999-04-01,,,,,,51.2,,,,53.1,,,,,,,,,,,,,,,,,
1999-05-01,,,,,,51.8,,,,51.7,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-01,47.9,49.6,43.0,48.0,46.0,39.9,39.1,49.6,,45.8,49.8,46.5,45.4,,,,,51.4,49.7,44.3,58.6,48.9,50.1,52.7,49.7,51.2,
2023-09-01,49.8,48.5,44.3,47.5,44.2,44.9,39.6,48.7,,43.3,49.6,47.7,46.8,,,,,52.5,50.2,46.4,57.5,49.9,49.0,54.5,45.4,49.8,
2023-10-01,50.0,48.7,44.8,48.6,42.8,40.6,40.8,48.2,,45.7,48.9,45.1,44.9,,,,,47.9,49.5,47.6,55.5,49.8,48.6,53.8,45.4,52.1,
2023-11-01,49.4,48.3,47.2,47.7,42.9,42.1,42.6,47.7,,49.0,50.1,46.3,44.4,,,,,49.9,49.4,48.3,56.0,50.0,49.4,53.8,48.2,52.5,


In [59]:
# Count missing PMI observations per country (single source, so one column).
df_missing_data[f'{indicator_name} (Investing)'] = df_investing_indicator.isna().sum()

# Select the column by label. A positional `iloc[:, -1]` returns a different
# indicator if this cell is re-run after later cells added columns.
df_missing_data[f'{indicator_name} (Investing)']

United States     160
Japan             109
United Kingdom    109
Canada            263
France            115
Switzerland         0
Germany           109
Australia         239
Netherlands       300
Sweden              0
Hong Kong         161
Spain             157
Italy             161
Singapore         300
Denmark           290
Finland           300
Belgium           300
Norway             63
China              75
Taiwan            161
India             159
Korea             167
Brazil            161
Russia            162
South Africa      174
Mexico            161
Malaysia          300
Name: Manufacturing PMI (Investing), dtype: int64

### OECD Business Confidence Indicator

In [60]:
# The business confidence indicator is fetched from the 'OECD' source only.
# NOTE(review): the spelling 'Bussiness' is a runtime lookup key; presumably it
# matches the stored indicator name - do not correct it here without confirming.
indicator_name = 'OECD Bussiness Confidence Indicator'
df_oecd_indicator = data_provider.get_indicator_values(indicator_name, 'OECD', 'MS')
df_oecd_indicator

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
1999-01-01,99.25,96.93,96.75,100.01,99.91,99.06,99.08,99.89,100.24,98.22,,100.91,99.64,,96.11,98.14,98.50,98.85,,,,103.24,96.63,99.72,97.37,97.22,
1999-02-01,99.55,97.05,97.04,100.14,99.87,99.28,98.90,99.80,100.26,97.85,,100.93,99.62,,96.38,98.30,98.77,98.98,,,,102.79,96.80,100.15,97.30,98.36,
1999-03-01,99.78,97.20,97.58,100.27,99.79,99.33,98.80,99.94,100.24,97.74,,101.09,99.70,,96.64,98.55,99.07,99.08,,,,101.93,97.00,100.68,97.25,99.39,
1999-04-01,99.96,97.35,98.38,100.43,99.87,99.42,98.77,100.18,100.24,97.88,,101.12,99.85,,96.81,98.98,99.55,99.06,,,,100.89,97.34,101.18,97.28,100.19,
1999-05-01,100.17,97.52,99.14,100.66,100.09,99.50,98.81,100.38,100.26,98.31,,101.13,100.20,,97.07,99.21,99.90,99.00,,,,99.99,97.93,101.72,97.38,100.66,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-01,98.72,100.46,100.09,,99.66,99.06,99.16,100.23,100.01,99.92,,100.08,99.90,,100.17,96.98,97.81,99.02,98.12,,,98.11,98.99,,98.81,101.34,
2023-09-01,98.79,100.51,100.04,,99.79,98.89,99.09,,99.97,99.97,,100.03,99.78,,99.87,96.81,97.84,99.19,98.28,,,98.05,98.93,,98.93,101.41,
2023-10-01,98.70,100.60,100.04,,99.83,98.79,99.06,,99.99,99.93,,100.00,99.72,,99.78,96.76,97.85,,98.15,,,98.08,98.99,,98.96,101.49,
2023-11-01,98.64,100.70,99.69,,99.83,98.91,99.06,,100.00,99.77,,100.05,99.68,,99.61,96.76,97.93,,97.92,,,98.14,99.23,,98.93,101.59,


In [61]:
# Count missing observations per country (single source, so one column).
df_missing_data[f'{indicator_name} (OECD)'] = df_oecd_indicator.isna().sum()

# Select the column by label. A positional `iloc[:, -1]` returns a different
# indicator if this cell is re-run after later cells added columns.
df_missing_data[f'{indicator_name} (OECD)']

United States       0
Japan               0
United Kingdom      0
Canada              5
France              0
Switzerland         0
Germany             0
Australia           4
Netherlands         0
Sweden              0
Hong Kong         300
Spain               0
Italy               0
Singapore         300
Denmark             0
Finland             0
Belgium             0
Norway              3
China              13
Taiwan            300
India              23
Korea               0
Brazil              0
Russia             23
South Africa        0
Mexico              0
Malaysia          300
Name: OECD Bussiness Confidence Indicator (OECD), dtype: int64

### Missing data summary

In [62]:
# Final series used per indicator: the combined column where two sources exist,
# otherwise the single available source. Selected by label so the summary does
# not depend on the order in which columns were added to df_missing_data.
summary_columns = [
    'GDP Annual Growth Rate (Combined)',
    'GDP Growth Rate (Combined)',
    'Unemployment Rate (Combined)',
    'Inflation Rate (Combined)',
    'Inflation Rate MoM (Combined)',
    'Manufacturing PMI (Investing)',
    'OECD Bussiness Confidence Indicator (OECD)',
]
df_missing_data[summary_columns]

Unnamed: 0,GDP Annual Growth Rate (Combined),GDP Growth Rate (Combined),Unemployment Rate (Combined),Inflation Rate (Combined),Inflation Rate MoM (Combined),Manufacturing PMI (Investing),OECD Bussiness Confidence Indicator (OECD)
United States,0,0,0,0,0,160,0
Japan,0,0,0,0,5,109,0
United Kingdom,0,0,0,0,0,109,0
Canada,0,0,0,0,0,263,5
France,0,0,0,0,0,115,0
Switzerland,3,3,0,0,0,0,0
Germany,0,0,0,0,0,109,0
Australia,3,3,0,157,300,239,4
Netherlands,0,0,0,0,0,300,0
Sweden,0,0,0,0,0,0,0


### Getting the most recent data

GDP Annual Growth Rate and GDP Growth Rate are quarterly series; their final values, including all revisions, are available with a delay of 2 quarters.

In [63]:
indicator_name = 'GDP Growth Rate'

# Fetch the series from both providers: Investing first, then OECD.
df_investing_indicator, df_oecd_indicator = (
    data_provider.get_indicator_values(indicator_name, source, 'QS')
    for source in ('Investing', 'OECD')
)

# Investing values win; OECD only fills the cells Investing leaves empty.
df_gdp_combined = df_investing_indicator.combine_first(df_oecd_indicator)
df_gdp_combined

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
1999-01-01,0.94,-1.37,0.69,1.81,0.66,0.10,1.16,0.74,1.67,1.09,,1.00,0.63,,0.91,1.65,1.04,1.02,,,1.62,3.03,0.31,,0.96,0.86,
1999-02-01,0.94,-1.37,0.69,1.81,0.66,0.10,1.16,0.74,1.67,1.09,,1.00,0.63,,0.91,1.65,1.04,1.02,,,1.62,3.03,0.31,,0.96,0.86,
1999-03-01,0.94,-1.37,0.69,1.81,0.66,0.10,1.16,0.74,1.67,1.09,,1.00,0.63,,0.91,1.65,1.04,1.02,,,1.62,3.03,0.31,,0.96,0.86,
1999-04-01,0.83,0.38,0.47,0.84,0.62,0.65,0.00,0.34,1.11,0.68,,1.19,0.38,,0.52,0.53,1.37,0.04,,,2.58,4.38,0.32,,0.80,0.75,
1999-05-01,0.83,0.38,0.47,0.84,0.62,0.65,0.00,0.34,1.11,0.68,,1.19,0.38,,0.52,0.53,1.37,0.04,,,2.58,4.38,0.32,,0.80,0.75,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-01,4.90,-0.70,-0.10,-1.10,-0.10,0.30,-0.10,0.20,-0.30,-0.30,0.1,0.30,0.10,1.4,-0.70,-0.90,0.40,-0.50,1.3,,1.74,0.60,0.10,,-0.20,1.10,
2023-09-01,4.90,-0.70,-0.10,-1.10,-0.10,0.30,-0.10,0.20,-0.30,-0.30,0.1,0.30,0.10,1.4,-0.70,-0.90,0.40,-0.50,1.3,,1.74,0.60,0.10,,-0.20,1.10,
2023-10-01,3.30,-0.10,-0.30,0.30,0.00,,-0.30,,0.30,0.10,0.5,0.60,0.20,1.2,2.00,,0.40,1.50,1.0,,,0.60,,,,0.10,
2023-11-01,3.30,-0.10,-0.30,0.30,0.00,,-0.30,,0.30,0.10,0.5,0.60,0.20,1.2,2.00,,0.40,1.50,1.0,,,0.60,,,,0.10,


In [64]:
# Reference date and the same day six months (two quarters) earlier.
date = pd.Timestamp('2023-12-06')
date_minus_2_quarters = date - pd.DateOffset(months=6)

# First day of the last month of the quarter containing that earlier date.
latest_known_period = (
    date_minus_2_quarters.to_period('Q').start_time + pd.DateOffset(months=2)
)

# Keep the six most recent monthly rows up to and including that month.
df_latest = df_gdp_combined.loc[:latest_known_period].tail(6)
df_latest

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
2023-01-01,2.0,0.7,0.1,3.1,0.2,0.3,-0.3,0.2,-0.3,0.6,5.3,0.6,0.6,-1.6,0.6,0.2,0.5,0.2,2.2,,2.08,0.3,1.44,,0.4,1.0,
2023-02-01,2.0,0.7,0.1,3.1,0.2,0.3,-0.3,0.2,-0.3,0.6,5.3,0.6,0.6,-1.6,0.6,0.2,0.5,0.2,2.2,,2.08,0.3,1.44,,0.4,1.0,
2023-03-01,2.0,0.7,0.1,3.1,0.2,0.3,-0.3,0.2,-0.3,0.6,5.3,0.6,0.6,-1.6,0.6,0.2,0.5,0.2,2.2,,2.08,0.3,1.44,,0.4,1.0,
2023-04-01,2.1,1.2,0.2,-0.2,0.5,0.0,0.0,0.4,-0.2,-0.8,-1.3,0.5,-0.4,0.1,-0.3,0.6,0.2,0.0,0.8,,1.89,0.6,0.9,,0.6,0.8,
2023-05-01,2.1,1.2,0.2,-0.2,0.5,0.0,0.0,0.4,-0.2,-0.8,-1.3,0.5,-0.4,0.1,-0.3,0.6,0.2,0.0,0.8,,1.89,0.6,0.9,,0.6,0.8,
2023-06-01,2.1,1.2,0.2,-0.2,0.5,0.0,0.0,0.4,-0.2,-0.8,-1.3,0.5,-0.4,0.1,-0.3,0.6,0.2,0.0,0.8,,1.89,0.6,0.9,,0.6,0.8,


Monthly indicators (Unemployment Rate, Inflation Rate, Inflation Rate MoM and OECD Business Confidence Indicator) have a 2-month delay before final data with all revisions is available.

In [65]:
indicator_name = 'Inflation Rate'

# Fetch the series from both providers: Investing first, then OECD.
df_investing_indicator, df_oecd_indicator = (
    data_provider.get_indicator_values(indicator_name, source, 'MS')
    for source in ('Investing', 'OECD')
)

# Investing values win; OECD only fills the cells Investing leaves empty.
df_inflation_combined = df_investing_indicator.combine_first(df_oecd_indicator)
df_inflation_combined

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
1999-01-01,1.6,0.2,1.6,0.7,0.24,0.1,0.2,,2.2,-0.3,-1.1,1.5,1.4,-0.92,1.7,0.50,0.95,2.3,-1.2,0.40,9.38,1.5,1.65,96.9,8.84,19.02,5.2
1999-02-01,1.7,-0.1,1.4,0.7,0.18,0.3,0.1,1.19,2.1,-0.1,-1.7,1.8,1.3,-0.73,2.0,0.76,0.98,2.1,-1.3,2.09,8.64,0.2,2.24,103.2,8.58,18.54,3.8
1999-03-01,1.6,-0.4,1.7,1.0,0.35,0.5,0.2,,2.2,0.1,-2.6,2.2,1.3,-0.52,2.3,0.79,1.21,2.3,-1.8,-0.47,8.95,0.5,3.02,107.5,7.83,18.26,3.0
1999-04-01,1.7,-0.1,1.5,1.6,0.42,0.6,0.6,,2.1,0.1,-3.8,2.4,1.4,-0.32,2.3,1.06,1.18,2.5,-2.2,-0.10,8.36,0.4,3.35,112.9,7.57,18.23,2.9
1999-05-01,2.3,-0.4,1.3,1.5,0.41,0.6,0.5,1.04,2.3,0.1,-4.0,2.2,1.4,0.15,2.0,1.37,0.83,2.5,-2.2,0.49,7.71,0.8,3.14,116.6,7.08,18.01,2.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-01,3.7,3.2,6.7,4.0,4.80,1.6,6.1,5.37,3.0,7.5,1.8,2.6,5.4,4.00,2.4,5.60,4.09,4.8,0.1,2.52,6.83,3.4,4.61,5.2,4.80,4.64,2.0
2023-09-01,3.2,3.0,6.7,3.8,4.90,1.7,4.5,,0.2,6.5,2.0,3.5,5.3,4.10,0.9,5.50,0.36,3.3,0.0,2.93,5.02,3.7,5.19,6.0,5.40,4.45,1.9
2023-10-01,3.1,3.3,4.6,3.1,4.00,1.7,3.8,4.10,-0.4,6.5,2.7,3.5,1.7,4.70,0.1,4.90,0.36,4.0,-0.2,3.05,4.87,3.8,4.82,6.7,5.90,4.26,1.8
2023-11-01,3.4,2.8,3.9,3.1,3.50,1.4,3.2,4.05,1.6,5.8,2.6,3.2,0.7,3.60,0.6,3.30,0.76,4.8,-0.5,2.90,5.55,3.3,4.68,7.5,5.50,4.32,1.5


In [66]:
# Reference date and the same day two months earlier.
date = pd.Timestamp('2023-12-06')
date_minus_2_months = date - pd.DateOffset(months=2)

# First day of that earlier month.
latest_known_period = pd.Timestamp(
    year=date_minus_2_months.year,
    month=date_minus_2_months.month,
    day=1,
)

# Six most recent monthly rows up to and including that month.
df_inflation_combined.loc[:latest_known_period].tail(6)

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
2023-05-01,3.0,3.2,8.7,3.4,5.1,2.2,6.1,6.03,6.1,9.7,2.0,3.2,7.6,5.1,2.9,6.8,5.2,6.7,0.2,2.02,4.25,3.3,3.94,2.5,6.3,5.84,2.8
2023-06-01,3.2,3.3,7.9,2.8,4.5,1.7,6.4,,5.7,9.3,1.9,1.9,6.4,4.5,2.5,6.3,4.15,6.4,0.0,1.75,4.81,2.7,3.16,3.2,5.4,5.06,2.4
2023-07-01,3.7,3.3,6.8,3.3,4.3,1.6,6.2,5.4,4.6,9.3,1.8,2.3,5.9,4.1,3.1,6.5,4.14,5.4,-0.3,1.88,7.44,2.3,3.99,4.3,4.7,4.79,2.0
2023-08-01,3.7,3.2,6.7,4.0,4.8,1.6,6.1,5.37,3.0,7.5,1.8,2.6,5.4,4.0,2.4,5.6,4.09,4.8,0.1,2.52,6.83,3.4,4.61,5.2,4.8,4.64,2.0
2023-09-01,3.2,3.0,6.7,3.8,4.9,1.7,4.5,,0.2,6.5,2.0,3.5,5.3,4.1,0.9,5.5,0.36,3.3,0.0,2.93,5.02,3.7,5.19,6.0,5.4,4.45,1.9
2023-10-01,3.1,3.3,4.6,3.1,4.0,1.7,3.8,4.1,-0.4,6.5,2.7,3.5,1.7,4.7,0.1,4.9,0.36,4.0,-0.2,3.05,4.87,3.8,4.82,6.7,5.9,4.26,1.8


PMI indicators have all values for the previous month available by day 6 of each month at the latest (with very rare exceptions, such as South Africa, which publishes on day 8 every couple of months).

In [67]:
# Manufacturing PMI again, kept under its own name (`df_pmi_indicator`) for the
# latest-values slice in the next cell; it comes from the 'Investing' source only.
indicator_name = 'Manufacturing PMI'
df_pmi_indicator = data_provider.get_indicator_values(indicator_name, 'Investing', 'MS')
df_pmi_indicator

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
1999-01-01,,,,,,46.5,,,,48.0,,,,,,,,,,,,,,,,,
1999-02-01,,,,,,48.0,,,,48.6,,,,,,,,,,,,,,,,,
1999-03-01,,,,,,48.7,,,,51.3,,,,,,,,,,,,,,,,,
1999-04-01,,,,,,51.2,,,,53.1,,,,,,,,,,,,,,,,,
1999-05-01,,,,,,51.8,,,,51.7,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-01,47.9,49.6,43.0,48.0,46.0,39.9,39.1,49.6,,45.8,49.8,46.5,45.4,,,,,51.4,49.7,44.3,58.6,48.9,50.1,52.7,49.7,51.2,
2023-09-01,49.8,48.5,44.3,47.5,44.2,44.9,39.6,48.7,,43.3,49.6,47.7,46.8,,,,,52.5,50.2,46.4,57.5,49.9,49.0,54.5,45.4,49.8,
2023-10-01,50.0,48.7,44.8,48.6,42.8,40.6,40.8,48.2,,45.7,48.9,45.1,44.9,,,,,47.9,49.5,47.6,55.5,49.8,48.6,53.8,45.4,52.1,
2023-11-01,49.4,48.3,47.2,47.7,42.9,42.1,42.6,47.7,,49.0,50.1,46.3,44.4,,,,,49.9,49.4,48.3,56.0,50.0,49.4,53.8,48.2,52.5,


In [68]:
# Reference date and the same day one month earlier.
date = pd.Timestamp('2023-12-06')
date_minus_1_month = date - pd.DateOffset(months=1)

# First day of that earlier month.
latest_known_period = date_minus_1_month.to_period('M').start_time

# Six most recent monthly rows up to and including that month.
df_pmi_indicator.loc[:latest_known_period].tail(6)

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
2023-06-01,46.3,49.8,46.5,48.8,46.0,44.9,40.6,48.2,,44.8,50.3,48.0,43.8,,,,,48.0,49.0,44.8,57.8,47.8,46.6,52.6,47.6,50.9,
2023-07-01,49.0,49.6,45.3,49.6,45.1,38.5,38.8,49.6,,47.6,49.4,47.8,44.5,,,,,56.7,49.3,44.1,57.7,49.4,47.8,52.1,47.3,50.9,
2023-08-01,47.9,49.6,43.0,48.0,46.0,39.9,39.1,49.6,,45.8,49.8,46.5,45.4,,,,,51.4,49.7,44.3,58.6,48.9,50.1,52.7,49.7,51.2,
2023-09-01,49.8,48.5,44.3,47.5,44.2,44.9,39.6,48.7,,43.3,49.6,47.7,46.8,,,,,52.5,50.2,46.4,57.5,49.9,49.0,54.5,45.4,49.8,
2023-10-01,50.0,48.7,44.8,48.6,42.8,40.6,40.8,48.2,,45.7,48.9,45.1,44.9,,,,,47.9,49.5,47.6,55.5,49.8,48.6,53.8,45.4,52.1,
2023-11-01,49.4,48.3,47.2,47.7,42.9,42.1,42.6,47.7,,49.0,50.1,46.3,44.4,,,,,49.9,49.4,48.3,56.0,50.0,49.4,53.8,48.2,52.5,


## Fill missing values

I believe the most reliable approach is to find the countries most correlated with the one that has gaps, and to fill its missing values with the mean of the values from the 5 most correlated countries.

In [69]:
indicator_name = 'GDP Growth Rate'

# Fetch the series from both providers: Investing first, then OECD.
df_investing_indicator, df_oecd_indicator = (
    data_provider.get_indicator_values(indicator_name, source, 'QS')
    for source in ('Investing', 'OECD')
)

# Investing values win; OECD only fills the cells Investing leaves empty.
df = df_investing_indicator.combine_first(df_oecd_indicator)
df

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
1999-01-01,0.94,-1.37,0.69,1.81,0.66,0.10,1.16,0.74,1.67,1.09,,1.00,0.63,,0.91,1.65,1.04,1.02,,,1.62,3.03,0.31,,0.96,0.86,
1999-02-01,0.94,-1.37,0.69,1.81,0.66,0.10,1.16,0.74,1.67,1.09,,1.00,0.63,,0.91,1.65,1.04,1.02,,,1.62,3.03,0.31,,0.96,0.86,
1999-03-01,0.94,-1.37,0.69,1.81,0.66,0.10,1.16,0.74,1.67,1.09,,1.00,0.63,,0.91,1.65,1.04,1.02,,,1.62,3.03,0.31,,0.96,0.86,
1999-04-01,0.83,0.38,0.47,0.84,0.62,0.65,0.00,0.34,1.11,0.68,,1.19,0.38,,0.52,0.53,1.37,0.04,,,2.58,4.38,0.32,,0.80,0.75,
1999-05-01,0.83,0.38,0.47,0.84,0.62,0.65,0.00,0.34,1.11,0.68,,1.19,0.38,,0.52,0.53,1.37,0.04,,,2.58,4.38,0.32,,0.80,0.75,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-01,4.90,-0.70,-0.10,-1.10,-0.10,0.30,-0.10,0.20,-0.30,-0.30,0.1,0.30,0.10,1.4,-0.70,-0.90,0.40,-0.50,1.3,,1.74,0.60,0.10,,-0.20,1.10,
2023-09-01,4.90,-0.70,-0.10,-1.10,-0.10,0.30,-0.10,0.20,-0.30,-0.30,0.1,0.30,0.10,1.4,-0.70,-0.90,0.40,-0.50,1.3,,1.74,0.60,0.10,,-0.20,1.10,
2023-10-01,3.30,-0.10,-0.30,0.30,0.00,,-0.30,,0.30,0.10,0.5,0.60,0.20,1.2,2.00,,0.40,1.50,1.0,,,0.60,,,,0.10,
2023-11-01,3.30,-0.10,-0.30,0.30,0.00,,-0.30,,0.30,0.10,0.5,0.60,0.20,1.2,2.00,,0.40,1.50,1.0,,,0.60,,,,0.10,


In [70]:
# Close short gaps first: carry values forward, then backward, by at most
# three rows in each direction.
df = df.ffill(limit=3).bfill(limit=3)

# Countries that still have at least one missing value after the short fill.
countries_with_missing_data = df.columns[df.isna().any()]
countries_with_missing_data

Index(['Hong Kong', 'Singapore', 'China', 'Taiwan', 'Russia', 'Malaysia'], dtype='object')

In [71]:
# Take the first country that still has gaps as the worked example for the cells below.
country = countries_with_missing_data[0]
country

'Hong Kong'

In [72]:
# Dates on which the example country has no value.
missing_dates = df.loc[df[country].isna()].index
missing_dates

DatetimeIndex(['1999-01-01', '1999-02-01', '1999-03-01', '1999-04-01',
               '1999-05-01', '1999-06-01', '1999-07-01', '1999-08-01',
               '1999-09-01', '1999-10-01',
               ...
               '2008-09-01', '2008-10-01', '2008-11-01', '2008-12-01',
               '2009-01-01', '2009-02-01', '2009-03-01', '2009-04-01',
               '2009-05-01', '2009-06-01'],
              dtype='datetime64[ns]', length=126, freq='MS')

In [73]:
# Year-month label of the last missing date, used to slice the price history by month.
month = missing_dates[-1].strftime('%Y-%m')
month

'2009-06'

In [74]:
# Country price series from the provider; the second returned item is not used here.
df_countries, _ = data_provider.get_etf_data()

# Number of rows kept for the look-back window ending in `month`.
bdays_in_year = 252
df_prices_last_12_months = df_countries.loc[:month].tail(bdays_in_year)

# Log returns, dropping any row that contains a missing value.
df_returns = df_prices_last_12_months.pipe(np.log).diff().dropna()

# Pairwise correlation of the countries' returns.
corr = df_returns.corr()
corr

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
United States,1.0,0.901993,0.919798,0.849813,0.923634,0.886232,0.907129,0.89652,0.906276,0.87883,0.891496,0.905187,0.889279,0.860327,0.487595,0.521776,0.839218,0.521584,0.377877,0.820045,0.433141,0.833579,0.884147,0.371958,0.871964,0.891181,0.769535
Japan,0.901993,1.0,0.876828,0.778865,0.888157,0.841323,0.875766,0.864787,0.865648,0.83602,0.866816,0.874124,0.868511,0.829784,0.454877,0.49458,0.794476,0.502011,0.418295,0.793273,0.385209,0.82159,0.829806,0.370371,0.812358,0.80017,0.76136
United Kingdom,0.919798,0.876828,1.0,0.847564,0.941093,0.895466,0.92374,0.890668,0.90376,0.887131,0.852451,0.910977,0.894687,0.833532,0.542523,0.568453,0.846089,0.58989,0.397259,0.801739,0.433922,0.818365,0.87417,0.432324,0.859183,0.85272,0.75978
Canada,0.849813,0.778865,0.847564,1.0,0.853334,0.813433,0.818594,0.829256,0.813041,0.819161,0.763152,0.838835,0.836114,0.735464,0.592524,0.590967,0.768534,0.631171,0.41167,0.731246,0.428262,0.734823,0.878863,0.437957,0.821701,0.832591,0.720931
France,0.923634,0.888157,0.941093,0.853334,1.0,0.927883,0.958424,0.898511,0.957192,0.930255,0.846568,0.954152,0.948682,0.825155,0.561926,0.613206,0.881153,0.602746,0.356512,0.810589,0.432414,0.808031,0.877906,0.444865,0.876029,0.855678,0.761927
Switzerland,0.886232,0.841323,0.895466,0.813433,0.927883,1.0,0.901663,0.834371,0.908473,0.875431,0.780511,0.916739,0.893676,0.747825,0.519286,0.531672,0.843945,0.525776,0.279858,0.74574,0.39606,0.735997,0.81406,0.362302,0.836499,0.79712,0.67921
Germany,0.907129,0.875766,0.92374,0.818594,0.958424,0.901663,1.0,0.877888,0.912185,0.908081,0.826989,0.926474,0.920731,0.828477,0.533366,0.57509,0.843833,0.580895,0.356607,0.811918,0.449602,0.815588,0.846973,0.435574,0.869621,0.840611,0.753413
Australia,0.89652,0.864787,0.890668,0.829256,0.898511,0.834371,0.877888,1.0,0.889061,0.845916,0.837111,0.89539,0.876972,0.844033,0.55062,0.53916,0.806136,0.579025,0.446736,0.827413,0.462097,0.830335,0.859352,0.451731,0.825318,0.822069,0.783322
Netherlands,0.906276,0.865648,0.90376,0.813041,0.957192,0.908473,0.912185,0.889061,1.0,0.901652,0.82301,0.940173,0.938447,0.8007,0.541162,0.563825,0.896922,0.562793,0.341823,0.788483,0.452674,0.780271,0.844668,0.423003,0.85054,0.823127,0.751422
Sweden,0.87883,0.83602,0.887131,0.819161,0.930255,0.875431,0.908081,0.845916,0.901652,1.0,0.812911,0.904574,0.900975,0.786453,0.543583,0.617222,0.828562,0.599243,0.319516,0.779698,0.386307,0.776567,0.822237,0.461356,0.827764,0.800014,0.74078


In [75]:
# Rank every country by its return correlation with the example country,
# highest first (the country itself is part of this ranking).
country = countries_with_missing_data[0]
ascending_corr = corr.loc[:, country].sort_values()
most_correlated_countries = ascending_corr.iloc[::-1]
most_correlated_countries

Hong Kong         1.000000
Singapore         0.905090
United States     0.891496
Japan             0.866816
Taiwan            0.865353
United Kingdom    0.852451
Korea             0.851425
France            0.846568
Spain             0.839193
Australia         0.837111
South Africa      0.836964
Germany           0.826989
Mexico            0.826548
Brazil            0.825582
Netherlands       0.823010
Sweden            0.812911
Italy             0.803122
Belgium           0.783716
Switzerland       0.780511
Malaysia          0.771802
Canada            0.763152
Finland           0.417065
Norway            0.411716
China             0.410993
India             0.409626
Denmark           0.364044
Russia            0.278141
Name: Hong Kong, dtype: float64

In [76]:
# Rank the other countries by correlation with `country`, highest first.
# The country itself is removed by label instead of by position, so a tie at
# 1.0 or a NaN correlation cannot push a different country out of the ranking;
# the descending sort also keeps NaN correlations at the end.
most_correlated_countries = (
    corr[country].drop(country).sort_values(ascending=False).index)
# Keep the five best-ranked countries that have complete data themselves.
most_correlated_countries_with_data = most_correlated_countries[
    ~most_correlated_countries.isin(countries_with_missing_data)][:5]
most_correlated_countries_with_data

Index(['United States', 'Japan', 'United Kingdom', 'Korea', 'France'], dtype='object')

In [77]:
# Row-wise average of the selected countries for the month, rounded to two
# decimals.
mean_values = (
    df.loc[month, most_correlated_countries_with_data]
    .mean(axis='columns')
    .round(decimals=2)
)
mean_values

2009-06-01    0.19
Freq: MS, dtype: float64

In [78]:
# Write the averaged value into the example country's cell for that month.
df.loc[month, country] = mean_values

In [79]:
# Show the example country's series without its remaining missing rows.
df.loc[:, country].dropna()

2009-06-01    0.19
2009-07-01    2.30
2009-08-01    2.30
2009-09-01    2.30
2009-10-01    2.30
              ... 
2023-08-01    0.10
2023-09-01    0.10
2023-10-01    0.50
2023-11-01    0.50
2023-12-01    0.50
Freq: MS, Name: Hong Kong, Length: 175, dtype: float64

Filling all missing indicator values at once

In [80]:
# Cache of correlation matrices keyed by 'YYYY-MM' month label; filled by the
# following cell.
corr_dict = {}

In [81]:
# Build the cache only while it is still empty, so re-running this cell does
# not recompute the correlations.
if not corr_dict:
    for date in df.index:
        month = date.strftime('%Y-%m')
        corr = data_provider.calculate_correlations_for_returns(month)
        corr_dict[month] = corr

In [82]:
# Fill short gaps (up to three consecutive rows) forward, then backward.
df = df.ffill(limit=3).bfill(limit=3)
countries_with_missing_data = df.columns[df.isna().sum() > 0]

for country in countries_with_missing_data:
    missing_dates = df[df[country].isna()].index

    for date in missing_dates:
        month = f'{date:%Y-%m}'
        corr = corr_dict[month]

        # Rank the other countries by correlation with `country`, highest
        # first. The country is removed by label rather than by position, and
        # the descending sort keeps NaN correlations last, so neither a tie
        # at 1.0 nor a NaN can displace a genuinely correlated country.
        most_correlated_countries = (
            corr[country].drop(country).sort_values(ascending=False).index)
        # Only countries with complete data can supply values; take the
        # five best-ranked of them.
        most_correlated_countries_with_data = most_correlated_countries[
            ~most_correlated_countries.isin(
                countries_with_missing_data)][:5]

        # Average of those countries on the missing date, to two decimals.
        mean_values = df.loc[
            date, most_correlated_countries_with_data].mean().round(2)

        df.loc[date, country] = mean_values

df

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
1999-01-01,0.94,-1.37,0.69,1.81,0.66,0.10,1.16,0.74,1.67,1.09,0.69,1.00,0.63,0.92,0.91,1.65,1.04,1.02,1.40,0.93,1.62,3.03,0.31,1.36,0.96,0.86,1.05
1999-02-01,0.94,-1.37,0.69,1.81,0.66,0.10,1.16,0.74,1.67,1.09,0.69,1.00,0.63,0.92,0.91,1.65,1.04,1.02,1.40,0.93,1.62,3.03,0.31,1.36,0.96,0.86,1.05
1999-03-01,0.94,-1.37,0.69,1.81,0.66,0.10,1.16,0.74,1.67,1.09,0.69,1.00,0.63,0.92,0.91,1.65,1.04,1.02,1.40,0.93,1.62,3.03,0.31,1.36,0.96,0.86,1.05
1999-04-01,0.83,0.38,0.47,0.84,0.62,0.65,0.00,0.34,1.11,0.68,0.55,1.19,0.38,0.67,0.52,0.53,1.37,0.04,1.33,1.33,2.58,4.38,0.32,1.01,0.80,0.75,0.50
1999-05-01,0.83,0.38,0.47,0.84,0.62,0.65,0.00,0.34,1.11,0.68,0.55,1.19,0.38,0.67,0.52,0.53,1.37,0.04,1.33,1.33,2.58,4.38,0.32,1.01,0.80,0.75,0.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-01,4.90,-0.70,-0.10,-1.10,-0.10,0.30,-0.10,0.20,-0.30,-0.30,0.10,0.30,0.10,1.40,-0.70,-0.90,0.40,-0.50,1.30,1.06,1.74,0.60,0.10,0.67,-0.20,1.10,-0.14
2023-09-01,4.90,-0.70,-0.10,-1.10,-0.10,0.30,-0.10,0.20,-0.30,-0.30,0.10,0.30,0.10,1.40,-0.70,-0.90,0.40,-0.50,1.30,-0.14,1.74,0.60,0.10,0.67,-0.20,1.10,-0.14
2023-10-01,3.30,-0.10,-0.30,0.30,0.00,0.30,-0.30,0.20,0.30,0.10,0.50,0.60,0.20,1.20,2.00,-0.90,0.40,1.50,1.00,0.82,1.74,0.60,0.10,0.47,-0.20,0.10,0.04
2023-11-01,3.30,-0.10,-0.30,0.30,0.00,0.30,-0.30,0.20,0.30,0.10,0.50,0.60,0.20,1.20,2.00,-0.90,0.40,1.50,1.00,0.82,1.74,0.60,0.10,0.47,-0.20,0.10,0.00


## Testing methods in DataProvider

In [2]:
# Smoke test: request the values of every key indicator once. The returned
# objects are discarded; only the indicator names are printed.
for indicator in data_provider.key_indicators:
    print(indicator)
    data_provider.get_key_indicator_values(indicator)

GDP Annual Growth Rate
GDP Growth Rate
Unemployment Rate
Inflation Rate
Inflation Rate MoM
Manufacturing PMI


Quarterly data for GDP

In [3]:
# First entry of key_indicators; the printed name confirms which indicator
# is loaded.
indicator = data_provider.key_indicators[0]
print(indicator)
df = data_provider.get_key_indicator_values(indicator)
df

GDP Annual Growth Rate


Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
1999-01-01,4.82,-0.84,2.34,4.19,2.78,1.03,0.79,5.27,4.21,4.22,2.20,4.24,0.86,2.95,2.31,6.92,1.90,0.87,8.9,3.04,6.20,6.58,0.67,3.56,0.98,2.29,1.83
1999-02-01,4.82,-0.84,2.34,4.19,2.78,1.03,0.79,5.27,4.21,4.22,2.20,4.24,0.86,2.95,2.31,6.92,1.90,0.87,8.9,3.04,6.20,6.58,0.67,3.56,0.98,2.29,1.83
1999-03-01,4.82,-0.84,2.34,4.19,2.78,1.03,0.79,5.27,4.21,4.22,2.20,4.24,0.86,2.95,2.31,6.92,1.90,0.87,8.9,3.04,6.20,6.58,0.67,3.56,0.98,2.29,1.83
1999-04-01,4.72,-0.03,2.17,5.01,2.61,0.58,1.22,4.61,4.62,3.52,1.88,4.30,0.64,2.84,3.62,4.30,2.96,-0.03,7.9,2.95,7.53,12.16,-0.44,3.86,1.64,2.30,2.14
1999-05-01,4.72,-0.03,2.17,5.01,2.61,0.58,1.22,4.61,4.62,3.52,1.88,4.30,0.64,2.84,3.62,4.30,2.96,-0.03,7.9,2.95,7.53,12.16,-0.44,3.86,1.64,2.30,2.14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-01,2.93,-2.90,0.30,0.47,0.60,0.30,-0.40,2.10,-0.80,-1.40,4.10,1.80,0.10,1.10,-0.30,-1.20,1.38,-1.49,4.9,2.32,7.02,1.40,2.00,5.10,-0.70,3.30,3.30
2023-09-01,2.93,-2.90,0.30,0.47,0.60,0.30,-0.40,2.10,-0.80,-1.40,4.10,1.80,0.10,1.10,-0.30,-1.20,1.38,-1.49,4.9,2.32,7.02,1.40,2.00,5.10,-0.70,3.30,3.30
2023-10-01,3.11,-0.40,-0.20,0.98,0.70,0.30,-0.20,2.10,-0.50,0.00,4.30,2.00,0.50,2.20,3.10,-0.80,1.56,-1.49,5.2,5.12,7.02,2.20,2.00,5.10,-0.70,2.50,3.00
2023-11-01,3.11,-0.40,-0.20,0.98,0.70,0.30,-0.20,2.10,-0.50,0.00,4.30,2.00,0.50,2.20,3.10,-0.80,1.56,-1.49,5.2,5.12,7.02,2.20,2.00,5.10,-0.70,2.50,3.00


In [4]:
# Reference date for the lookup.
date = pd.Timestamp('2023-12-06')
# NOTE(review): `periods` presumably sets how many rows are returned - confirm
# against DataProvider.get_latest_data.
data_provider.get_latest_data(indicator, df, date, periods=12)

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
2022-07-01,1.71,-0.8,1.9,3.1,1.0,0.5,1.2,5.9,3.1,2.5,-4.5,4.4,2.6,4.1,3.4,1.0,1.9,2.31,3.9,4.01,4.4,3.1,3.6,-3.7,4.1,4.3,14.2
2022-08-01,1.71,-0.8,1.9,3.1,1.0,0.5,1.2,5.9,3.1,2.5,-4.5,4.4,2.6,4.1,3.4,1.0,1.9,2.31,3.9,4.01,4.4,3.1,3.6,-3.7,4.1,4.3,14.2
2022-09-01,1.71,-0.8,1.9,3.1,1.0,0.5,1.2,5.9,3.1,2.5,-4.5,4.4,2.6,4.1,3.4,1.0,1.9,2.31,3.9,4.01,4.4,3.1,3.6,-3.7,4.1,4.3,14.2
2022-10-01,0.65,0.1,0.6,2.07,0.5,0.8,0.3,2.7,3.2,-0.9,-4.2,2.6,1.4,2.1,1.5,0.0,1.38,1.6,2.9,-0.41,6.1,1.3,1.9,-2.7,0.9,3.6,7.0
2022-11-01,0.65,0.1,0.6,2.07,0.5,0.8,0.3,2.7,3.2,-0.9,-4.2,2.6,1.4,2.1,1.5,0.0,1.38,1.6,2.9,-0.41,6.1,1.3,1.9,-2.7,0.9,3.6,7.0
2022-12-01,0.65,0.1,0.6,2.07,0.5,0.8,0.3,2.7,3.2,-0.9,-4.2,2.6,1.4,2.1,1.5,0.0,1.38,1.6,2.9,-0.41,6.1,1.3,1.9,-2.7,0.9,3.6,7.0
2023-01-01,1.72,2.7,0.2,2.21,0.8,0.6,-0.2,2.3,1.9,0.8,2.7,4.2,1.9,0.4,1.7,-0.4,1.68,2.58,4.5,-2.87,7.8,0.9,4.0,-1.8,0.2,3.7,5.6
2023-02-01,1.72,2.7,0.2,2.21,0.8,0.6,-0.2,2.3,1.9,0.8,2.7,4.2,1.9,0.4,1.7,-0.4,1.68,2.58,4.5,-2.87,7.8,0.9,4.0,-1.8,0.2,3.7,5.6
2023-03-01,1.72,2.7,0.2,2.21,0.8,0.6,-0.2,2.3,1.9,0.8,2.7,4.2,1.9,0.4,1.7,-0.4,1.68,2.58,4.5,-2.87,7.8,0.9,4.0,-1.8,0.2,3.7,5.6
2023-04-01,2.38,3.5,0.6,1.12,0.9,0.5,-0.2,2.1,-0.2,-1.0,1.5,2.2,0.4,0.5,0.9,-0.4,1.31,0.91,6.3,1.36,7.6,0.9,3.4,4.9,1.6,3.6,2.9


Sample monthly data

In [5]:
# Fifth entry of key_indicators (index 4); the printed name confirms which
# indicator is loaded.
indicator = data_provider.key_indicators[4]
print(indicator)
df = data_provider.get_key_indicator_values(indicator)
df

Inflation Rate MoM


Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
1999-01-01,0.2,-0.5,-0.6,0.2,-0.4,0.2,-0.1,0.64,0.06,0.0,0.59,0.4,0.1,0.65,-0.10,-0.21,0.33,0.4,0.2,0.16,-2.10,-0.1,0.70,8.3,0.75,2.53,0.07
1999-02-01,0.0,-0.4,0.2,0.1,0.3,0.3,0.1,0.35,0.50,0.0,0.54,0.1,0.1,0.56,0.77,0.24,0.23,0.2,1.3,-0.03,-1.19,0.4,1.05,4.2,0.00,1.34,0.10
1999-03-01,0.1,0.1,0.5,0.4,0.4,0.1,0.0,0.35,0.87,0.4,0.59,0.4,0.3,0.59,0.48,0.24,0.07,0.7,-0.8,-0.47,-0.24,0.2,1.10,2.8,0.00,0.93,0.24
1999-04-01,0.7,0.5,0.4,0.5,0.3,0.2,0.5,0.49,0.22,0.2,0.58,0.4,0.3,0.60,0.28,0.73,0.41,0.3,-1.0,-0.15,0.24,0.3,0.56,3.0,0.21,0.92,0.38
1999-05-01,0.1,0.0,0.3,0.2,0.0,-0.2,0.0,0.14,0.03,0.2,0.28,0.0,0.1,0.24,0.19,0.25,0.16,-0.2,-1.3,0.18,0.96,-0.2,0.30,2.1,-0.11,0.60,0.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-01,0.6,0.2,0.3,0.4,1.0,0.2,0.3,0.46,0.40,0.1,0.00,0.5,0.3,0.90,-0.70,-0.70,0.76,-0.8,0.3,0.18,-0.36,1.0,0.23,0.3,0.30,0.55,0.20
2023-09-01,0.4,0.3,0.5,-0.1,-0.5,-0.1,0.3,0.08,-0.40,0.5,0.40,0.2,0.2,0.50,-0.30,0.70,0.34,-0.1,0.2,0.34,-1.22,0.6,0.26,0.9,0.60,0.44,0.10
2023-10-01,0.0,0.3,0.0,0.1,0.1,0.1,0.0,0.00,0.40,0.2,1.00,0.3,-0.2,0.20,0.30,0.30,0.34,1.0,-0.1,0.33,0.65,0.3,0.24,0.8,0.90,0.38,0.10
2023-11-01,0.1,0.3,-0.2,0.1,-0.2,-0.2,-0.4,-0.24,-1.00,0.3,0.00,-0.3,-0.5,-0.20,-0.30,-0.30,0.17,0.5,-0.5,-0.07,0.51,-0.6,0.28,1.1,-0.10,0.64,0.10


In [6]:
# Reference date for the lookup.
date = pd.Timestamp('2023-12-06')
# NOTE(review): `periods` presumably sets how many rows are returned - confirm
# against DataProvider.get_latest_data.
data_provider.get_latest_data(indicator, df, date, periods=12)

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
2022-11-01,0.1,0.3,0.4,0.1,0.3,0.0,-0.5,-0.43,-3.04,1.0,-0.59,-0.1,0.5,-0.61,-0.85,1.22,0.23,-0.2,-0.2,0.07,0.0,-0.1,0.41,0.4,0.3,0.58,-0.51
2022-12-01,-0.1,0.3,0.4,-0.6,-0.1,-0.2,-0.8,0.09,0.47,2.1,0.28,0.2,0.3,0.01,-0.6,-0.07,-0.16,0.1,0.0,0.33,-0.15,0.2,0.62,0.8,0.4,0.38,0.06
2023-01-01,0.5,0.4,-0.6,0.5,0.4,0.6,1.0,-0.12,-1.38,-1.1,0.1,-0.2,0.1,-0.14,0.43,0.57,0.09,0.2,0.8,0.39,0.38,0.8,0.53,0.8,-0.1,0.68,0.14
2023-02-01,0.4,-0.6,1.1,0.4,1.1,0.7,0.8,0.46,1.01,1.1,-0.3,0.9,0.2,0.3,0.94,0.84,-0.7,0.4,-0.5,0.17,-0.08,0.3,0.84,0.5,0.7,0.56,0.2
2023-03-01,0.1,0.3,0.8,0.5,0.7,0.2,0.8,0.6,0.22,0.6,-0.3,0.4,-0.4,0.3,-0.17,0.65,0.57,0.8,-0.3,0.27,0.45,0.2,0.71,0.4,1.0,0.27,0.2
2023-04-01,0.4,0.6,1.2,0.7,0.6,0.0,0.4,0.79,1.03,0.5,-0.3,0.6,0.4,0.3,0.26,0.31,0.67,1.1,-0.1,0.42,0.68,0.2,0.61,0.4,0.4,-0.02,0.2
2023-05-01,0.1,0.0,0.7,0.4,-0.1,0.3,-0.1,0.22,0.2,0.3,-0.3,0.0,0.3,0.3,-1.4,0.3,0.38,0.5,-0.2,-0.11,0.37,0.3,0.23,0.3,0.2,-0.22,0.2
2023-06-01,0.2,0.2,0.1,0.1,0.2,0.1,0.3,0.08,-0.3,1.1,0.2,0.6,0.0,0.5,0.3,0.2,-0.15,0.6,-0.2,-0.08,1.26,0.0,-0.08,0.4,0.2,0.1,0.2
2023-07-01,0.2,0.4,-0.4,0.6,0.1,-0.1,0.3,0.26,1.0,0.0,0.0,0.2,0.0,-0.2,1.8,0.5,0.81,0.4,0.2,0.16,2.42,0.1,0.12,0.6,0.9,0.48,0.1
2023-08-01,0.6,0.2,0.3,0.4,1.0,0.2,0.3,0.46,0.4,0.1,0.0,0.5,0.3,0.9,-0.7,-0.7,0.76,-0.8,0.3,0.18,-0.36,1.0,0.23,0.3,0.3,0.55,0.2


PMI indicator

In [7]:
# Sixth entry of key_indicators (index 5); the printed name confirms which
# indicator is loaded.
indicator = data_provider.key_indicators[5]
print(indicator)
df = data_provider.get_key_indicator_values(indicator)
df

Manufacturing PMI


Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
1999-01-01,49.25,46.93,46.75,50.01,49.91,46.5,49.08,49.89,50.24,48.0,47.90,50.91,49.64,48.60,46.11,48.14,48.50,48.85,49.27,48.10,48.82,53.24,46.63,49.72,47.37,47.22,48.57
1999-02-01,49.55,47.05,47.04,50.14,49.87,48.0,48.90,49.80,50.26,48.6,48.27,50.93,49.62,48.94,46.38,48.30,48.77,48.98,49.23,48.50,48.97,52.79,46.80,50.15,47.30,48.36,48.60
1999-03-01,49.78,47.20,47.58,50.27,49.79,48.7,48.80,49.94,50.24,51.3,48.69,51.09,49.70,49.25,46.64,48.55,49.07,49.08,49.10,48.91,49.17,51.93,47.00,50.68,47.25,49.39,48.72
1999-04-01,49.96,47.35,48.38,50.43,49.87,51.2,48.77,50.18,50.24,53.1,49.14,51.12,49.85,49.56,46.81,48.98,49.55,49.06,48.96,49.30,49.39,50.89,47.34,51.18,47.28,50.19,48.94
1999-05-01,50.17,47.52,49.14,50.66,50.09,51.8,48.81,50.38,50.26,51.7,49.62,51.13,50.20,49.90,47.07,49.21,49.90,49.00,48.91,49.70,49.59,49.99,47.93,51.72,47.38,50.66,49.24
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-01,47.90,49.60,43.00,48.00,46.00,39.9,39.10,49.60,50.01,45.8,49.80,46.50,45.40,46.54,50.17,46.98,47.81,51.40,49.70,44.30,58.60,48.90,50.10,52.70,49.70,51.20,47.12
2023-09-01,49.80,48.50,44.30,47.50,44.20,44.9,39.60,48.70,49.97,43.3,49.60,47.70,46.80,45.99,49.87,46.81,47.84,52.50,50.20,46.40,57.50,49.90,49.00,54.50,45.40,49.80,47.13
2023-10-01,50.00,48.70,44.80,48.60,42.80,40.6,40.80,48.20,49.99,45.7,48.90,45.10,44.90,45.04,49.78,46.76,47.85,47.90,49.50,47.60,55.50,49.80,48.60,53.80,45.40,52.10,44.60
2023-11-01,49.40,48.30,47.20,47.70,42.90,42.1,42.60,47.70,50.00,49.0,50.10,46.30,44.40,45.98,49.61,46.76,47.93,49.90,49.40,48.30,56.00,50.00,49.40,53.80,48.20,52.50,48.07


In [8]:
# Reference date for the lookup.
date = pd.Timestamp('2023-12-06')
# NOTE(review): `periods` presumably sets how many rows are returned - confirm
# against DataProvider.get_latest_data.
data_provider.get_latest_data(indicator, df, date, periods=12)

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
2022-12-01,46.2,48.9,45.3,49.2,49.2,54.1,47.1,50.2,50.8,45.9,49.6,46.4,48.5,48.84,51.2,48.9,48.62,50.0,47.0,44.6,57.8,48.2,44.2,53.0,53.1,51.3,50.64
2023-01-01,46.9,48.9,47.0,51.0,50.5,49.3,47.3,50.0,50.84,46.8,51.2,48.4,50.4,49.51,54.9,48.92,48.78,50.0,50.1,44.3,55.4,48.5,47.5,52.6,53.0,48.9,49.85
2023-02-01,47.3,47.7,49.3,52.4,47.4,48.9,46.3,50.5,50.85,47.0,53.9,50.7,52.0,48.75,44.1,48.73,48.91,47.5,52.6,49.0,55.3,48.5,49.2,53.6,48.8,51.0,49.55
2023-03-01,49.2,49.2,47.9,48.6,47.3,47.0,44.7,49.1,50.81,45.7,53.5,51.3,51.1,49.12,44.3,48.38,48.96,48.3,51.9,48.6,56.4,47.6,47.0,53.2,48.1,51.0,48.48
2023-04-01,50.2,49.5,47.8,50.2,45.6,45.3,44.5,48.0,50.68,45.5,52.4,49.0,46.8,47.8,44.2,48.1,48.76,51.2,49.2,47.1,57.2,48.1,44.3,52.6,49.8,51.1,49.3
2023-05-01,48.4,50.6,47.1,49.0,45.7,43.2,43.2,48.4,50.51,40.6,50.6,48.4,45.9,47.24,45.7,47.85,48.39,47.4,48.8,44.3,58.7,48.4,47.1,53.5,49.2,50.5,49.27
2023-06-01,46.3,49.8,46.5,48.8,46.0,44.9,40.6,48.2,50.33,44.8,50.3,48.0,43.8,46.79,50.38,47.51,48.06,48.0,49.0,44.8,57.8,47.8,46.6,52.6,47.6,50.9,47.09
2023-07-01,49.0,49.6,45.3,49.6,45.1,38.5,38.8,49.6,50.15,47.6,49.4,47.8,44.5,45.63,50.39,47.2,47.86,56.7,49.3,44.1,57.7,49.4,47.8,52.1,47.3,50.9,47.51
2023-08-01,47.9,49.6,43.0,48.0,46.0,39.9,39.1,49.6,50.01,45.8,49.8,46.5,45.4,46.54,50.17,46.98,47.81,51.4,49.7,44.3,58.6,48.9,50.1,52.7,49.7,51.2,47.12
2023-09-01,49.8,48.5,44.3,47.5,44.2,44.9,39.6,48.7,49.97,43.3,49.6,47.7,46.8,45.99,49.87,46.81,47.84,52.5,50.2,46.4,57.5,49.9,49.0,54.5,45.4,49.8,47.13


## Combine individual indicators into a single composite indicator

### Using simple fixed weights

In [2]:
date = pd.Timestamp('2023-12-06')
no_periods = 12
# Accumulator of zeros: one row per period, one column per selected country.
df_composite = pd.DataFrame(
    0.0,
    index=range(no_periods),
    columns=data_provider.selected_countries)

for indicator in data_provider.key_indicators:
    print(indicator)
    df = data_provider.get_key_indicator_values(indicator)
    df_normalized = data_provider.normilize_dataframe(df)
    df_last_values = data_provider.get_latest_data(
        indicator, df_normalized, date, periods=no_periods)
    # Replace the returned index with 0..no_periods-1 so every indicator lines
    # up row by row with the accumulator.
    df_last_values.index = range(no_periods)

    # Manufacturing PMI is weighted 0.5; every other indicator 0.1.
    if indicator == 'Manufacturing PMI':
        weight = 0.5
    else:
        weight = 0.1
    print(weight)
    df_composite += df_last_values*weight

df_composite

GDP Annual Growth Rate
0.1
GDP Growth Rate
0.1
Unemployment Rate
0.1
Inflation Rate
0.1
Inflation Rate MoM
0.1
Manufacturing PMI
0.5


Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
0,-0.58666,-0.441787,-0.6469,-0.110683,-0.177338,0.213787,-0.571283,-0.099123,-0.62091,-0.324617,-0.72918,-0.398124,-0.039425,-0.300071,-0.127272,0.009133,-0.185343,-0.185889,-0.51084,-0.90548,1.028935,-0.458694,-0.726102,0.260317,1.098658,0.214311,0.125527
1,-0.539079,-0.437539,-0.423058,-0.028709,-0.093048,-0.47869,-0.639336,-0.014735,0.110736,0.055137,-0.332755,-0.087992,0.167036,-0.084599,0.417258,-0.253637,-0.251487,-0.128808,-0.053455,-0.884343,0.678657,-0.346284,-0.246374,0.313697,1.101933,-0.149151,0.120069
2,-0.363599,-0.573566,-0.341699,0.38097,-0.404487,-0.354221,-0.394467,-0.004582,-0.315449,-0.584499,-0.002027,0.147158,0.308182,-0.220523,-0.83967,-0.157646,-0.232878,-0.419931,0.455859,-0.234405,0.796946,-0.225737,-0.037556,0.4294,0.432019,0.195001,0.095079
3,-0.202122,-0.566532,-0.188886,-0.285632,-0.282238,-0.583947,-0.701771,-0.168725,0.213971,-0.421375,-0.065819,0.404065,0.138703,-0.277804,-0.749137,-0.184314,-0.439813,-0.335137,-0.083087,-0.474884,0.84875,-0.539683,-0.264898,0.310732,0.325527,0.126611,-0.264535
4,-0.13523,-0.335278,-0.269159,-0.069134,-0.606829,-0.927353,-0.75388,-0.286997,-0.045352,-0.594315,-0.218075,-0.075906,-0.600393,-0.471899,-1.014099,-0.283116,-0.19965,0.137293,-0.416842,-0.656665,1.026219,-0.503945,-0.669885,0.050005,0.618559,0.065284,-0.160688
5,-0.326027,-0.12317,-0.311387,-0.186135,-0.607769,-1.257361,-1.016307,-0.210536,0.116107,-1.279481,-0.453421,-0.118552,-0.540885,-0.540452,-0.750202,-0.385519,-0.251078,-0.316527,-0.443708,-1.001631,1.279996,-0.476648,-0.320461,0.13395,0.399043,-0.088686,-0.166837
6,-0.682975,-0.266936,-0.503211,-0.190197,-0.717114,-0.968756,-1.504157,-0.379221,-0.125608,-0.669983,-0.133091,-0.252124,-0.824702,-0.724092,-0.524715,-0.437872,-0.343433,-0.339298,-0.320823,-1.125922,1.162079,-0.536158,-0.434504,0.034785,0.165965,-0.062273,-0.491097
7,-0.297863,-0.252102,-0.802174,-0.153754,-0.787791,-1.879942,-1.653198,-0.223034,-0.262953,-0.104933,-0.153603,-0.180929,-0.823758,-0.850592,-0.175893,-0.506354,-0.503548,0.851887,-0.28482,-1.220461,1.345985,-0.394713,-0.359648,-0.003897,0.1066,-0.018836,-0.447043
8,-0.427827,-0.205552,-1.23649,-0.250773,-0.68989,-1.733772,-1.61953,-0.194053,-0.030547,-0.64682,-0.146274,-0.432015,-0.708954,-0.878901,0.123763,-0.46647,-0.316452,0.080004,-0.151429,-1.147414,1.776235,-0.444684,0.001445,0.165489,0.543976,0.099922,-0.531139
9,-0.067344,-0.358717,-0.905489,-0.480187,-0.719993,-1.012431,-1.54273,-0.272874,-0.251653,-1.059681,-0.416072,-0.260052,-0.549949,-0.667803,-0.505945,-0.746998,-0.343908,-0.087578,-0.050778,-0.722351,1.020969,-0.09853,-0.146322,0.539141,-0.110608,-0.090273,-0.595222


In [3]:
# Call the DataProvider composite method for an early reference date with six
# periods.
data_provider.calculate_simple_composite_indicator(
    date=pd.Timestamp('2000-01-06'), periods=6)

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
0,-0.118348,-0.694857,-0.326104,0.108484,0.052074,0.366535,-0.274084,-0.054192,-0.172866,0.410651,-0.281739,0.209486,-0.026507,-0.087294,-0.368784,-0.093451,-0.124329,-0.459051,-0.62876,-0.220671,0.169188,-0.035894,-0.288236,3.240286,-0.300703,0.351756,-0.165452
1,-0.016588,-0.684715,-0.357009,0.141689,0.075405,0.425886,-0.161368,-0.05513,-0.110276,0.667523,-0.222307,0.316054,0.092581,-0.00552,-0.387132,-0.048694,-0.018604,-0.528845,-0.406011,-0.272283,0.304527,-0.010949,-0.024114,3.535147,-0.397137,0.357793,-0.124329
2,-0.026478,-0.506485,-0.097875,0.165418,0.247538,0.88134,-0.25939,-0.056009,0.070186,0.858155,-0.214824,0.368602,0.036005,0.015082,-0.172272,0.043145,-0.004067,-0.50405,0.009776,0.001744,0.229237,0.231477,-0.080449,3.080185,-0.493345,0.335778,-0.115321
3,0.061616,-0.481714,0.025082,0.268153,0.320213,1.227673,-0.198921,0.027815,0.098134,1.299382,-0.13044,0.38355,0.174792,0.074055,-0.200034,0.125425,0.171167,-0.083444,0.239114,-0.143267,0.226918,0.01876,-0.110048,1.943327,-0.303638,0.394863,0.00621
4,0.03719,-0.491805,-0.078708,0.243757,0.336876,1.156625,-0.148464,0.031459,0.03254,1.059614,-0.08835,0.339685,0.228852,0.088341,-0.250826,0.155317,0.179369,-0.147974,-0.045114,-0.109632,0.474768,0.116899,0.104269,1.846011,-0.223303,0.289022,0.017679
5,0.031823,-0.648475,-0.033363,0.231658,0.310358,1.057307,-0.053432,0.025194,0.040705,1.456532,-0.081834,0.36201,0.199575,0.083084,-0.236067,0.157842,0.200979,-0.149126,-0.178758,-0.188026,0.141153,-0.063403,0.078845,1.67668,-0.230113,0.344255,0.024992


### Using PCA

In [4]:
date = pd.Timestamp('2023-12-06')
no_periods = 12
n_observations = no_periods * len(data_provider.selected_countries)
# Zero-initialised matrix: no_periods * number-of-countries rows, one column
# per key indicator.
indicators_norm = pd.DataFrame(
    0.0,
    index=range(n_observations),
    columns=data_provider.key_indicators)

for indicator in data_provider.key_indicators:
    print(indicator)
    df = data_provider.get_key_indicator_values(indicator)
    df_normalized = data_provider.normilize_dataframe(df)
    df_last_values = data_provider.get_latest_data(
        indicator, df_normalized, date, periods=no_periods)

    # Flatten the returned frame row by row into this indicator's column.
    indicators_norm.loc[:, indicator] = df_last_values.to_numpy().reshape(-1)

indicators_norm

GDP Annual Growth Rate
GDP Growth Rate
Unemployment Rate
Inflation Rate
Inflation Rate MoM
Manufacturing PMI


Unnamed: 0,GDP Annual Growth Rate,GDP Growth Rate,Unemployment Rate,Inflation Rate,Inflation Rate MoM,Manufacturing PMI
0,-0.191744,0.805435,-0.698297,0.760435,-0.286665,-1.251153
1,-0.900375,-0.271671,-0.931778,0.187008,0.132130,-0.526636
2,-0.138102,-0.303351,-0.558208,1.652434,0.341527,-1.492659
3,0.200686,0.710396,-0.324728,0.824149,-0.286665,-0.446134
4,-0.392193,-0.144953,0.165582,0.696721,0.132130,-0.446134
...,...,...,...,...,...,...
319,0.285383,0.076804,0.282322,0.403636,0.006491,-0.392466
320,0.708868,0.000773,-0.838385,0.802911,1.179116,0.788229
321,-0.222799,-0.018235,5.979253,0.633007,1.388513,-0.714474
322,0.341848,0.045125,-0.908430,0.284703,0.299648,0.439387


In [5]:
from sklearn.decomposition import PCA

In [6]:
# Fit a one-component PCA on the stacked, normalised indicators
# (`fit` returns the estimator itself, so it can be chained).
pca = PCA(n_components=1).fit(indicators_norm)

# `components_` holds one row per component; with a single component, a copy
# of row 0 is the flat vector of per-indicator loadings.
factor_weights = pca.components_[0].copy()
factor_weights

array([-0.00734421, -0.02045297,  0.97082414,  0.08892064,  0.21798762,
       -0.0400344 ])

In [7]:
# Project every row of `indicators_norm` onto the first principal component.
# Note: `fit_transform` fits `pca` again on the same data before projecting,
# so this cell does not depend on the model state left by the previous fit.
compsite_indicator = pca.fit_transform(indicators_norm)
compsite_indicator

array([[-5.53718872e-01],
       [-7.41856420e-01],
       [-1.69509928e-01],
       [-2.18550046e-01],
       [ 3.59264071e-01],
       [-1.06838679e+00],
       [-2.75961695e-01],
       [-8.28005094e-01],
       [-1.93039694e+00],
       [ 6.27291148e-01],
       [-8.98171115e-01],
       [ 1.50370119e+00],
       [ 7.17238993e-01],
       [-1.25641952e+00],
       [-1.23109145e+00],
       [ 7.27102152e-01],
       [ 7.53340581e-02],
       [-8.12376214e-01],
       [-3.02192887e-01],
       [-5.85692904e-01],
       [-1.89357937e-01],
       [-8.14917117e-01],
       [ 7.00172119e-01],
       [-3.09586314e-01],
       [ 6.10458408e+00],
       [-4.68657798e-01],
       [-9.18508128e-01],
       [-6.77086262e-01],
       [-7.38079416e-01],
       [-1.91549729e-01],
       [-5.89518637e-01],
       [ 1.57048826e-01],
       [-1.13455716e+00],
       [-4.64154215e-01],
       [-5.65830359e-01],
       [-3.56983808e-01],
       [ 1.24817121e+00],
       [-5.59796028e-01],
       [ 1.6

In [8]:
# Undo the flattening: fold the single PCA column back into a table with one
# column per selected country (the row count is inferred via -1).
compsite_indicator_arr = compsite_indicator.reshape(
    -1, len(data_provider.selected_countries))

# The default index is already 0..n_rows-1, so only the column labels are given.
compsite_indicator_df = pd.DataFrame(
    data=compsite_indicator_arr,
    columns=data_provider.selected_countries,
)
compsite_indicator_df

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
0,-0.553719,-0.741856,-0.16951,-0.21855,0.359264,-1.068387,-0.275962,-0.828005,-1.930397,0.627291,-0.898171,1.503701,0.717239,-1.25642,-1.231091,0.727102,0.075334,-0.812376,-0.302193,-0.585693,-0.189358,-0.814917,0.700172,-0.309586,6.104584,-0.468658,-0.918508
1,-0.677086,-0.738079,-0.19155,-0.589519,0.157049,-1.134557,-0.464154,-0.56583,-0.356984,1.248171,-0.559796,1.621047,0.579092,-0.984389,-1.160502,0.183387,-0.109692,-0.618769,-0.28576,-0.463792,-0.171599,-0.590534,0.713166,0.009075,6.147527,-0.533789,-0.650635
2,-0.369728,-0.696543,-0.77094,-0.110006,0.397803,-0.755649,0.370105,-0.637399,-1.216646,-0.067313,-0.686077,1.476465,0.463062,-1.088139,-0.615877,0.48701,-0.063076,-0.525492,0.058216,-0.48293,0.122775,-0.403549,0.653444,-0.162224,6.004085,-0.440378,-0.612914
3,-0.471251,-1.145831,0.048518,-0.107123,0.72575,-0.68703,0.303052,-0.395285,-0.14578,1.109301,-0.917719,1.895498,0.530423,-0.864928,-0.385772,0.557585,-0.449496,-0.332784,-0.498425,-0.585425,-0.284475,-0.689397,0.877177,-0.332446,6.404087,-0.541921,-0.553919
4,-0.643488,-0.694794,-0.070344,-0.095662,0.550098,-0.906439,0.303316,-0.319564,-0.572973,0.743446,-0.951236,1.550319,0.229079,-0.865856,-0.908365,0.434202,0.133301,-0.222911,-0.451794,-0.522908,-0.301612,-0.729079,0.874236,-0.513284,6.524651,-0.667243,-0.591579
5,-0.436208,-0.609342,0.070654,0.01041,0.529821,-0.980837,0.130921,-0.211063,-0.208973,0.703217,-0.947012,1.459163,0.615247,-0.833396,-0.731973,0.304359,0.162715,-0.092372,-0.390203,-0.43116,0.009399,-0.764411,0.834968,-0.589587,6.183555,-0.804502,-0.593145
6,-0.590469,-0.890216,-0.104967,-0.118308,0.213365,-0.848578,-0.089762,-0.488423,-0.537711,0.629121,-0.994325,1.234679,0.529924,-0.846724,-1.583769,0.323653,0.023119,-0.482395,-0.453681,-0.692668,-0.145082,-0.746246,0.592978,-0.649431,6.090445,-0.865947,-0.580024
7,-0.592719,-0.817553,-0.335726,-0.229837,0.348641,-0.880559,0.140491,-0.590035,-0.771561,1.251325,-0.780982,1.486153,0.31747,-0.754301,-0.815448,0.317228,-0.236486,-0.445209,-0.460681,-0.687887,0.263751,-0.889037,0.37852,-0.606824,6.076671,-0.802611,-0.591312
8,-0.503458,-0.680928,-0.537354,0.047691,0.312216,-0.966112,0.110824,-0.474436,-0.174766,0.088549,-0.901126,1.311488,0.343693,-1.045819,-0.094396,0.482973,0.134064,-0.475783,-0.265393,-0.581267,0.937511,-0.80024,0.392777,-0.529184,6.198523,-0.592144,-0.662989
9,-0.343302,-0.76719,-0.257831,-0.001368,0.749659,-0.880737,0.124288,-0.374923,-0.474722,0.479866,-0.853725,1.459596,0.397737,-0.550915,-1.238053,-0.082541,0.112685,-1.019258,-0.234892,-0.594945,-0.338572,-0.472006,0.44859,-0.679634,5.968531,-0.591815,-0.609779


In [10]:
# Compute the composite indicator through the DataProvider helper for an early
# date and a 6-period window; the result is displayed as one column per country.
# NOTE(review): presumably this wraps the manual PCA steps from the cells above
# - confirm against the DataProvider implementation.
data_provider.calculate_principal_component_from_indicators(
    date=pd.Timestamp('2000-01-06'), periods=6)

GDP Annual Growth Rate
GDP Growth Rate
Unemployment Rate
Inflation Rate
Inflation Rate MoM
Manufacturing PMI


Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
0,-0.754734,-1.37282,-0.94335,-0.742657,-1.037298,-1.03696,-0.98489,-0.86165,-0.848817,-0.994911,-1.977644,-0.615971,-0.840801,-1.065685,-0.584532,-0.913733,-1.057622,-0.706837,-2.27385,-1.344994,0.034904,-1.241056,-0.402189,24.824636,0.470662,2.656629,-0.657452
1,-0.631572,-1.366123,-1.120533,-0.679203,-1.131072,-1.050841,-0.819752,-0.85697,-0.81547,-1.241808,-2.182902,-0.471841,-0.61299,-0.987723,-0.772783,-0.93573,-0.951092,-0.93203,-1.804605,-1.475952,-0.148712,-1.19941,0.182229,26.34182,-0.175717,2.579177,-0.583516
2,-0.682726,-1.032002,-0.817686,-0.639218,-0.929227,-0.820815,-1.013317,-0.749857,-0.452546,-0.989902,-2.332337,-0.428652,-0.756846,-0.968771,-0.432887,-0.866284,-0.994331,-0.928944,-1.102618,-0.69497,-0.322929,-0.614949,0.227012,24.596861,-0.650263,2.449829,-0.668164
3,-0.568215,-1.136548,-0.778933,-0.495752,-0.921988,-0.854962,-1.047133,-0.644682,-0.567892,-0.66886,-2.250294,-0.479008,-0.605343,-0.9264,-0.432187,-0.764205,-0.812734,-0.448043,-0.640319,-1.058827,-0.450887,-0.896996,0.254538,12.402601,-0.747868,2.437592,-0.635088
4,-0.578134,-1.276243,-0.98025,-0.633008,-0.898871,-0.928726,-1.010407,-0.664567,-0.724606,-0.910576,-1.881199,-0.553162,-0.529217,-0.903451,-0.512738,-0.766067,-0.820413,-0.507634,-1.312662,-1.054878,-0.288804,-0.638084,0.830454,11.326609,-0.674754,2.124192,-0.657326
5,-0.578291,-1.667004,-0.887587,-0.728247,-0.915253,-0.946047,-0.840564,-0.664996,-0.690619,-1.041222,-1.905364,-0.441523,-0.63584,-0.953658,-0.512308,-0.792667,-0.753988,-0.480437,-1.591409,-1.444521,-1.065303,-0.956607,0.984158,9.878818,-0.705058,2.01008,-0.776305


## Get the dates on which to recalculate the algorithm after the latest Manufacturing PMI is released

In [11]:
# Load the ETF data: `df_countries` is the per-country frame that is sliced and
# looked up by date further down; `benchmark` is returned alongside it and is
# not used in the cells of this section.
df_countries, benchmark = data_provider.get_etf_data()

In [12]:
# Fetch the latest Manufacturing PMI report timestamp for each period.
# NOTE(review): presumably read from the project database via SQLAlchemy -
# confirm in SqlAlquemySelectDataHandler.
sql_handler = SqlAlquemySelectDataHandler()
# Per the displayed output this is a datetime64 Series named `MaxReportDate`.
max_report_date = sql_handler.get_max_pmi_report_day()
max_report_date

0     1999-01-01 04:00:00
1     1999-02-01 04:00:00
2     1999-03-01 04:00:00
3     1999-04-01 04:00:00
4     1999-05-01 04:00:00
              ...        
295   2023-08-02 19:30:00
296   2023-09-04 19:30:00
297   2023-10-04 19:30:00
298   2023-11-02 19:30:00
299   2023-12-04 19:30:00
Name: MaxReportDate, Length: 300, dtype: datetime64[ns]

In [13]:
# Push report dates that fall on the 1st of a month two days forward.
# Per the original note this targets 1999 to 2012, where PMI values are
# extrapolated from Business Confidence Indicators.
# NOTE(review): rows are selected by day-of-month == 1, not by year - confirm
# that no genuine PMI release after 2012 falls on the 1st.
first_of_month = max_report_date.dt.day == 1
max_report_date.loc[first_of_month] = (
    max_report_date.loc[first_of_month] + pd.DateOffset(days=2)
)
max_report_date

0     1999-01-03 04:00:00
1     1999-02-03 04:00:00
2     1999-03-03 04:00:00
3     1999-04-03 04:00:00
4     1999-05-03 04:00:00
              ...        
295   2023-08-02 19:30:00
296   2023-09-04 19:30:00
297   2023-10-04 19:30:00
298   2023-11-02 19:30:00
299   2023-12-04 19:30:00
Name: MaxReportDate, Length: 300, dtype: datetime64[ns]

In [14]:
# Get the next trading day after the last PMI index is published.
# For each report timestamp, keep only its YYYY-MM-DD part, slice the ETF frame
# from that date onwards and take the *second* remaining index entry.
# NOTE(review): assuming `df_countries` has a sorted date index, a report date
# that is itself a trading day yields the following session, while a
# non-trading report date skips the first session after it (e.g. 1999-01-03
# maps to 1999-01-05) - confirm this is intended. Fewer than two remaining
# sessions would raise an IndexError.
days_to_rebalance = [
    df_countries[report_ts.astype(str)[:10]:].index.values[1].astype(str)[:10]
    for report_ts in max_report_date.values
]

days_to_rebalance

['1999-01-05',
 '1999-02-04',
 '1999-03-04',
 '1999-04-06',
 '1999-05-04',
 '1999-06-04',
 '1999-07-07',
 '1999-08-04',
 '1999-09-07',
 '1999-10-05',
 '1999-11-04',
 '1999-12-06',
 '2000-01-04',
 '2000-02-04',
 '2000-03-06',
 '2000-04-04',
 '2000-05-04',
 '2000-06-06',
 '2000-07-05',
 '2000-08-04',
 '2000-09-06',
 '2000-10-04',
 '2000-11-06',
 '2000-12-05',
 '2001-01-04',
 '2001-02-06',
 '2001-03-06',
 '2001-04-04',
 '2001-05-04',
 '2001-06-05',
 '2001-07-05',
 '2001-08-06',
 '2001-09-05',
 '2001-10-04',
 '2001-11-06',
 '2001-12-04',
 '2002-01-04',
 '2002-02-05',
 '2002-03-05',
 '2002-04-04',
 '2002-05-06',
 '2002-06-04',
 '2002-07-05',
 '2002-08-06',
 '2002-09-04',
 '2002-10-04',
 '2002-11-05',
 '2002-12-04',
 '2003-01-06',
 '2003-02-04',
 '2003-03-04',
 '2003-04-04',
 '2003-05-06',
 '2003-06-04',
 '2003-07-07',
 '2003-08-05',
 '2003-09-04',
 '2003-10-06',
 '2003-11-04',
 '2003-12-04',
 '2004-01-06',
 '2004-02-04',
 '2004-03-04',
 '2004-04-06',
 '2004-05-04',
 '2004-06-04',
 '2004-07-

In [15]:
# Obtain the recalculation dates from the DataProvider helper; displayed as a
# Series of date strings named "Days to rebalance".
# NOTE(review): presumably packages the manual steps of the preceding cells -
# confirm that the values match `days_to_rebalance`.
days_to_recalculate = data_provider.get_days_to_recalculate()
days_to_recalculate

0      1999-01-05
1      1999-02-04
2      1999-03-04
3      1999-04-06
4      1999-05-04
          ...    
295    2023-08-03
296    2023-09-06
297    2023-10-05
298    2023-11-03
299    2023-12-05
Name: Days to rebalance, Length: 300, dtype: object

In [16]:
# Look up the ETF rows on every recalculation date. Label-based `.loc` with a
# list of labels raises a KeyError if any date is missing from the index, so
# this also checks that each date is an actual row of `df_countries`.
df_countries.loc[days_to_recalculate]

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,Hong Kong,Spain,Italy,Singapore,Denmark,Finland,Belgium,Norway,China,Taiwan,India,Korea,Brazil,Russia,South Africa,Mexico,Malaysia
1999-01-05,79.4712,28.2760,16.2128,5.9427,11.4526,11.6441,12.6095,3.4484,14.6994,7.8733,4.6214,13.7158,21.7406,5.0466,9.4677,23.2446,9.4558,6.9694,17.7270,12.3668,4.4565,10.3690,5.2940,1.6368,7.8505,6.3180,4.9457
1999-02-04,80.1498,29.1487,15.9189,6.0640,10.7735,10.9927,11.8047,3.6499,12.8960,7.8972,4.2441,12.1083,18.7989,4.6304,8.6420,22.9547,8.6941,7.0237,13.9601,11.6554,4.6706,9.1833,4.3237,1.5913,8.1549,6.3180,5.2060
1999-03-04,79.8504,27.2287,16.1638,5.7002,10.1252,10.4227,10.6309,3.4932,12.1815,7.4664,4.4013,11.6157,18.3853,4.8385,7.8134,22.6171,7.8010,6.5500,14.7364,13.0844,5.1786,8.5491,4.1413,2.2753,8.3640,6.6262,4.3384
1999-04-06,84.5654,33.6868,16.8985,6.1853,10.7117,10.4227,10.9998,3.6947,13.0321,7.9690,4.9672,11.6157,19.3046,5.3587,8.1019,25.9049,7.6697,7.0336,16.3971,15.1502,5.1261,10.8055,5.7724,2.6961,8.7065,8.6294,5.3796
1999-05-04,85.6257,34.0359,17.7312,6.6704,10.8043,10.7484,11.5699,4.0977,14.0189,8.1126,6.1933,11.4342,18.8909,6.6594,8.6869,24.5969,7.5646,7.8379,20.4287,16.0761,5.0217,12.7460,6.0175,3.1837,10.0005,9.4385,9.5444
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-03,445.4563,61.1900,32.3700,34.7300,37.7900,46.5200,28.1600,22.3400,43.3800,34.3500,19.5500,28.1900,32.3100,19.3300,104.2500,33.9100,18.5300,23.1275,48.4500,46.1000,43.6000,65.4500,32.2900,8.0600,40.9500,61.2000,21.3500
2023-09-06,442.8560,62.2300,31.5800,34.2800,36.6100,45.0700,27.2300,21.8600,40.6400,33.2400,18.0900,27.4700,31.1600,18.3400,103.7400,34.1100,18.0100,22.6263,45.3400,45.5500,44.1700,62.0200,30.7600,8.0600,38.4600,59.6900,20.9500
2023-10-05,422.7859,59.2300,31.1000,32.4800,34.9900,43.1800,25.7100,21.0900,37.5300,32.5300,17.0400,26.7200,29.6900,17.9800,99.0500,31.7900,16.8700,21.9087,42.1800,44.8100,44.1600,57.9800,29.1000,8.0600,36.1500,54.2000,20.3600
2023-11-03,432.9347,62.0000,31.3300,33.8000,35.6500,43.1900,26.1500,21.8100,39.5000,33.0800,17.5400,27.8500,31.3400,18.2800,99.8800,32.8900,17.6400,22.2036,43.2500,45.7400,44.1500,59.6700,31.9000,8.0600,40.5300,59.5300,21.2200
