In [136]:
from src.SqlAlquemyInsertHandler import SqlAlquemyInsertHandler
import pandas as pd

from dotenv import load_dotenv
load_dotenv()

True

In [137]:
# Countries/regions expected in the data. Used as the column set of the pivoted
# per-country tables and as the reference list for the "Missing countries"
# report printed by the process_* loaders.
all_countries = ['United States', 'Japan', 'United Kingdom', 'Canada',
                 'France', 'Switzerland', 'Germany', 'Australia',
                 'Netherlands', 'Sweden', 'Spain', 'Hong Kong',
                 'Italy', 'Singapore', 'Belgium', 'Norway', 'Israel',
                 'Ireland', 'New Zealand', 'Austria', 'Euro Zone',
                 'China', 'Taiwan', 'India', 'South Korea', 'Brazil',
                 'Saudi Arabia', 'South Africa', 'Mexico', 'Indonesia',
                 'Türkiye', 'Poland', 'Argentina', 'Russia']

In [138]:
def process_quaterly_indicator(indicator, alt_indicator):
    """Load a quarterly, percentage-valued indicator and normalise it.

    Rows are read through ``SqlAlquemyInsertHandler.read_indicator``. Country
    adjective prefixes and ``(Q1)``..``(Q4)`` suffixes are stripped from the
    indicator name, rows named ``alt_indicator`` are relabeled to
    ``indicator``, and only rows whose cleaned name equals ``indicator`` are
    returned.

    Parameters
    ----------
    indicator : str
        Canonical indicator name to keep, e.g. ``'GDP (QoQ)'``.
    alt_indicator : str
        Alternative name treated as ``indicator``. An empty string disables
        the relabeling.

    Returns
    -------
    pandas.DataFrame
        Columns ``ReportDateTime``, ``Country``, ``Indicator``, ``Period``
        (start of the quarter preceding the report date) and ``Value``
        (``Actual`` with ``%`` and ``,`` removed, divided by 100).

    Also prints the countries of ``all_countries`` with no rows and the mean
    delay between ``Period`` and ``ReportDateTime``.
    """
    sql_handler = SqlAlquemyInsertHandler()
    df = sql_handler.read_indicator(indicator, alt_indicator)

    df_processed = df.loc[:, ['ReportDateTime', 'Country', 'Indicator']].copy()

    prefixes = ['Spanish ', 'French ', 'German ', 'Austrian ',
                'Italian ', 'Belgian ', 'Dutch ', 'Irish ']
    suffixes = [' (Q1)', ' (Q2)', ' (Q3)', ' (Q4)']

    # Strip sequentially, in list order, exactly one pass per entry.
    names = df_processed['Indicator']
    for prefix in prefixes:
        names = names.str.removeprefix(prefix)
    for suffix in suffixes:
        names = names.str.removesuffix(suffix)

    # Assign the result instead of calling replace(..., inplace=True) on a
    # column selection: the chained in-place form does not update the frame
    # under pandas Copy-on-Write.
    if alt_indicator:
        names = names.replace(alt_indicator, indicator)
    df_processed['Indicator'] = names

    # The report is attributed to the quarter before the one it was published in.
    periods = pd.PeriodIndex(df_processed['ReportDateTime'], freq='Q') - 1
    df_processed['Period'] = periods.to_timestamp()

    # Thousands separators are removed as process_monthly_indicator does.
    df_processed['Value'] = (
        df['Actual']
        .str.rstrip('%')
        .str.replace(',', '', regex=False)
        .astype(float) / 100
    )

    # .copy() so the returned frame is independent of the unfiltered one.
    df_processed = df_processed[df_processed['Indicator'] == indicator].copy()

    reported_countries = set(df_processed['Country'].unique())
    missing_countries = [x for x in all_countries
                         if x not in reported_countries]
    print('Missing countries')
    print(missing_countries)

    print('Avg publish delay: ', (df_processed['ReportDateTime'] -
                                  df_processed['Period']).mean())

    return df_processed

In [139]:
def transform_to_quarterly_df_countries(df_processed, start='2012-01-01',
                                        end='2023-07-01', countries=None):
    """Pivot long-format indicator rows into a quarter x country table.

    Parameters
    ----------
    df_processed : pandas.DataFrame
        Must contain ``Period``, ``Country`` and ``Value`` columns. It is not
        modified.
    start, end : str
        Bounds passed to ``pd.period_range(..., freq='Q')``; the defaults
        reproduce the original hard-coded range.
    countries : sequence of str, optional
        Column set of the result. Defaults to the notebook-level
        ``all_countries``.

    Returns
    -------
    pandas.DataFrame
        Indexed by quarter-start timestamps, one column per country. When a
        country has several rows for the same period, the first one is kept;
        periods without data are NaN. Columns carry the dtype of ``Value``
        rather than ``object``.
    """
    if countries is None:
        countries = all_countries

    period = pd.period_range(start, end, freq='Q').to_timestamp()

    # set_index returns a new frame, so the caller's DataFrame keeps its index
    # (the previous implementation overwrote df_processed.index in place).
    by_period = df_processed.set_index('Period')

    columns = {}
    for country in countries:
        values = by_period.loc[by_period['Country'] == country, 'Value']
        values = values[~values.index.duplicated(keep='first')]
        columns[country] = values.reindex(period)

    return pd.DataFrame(columns, index=period, columns=list(countries))

In [140]:
def transform_to_monthly_df_countries(df_processed, start='2012-01-01',
                                      end='2023-10-01', countries=None):
    """Pivot long-format indicator rows into a month x country table.

    Parameters
    ----------
    df_processed : pandas.DataFrame
        Must contain ``Period``, ``Country`` and ``Value`` columns. It is not
        modified.
    start, end : str
        Bounds passed to ``pd.period_range(..., freq='M')``; the defaults
        reproduce the original hard-coded range.
    countries : sequence of str, optional
        Column set of the result. Defaults to the notebook-level
        ``all_countries``.

    Returns
    -------
    pandas.DataFrame
        Indexed by month-start timestamps, one column per country. When a
        country has several rows for the same period, the first one is kept;
        periods without data are NaN. Columns carry the dtype of ``Value``
        rather than ``object``.
    """
    if countries is None:
        countries = all_countries

    period = pd.period_range(start, end, freq='M').to_timestamp()

    # set_index returns a new frame, so the caller's DataFrame keeps its index
    # (the previous implementation overwrote df_processed.index in place).
    by_period = df_processed.set_index('Period')

    columns = {}
    for country in countries:
        values = by_period.loc[by_period['Country'] == country, 'Value']
        values = values[~values.index.duplicated(keep='first')]
        columns[country] = values.reindex(period)

    return pd.DataFrame(columns, index=period, columns=list(countries))

# GDP (QoQ)

In [141]:
# Load quarterly GDP (QoQ) releases; rows named 'GDP Annualized (QoQ)' are
# relabeled to 'GDP (QoQ)' by the loader.
# NOTE(review): this puts annualized and plain quarter-on-quarter rates in the
# same series — confirm that is intended.
df_processed = process_quaterly_indicator('GDP (QoQ)', 'GDP Annualized (QoQ)')
df_processed

2274 indicators read
Missing countries
['Taiwan', 'India', 'Saudi Arabia', 'Türkiye', 'Argentina', 'Russia']
Avg publish delay:  141 days 22:21:24.870881568


Unnamed: 0,ReportDateTime,Country,Indicator,Period,Value
0,2012-01-11 06:10:00,Euro Zone,GDP (QoQ),2011-10-01,0.001
1,2012-01-25 04:30:00,United Kingdom,GDP (QoQ),2011-10-01,-0.002
2,2012-01-25 18:00:00,South Korea,GDP (QoQ),2011-10-01,0.004
3,2012-01-27 08:30:00,United States,GDP (QoQ),2011-10-01,0.028
4,2012-01-30 03:00:00,Spain,GDP (QoQ),2011-10-01,-0.003
...,...,...,...,...,...
2269,2023-11-14 04:00:00,Poland,GDP (QoQ),2023-07-01,0.014
2270,2023-11-14 05:00:00,Euro Zone,GDP (QoQ),2023-07-01,-0.001
2271,2023-11-14 18:50:00,Japan,GDP (QoQ),2023-07-01,-0.005
2272,2023-11-16 06:00:00,Israel,GDP (QoQ),2023-07-01,0.028


In [142]:
# Pivot GDP (QoQ) to quarter x country and show the last 10 quarters.
transform_to_quarterly_df_countries(df_processed).tail(10)

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,...,South Korea,Brazil,Saudi Arabia,South Africa,Mexico,Indonesia,Türkiye,Poland,Argentina,Russia
2021-04-01,0.065,0.003,0.048,-0.003,0.009,0.018,0.015,0.007,0.031,0.009,...,0.007,-0.001,,0.012,0.015,0.0331,,0.019,,
2021-07-01,0.02,-0.008,0.013,0.013,0.03,0.017,0.018,-0.019,0.019,0.018,...,0.003,-0.001,,-0.015,-0.002,0.0155,,0.021,,
2021-10-01,0.069,0.013,0.01,0.016,0.007,0.003,-0.007,0.034,0.009,0.014,...,0.011,0.005,,,-0.001,0.0106,,0.017,,
2022-01-01,-0.014,-0.002,0.008,0.008,0.0,0.005,0.002,0.008,0.0,-0.004,...,0.007,0.01,,0.019,0.009,-0.0096,,0.024,,
2022-04-01,-0.009,0.005,0.004,0.008,0.005,0.003,0.0,0.009,0.026,0.014,...,0.007,0.012,,-0.007,0.01,0.0372,,-0.023,,
2022-07-01,0.026,-0.003,-0.002,0.007,0.002,0.002,0.003,0.006,-0.002,0.007,...,0.003,0.004,,0.016,0.01,0.0181,,0.009,,
2022-10-01,0.029,0.002,0.0,0.0,0.001,0.0,-0.002,0.005,0.006,-0.006,...,-0.004,-0.002,,-0.013,0.004,0.0036,,-0.024,,
2023-01-01,0.011,0.004,0.001,0.008,0.002,0.003,-0.001,0.002,-0.007,0.002,...,0.003,0.019,,0.004,0.011,-0.0092,,0.039,,
2023-04-01,0.024,0.015,0.002,0.0,0.005,0.0,0.0,0.004,-0.003,-0.015,...,0.006,0.009,,0.006,0.009,0.0386,,-0.037,,
2023-07-01,0.049,-0.005,0.0,,0.001,,-0.001,,-0.002,0.0,...,0.006,,,,0.009,0.016,,0.014,,


# GDP (YoY)

In [143]:
# Load quarterly GDP (YoY) releases; rows named 'GDP Quarterly (YoY)' are
# relabeled to 'GDP (YoY)' by the loader. Rebinds df_processed.
df_processed = process_quaterly_indicator('GDP (YoY)', 'GDP Quarterly (YoY)')
df_processed

1924 indicators read
Missing countries
['United States', 'Belgium', 'Norway', 'Israel', 'Austria']
Avg publish delay:  142 days 07:54:10.749083290


Unnamed: 0,ReportDateTime,Country,Indicator,Period,Value
0,2012-01-16 21:00:00,China,GDP (YoY),2011-10-01,0.089
1,2012-01-31 03:00:00,Taiwan,GDP (YoY),2011-10-01,0.019
2,2012-02-05 23:00:00,Indonesia,GDP (YoY),2011-10-01,0.066
3,2012-02-15 03:30:00,Netherlands,GDP (YoY),2011-10-01,-0.007
4,2012-02-16 00:00:00,Singapore,GDP (YoY),2011-10-01,0.036
...,...,...,...,...,...
1919,2023-11-14 04:00:00,Poland,GDP (YoY),2023-07-01,0.004
1920,2023-11-14 05:00:00,Euro Zone,GDP (YoY),2023-07-01,0.001
1921,2023-11-14 18:50:00,Japan,GDP (YoY),2023-07-01,-0.021
1922,2023-11-15 11:00:00,Russia,GDP (YoY),2023-07-01,0.055


In [144]:
# Pivot GDP (YoY) to quarter x country and show the last 10 quarters.
transform_to_quarterly_df_countries(df_processed).tail(10)

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,...,South Korea,Brazil,Saudi Arabia,South Africa,Mexico,Indonesia,Türkiye,Poland,Argentina,Russia
2021-04-01,,0.013,0.222,0.1272,0.187,0.077,0.096,0.096,0.097,0.1,...,0.059,0.124,0.018,0.193,0.197,0.0707,,0.109,0.179,0.103
2021-07-01,,-0.03,0.069,0.0397,0.033,0.041,0.025,0.039,0.05,0.047,...,0.04,0.04,0.07,0.029,0.046,0.0351,,0.051,0.119,0.043
2021-10-01,,0.054,0.065,0.038,0.054,0.037,0.014,0.042,0.062,0.062,...,0.041,0.016,0.068,0.017,0.01,0.0502,,0.057,0.086,
2022-01-01,,-0.01,0.095,0.0289,0.053,0.044,0.04,0.033,0.07,0.03,...,0.031,0.017,0.099,0.03,0.016,0.0501,,0.085,0.06,0.05
2022-04-01,,0.022,0.035,0.0456,0.042,0.028,0.015,0.036,0.053,0.042,...,0.029,0.032,0.118,0.002,0.021,0.0544,,0.053,0.069,-0.04
2022-07-01,,-0.012,0.02,0.0393,0.01,0.005,0.011,0.059,0.031,0.026,...,0.031,0.036,0.086,0.041,0.042,0.0572,,0.035,0.059,-0.04
2022-10-01,,0.006,0.002,0.028,0.005,0.008,0.005,0.027,0.03,-0.006,...,0.014,0.019,0.055,0.009,0.036,0.0501,,0.049,0.019,
2023-01-01,,0.016,0.002,0.0221,0.008,0.006,0.002,0.023,0.019,0.003,...,0.008,0.04,0.038,0.002,0.039,0.0503,,-0.002,0.013,-0.027
2023-04-01,,0.06,0.004,0.0112,0.009,0.005,-0.002,0.021,-0.003,-0.024,...,0.009,0.034,0.012,0.016,0.037,0.0517,,-0.005,-0.049,0.049
2023-07-01,,-0.021,0.006,,0.007,,-0.003,,-0.006,-0.012,...,0.014,,-0.045,,0.033,0.0494,,0.004,,0.055


# Manufacturing PMI

In [145]:
def process_pmi_indicator(indicator, alt_indicator=''):
    """Load a monthly PMI-style indicator and normalise it.

    Rows are read through ``SqlAlquemyInsertHandler.read_indicator``. Provider
    and country prefixes and month / ``(MoM)`` suffixes are stripped from the
    indicator name, rows named ``alt_indicator`` are relabeled to
    ``indicator``, and only rows whose cleaned name equals ``indicator`` are
    returned.

    Parameters
    ----------
    indicator : str
        Canonical indicator name to keep, e.g. ``'Manufacturing PMI'``.
    alt_indicator : str, optional
        Alternative name treated as ``indicator``. The default empty string
        disables the relabeling.

    Returns
    -------
    pandas.DataFrame
        Columns ``ReportDateTime``, ``Country``, ``Indicator``, ``Period``
        and ``Value`` (``Actual`` cast to float, not rescaled). ``Period`` is
        the first day of the report month, or of the previous month when the
        report day-of-month is below 15.

    Also prints the countries of ``all_countries`` with no rows and the mean
    delay between ``Period`` and ``ReportDateTime``.
    """
    sql_handler = SqlAlquemyInsertHandler()
    df = sql_handler.read_indicator(indicator, alt_indicator)

    df_processed = df.loc[:, ['ReportDateTime', 'Country', 'Indicator']].copy()

    # Order matters: prefixes are stripped one after another, so e.g. a name
    # starting with 'S&P Global Taiwan ' first loses 'S&P Global ' and is only
    # fully cleaned by the later 'Taiwan ' entry.
    prefixes = [
        'S&P Global Hong Kong ',
        'S&P Global Canada ',
        'S&P Global/CIPS UK ',
        'S&P Global / CIPS UK ',
        'S&P Global US ',
        'S&P Global Mexico ',
        'S&P Global India ',
        'S&P Global South Korea ',
        'HCOB Germany ',
        'HCOB Eurozone ',
        'S&P Global ',
        'Judo Bank Australia ',
        'au Jibun Bank Japan ',
        'HCOB France ',
        'S&P Global Taiwan ',
        'Unicredit Bank Austria ',
        'Nikkei ',
        'AIB Ireland ',
        'Caixin ',
        'Russian S&P Global ',
        'Poland ',
        'HCOB Spain ',
        'HCOB Italy ',
        'Taiwan ',
        'Riyad Bank Saudi Arabia ',
        'Chinese '
    ]
    suffixes = [' (Jan)', ' (Feb)', ' (Mar)', ' (Apr)', ' (May)', ' (Jun)',
                ' (Jul)', ' (Aug)', ' (Sep)', ' (Oct)', ' (Nov)', ' (Dec)',
                ' (MoM)']

    names = df_processed['Indicator']
    for prefix in prefixes:
        names = names.str.removeprefix(prefix)
    for suffix in suffixes:
        names = names.str.removesuffix(suffix)

    # Assign the result instead of calling replace(..., inplace=True) on a
    # column selection: the chained in-place form does not update the frame
    # under pandas Copy-on-Write. Skipped for an empty alias so that empty
    # names are not relabeled as `indicator`.
    if alt_indicator:
        names = names.replace(alt_indicator, indicator)
    df_processed['Indicator'] = names

    periods = pd.PeriodIndex(df_processed['ReportDateTime'], freq='M')
    df_processed['Period'] = periods.to_timestamp()

    # correct period when logged at the beginning of the month: such reports
    # are attributed to the previous month
    reported_early = df_processed['ReportDateTime'].dt.day < 15
    prev_periods = pd.PeriodIndex(
        df_processed.loc[reported_early, 'ReportDateTime'], freq='M') - 1
    df_processed.loc[reported_early, 'Period'] = prev_periods.to_timestamp()

    df_processed['Value'] = df['Actual'].astype(float)

    # .copy() so the returned frame is independent of the unfiltered one.
    df_processed = df_processed[df_processed['Indicator'] == indicator].copy()

    reported_countries = set(df_processed['Country'].unique())
    missing_countries = [x for x in all_countries
                         if x not in reported_countries]
    print('Missing countries')
    print(missing_countries)

    print('Avg publish delay: ', (df_processed['ReportDateTime'] -
                                  df_processed['Period']).mean())

    return df_processed

In [146]:
# Load Manufacturing PMI releases; rows named 'procure.ch PMI' are relabeled
# to 'Manufacturing PMI' by the loader. Show the last 28 rows.
manufacturing_pmi = process_pmi_indicator('Manufacturing PMI', 'procure.ch PMI')
manufacturing_pmi.tail(28)

4915 indicators read
Missing countries
['Netherlands', 'Singapore', 'Belgium', 'Israel', 'New Zealand', 'Saudi Arabia', 'Türkiye', 'Argentina']
Avg publish delay:  29 days 07:21:43.594697820


Unnamed: 0,ReportDateTime,Country,Indicator,Period,Value
4884,2023-10-30 20:30:00,China,Manufacturing PMI,2023-10-01,49.5
4886,2023-10-31 17:00:00,Australia,Manufacturing PMI,2023-10-01,48.2
4887,2023-10-31 19:30:00,South Korea,Manufacturing PMI,2023-10-01,49.8
4888,2023-10-31 19:30:00,Japan,Manufacturing PMI,2023-10-01,48.7
4889,2023-10-31 19:30:00,Taiwan,Manufacturing PMI,2023-10-01,47.6
4890,2023-10-31 19:30:00,Indonesia,Manufacturing PMI,2023-10-01,51.5
4891,2023-10-31 20:01:00,Ireland,Manufacturing PMI,2023-10-01,48.2
4892,2023-10-31 20:45:00,China,Manufacturing PMI,2023-10-01,49.5
4893,2023-11-01 00:00:00,India,Manufacturing PMI,2023-10-01,55.5
4894,2023-11-01 01:00:00,Russia,Manufacturing PMI,2023-10-01,53.8


In [147]:
# Pivot Manufacturing PMI to month x country; when a country has several rows
# for the same period, the first one is kept.
transform_to_monthly_df_countries(manufacturing_pmi)

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,...,South Korea,Brazil,Saudi Arabia,South Africa,Mexico,Indonesia,Türkiye,Poland,Argentina,Russia
2012-01-01,,50.7,52.1,,48.5,47.3,50.9,,,51.3,...,,,,,,,,,,
2012-02-01,,50.5,51.2,,50.2,49.0,50.1,,,50.2,...,,,,57.9,,,,,,
2012-03-01,,51.1,52.1,,47.6,51.1,48.1,,,49.9,...,,,,55.1,,,,,,
2012-04-01,,50.7,50.5,,47.3,46.9,46.3,,,50.2,...,51.9,,,53.7,,,,,,
2012-05-01,54.0,50.7,45.9,,44.4,45.4,45.0,,,49.0,...,51.03,,,53.6,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-06-01,46.3,49.8,46.2,48.8,45.5,44.9,41.0,48.6,,44.8,...,47.8,46.6,,47.6,50.9,52.5,,45.1,,52.6
2023-07-01,49.0,49.4,45.0,49.6,44.5,38.5,38.8,49.6,,47.6,...,49.4,47.8,,47.3,50.9,53.3,,43.5,,52.1
2023-08-01,47.0,49.7,42.5,48.0,46.4,39.9,39.1,49.4,,45.8,...,48.9,50.1,,49.7,51.2,53.9,,43.1,,52.7
2023-09-01,48.9,48.6,44.2,47.5,43.6,44.9,39.8,48.2,,43.3,...,49.9,49.0,,45.4,49.8,52.3,,43.9,,54.5


In [148]:
# Load Services PMI releases (no alternative indicator name); show the last 15 rows.
services_pmi = process_pmi_indicator('Services PMI')
services_pmi.tail(15)

2636 indicators read
Missing countries
['Canada', 'Switzerland', 'Netherlands', 'Hong Kong', 'Singapore', 'Belgium', 'Norway', 'Israel', 'New Zealand', 'Austria', 'Taiwan', 'South Korea', 'Saudi Arabia', 'South Africa', 'Mexico', 'Indonesia', 'Türkiye', 'Poland', 'Argentina']
Avg publish delay:  30 days 11:14:18.437025796


Unnamed: 0,ReportDateTime,Country,Indicator,Period,Value
2621,2023-11-02 20:01:00,Ireland,Services PMI,2023-10-01,52.6
2622,2023-11-02 20:45:00,China,Services PMI,2023-10-01,50.4
2623,2023-11-03 00:00:00,India,Services PMI,2023-10-01,58.4
2624,2023-11-03 01:00:00,Russia,Services PMI,2023-10-01,53.6
2625,2023-11-03 02:30:00,Sweden,Services PMI,2023-10-01,48.5
2626,2023-11-03 04:30:00,United Kingdom,Services PMI,2023-10-01,49.5
2627,2023-11-03 08:45:00,United States,Services PMI,2023-10-01,50.6
2628,2023-11-05 19:30:00,Japan,Services PMI,2023-10-01,51.6
2629,2023-11-06 03:15:00,Spain,Services PMI,2023-10-01,51.1
2630,2023-11-06 03:45:00,Italy,Services PMI,2023-10-01,47.7


In [149]:
# Load Composite PMI releases (no alternative indicator name); show the last 9 rows.
composite_pmi = process_pmi_indicator('Composite PMI')
composite_pmi.tail(9)

1392 indicators read
Missing countries
['Japan', 'Canada', 'Switzerland', 'Australia', 'Netherlands', 'Sweden', 'Spain', 'Hong Kong', 'Singapore', 'Belgium', 'Norway', 'Israel', 'Ireland', 'New Zealand', 'Austria', 'Taiwan', 'India', 'South Korea', 'South Africa', 'Mexico', 'Indonesia', 'Türkiye', 'Poland', 'Argentina', 'Russia']
Avg publish delay:  29 days 13:35:37.456896551


Unnamed: 0,ReportDateTime,Country,Indicator,Period,Value
1383,2023-10-30 20:30:00,China,Composite PMI,2023-10-01,50.7
1384,2023-11-03 04:30:00,United Kingdom,Composite PMI,2023-10-01,48.7
1385,2023-11-03 08:45:00,United States,Composite PMI,2023-10-01,50.7
1386,2023-11-04 23:15:00,Saudi Arabia,Composite PMI,2023-10-01,58.4
1387,2023-11-06 03:45:00,Italy,Composite PMI,2023-10-01,47.0
1388,2023-11-06 03:50:00,France,Composite PMI,2023-10-01,44.6
1389,2023-11-06 03:55:00,Germany,Composite PMI,2023-10-01,45.9
1390,2023-11-06 04:00:00,Euro Zone,Composite PMI,2023-10-01,46.5
1391,2023-11-06 08:00:00,Brazil,Composite PMI,2023-10-01,50.3


In [150]:
# Load Construction PMI releases (no alternative indicator name); show the last 5 rows.
construction_pmi = process_pmi_indicator('Construction PMI')
construction_pmi.tail(5)

235 indicators read
Missing countries
['United States', 'Japan', 'Canada', 'Switzerland', 'Australia', 'Netherlands', 'Sweden', 'Spain', 'Hong Kong', 'Singapore', 'Belgium', 'Norway', 'Israel', 'Ireland', 'New Zealand', 'Austria', 'China', 'Taiwan', 'India', 'South Korea', 'Brazil', 'Saudi Arabia', 'South Africa', 'Mexico', 'Indonesia', 'Türkiye', 'Poland', 'Argentina', 'Russia']
Avg publish delay:  34 days 02:11:21.702127659


Unnamed: 0,ReportDateTime,Country,Indicator,Period,Value
230,2023-11-06 04:30:00,United Kingdom,Construction PMI,2023-10-01,45.6
231,2023-11-07 03:30:00,Italy,Construction PMI,2023-10-01,51.8
232,2023-11-07 03:30:00,Germany,Construction PMI,2023-10-01,38.3
233,2023-11-07 03:30:00,France,Construction PMI,2023-10-01,41.0
234,2023-11-07 03:30:00,Euro Zone,Construction PMI,2023-10-01,42.7


# Indicator exploration

In [177]:
# Survey of available indicators: normalise the raw indicator names, then
# count rows per cleaned name, most frequent first.
sql_handler = SqlAlquemyInsertHandler()
# NOTE(review): 2021 is presumably a start year for the query — confirm
# against read_all_indicators.
df = sql_handler.read_all_indicators(2021)

df_processed = df.loc[:, ['ReportDateTime', 'Country', 'Indicator']]

# Stripped one after another in this exact order (duplicates included), so
# the list must not be reordered or de-duplicated without checking the data.
prefixes = [
    'Austrian ',
    'Italian ',
    'Chinese ',
    'Dutch ',
    'German ',
    'French ',
    'Monthly ',
    'Spanish ',
    'Quarterly ',
    'S&P Global Hong Kong ',
    'S&P Global Canada ',
    'S&P Global/CIPS UK ',
    'S&P Global / CIPS UK ',
    'S&P Global US ',
    'S&P Global Mexico ',
    'S&P Global India ',
    'S&P Global South Korea ',
    'HCOB Germany ',
    'HCOB Eurozone ',
    'S&P Global ',
    'Judo Bank Australia ',
    'au Jibun Bank Japan ',
    'HCOB France ',
    'S&P Global Taiwan ',
    'Unicredit Bank Austria ',
    'Nikkei ',
    'AIB Ireland ',
    'Caixin ',
    'Russian S&P Global ',
    'Poland ',
    'HCOB Spain ',
    'HCOB Italy ',
    'Taiwan ',
    'Riyad Bank Saudi Arabia ',
    'Chinese ',
    'National ',
    'Irish ',
    'Belgium '
]
suffixes = [' (Jan)', ' (Feb)', ' (Mar)', ' (Apr)', ' (May)', ' (Jun)',
            ' (Jul)', ' (Aug)', ' (Sep)', ' (Oct)', ' (Nov)', ' (Dec)',
            ' s.a.', ' (Q1)', ' (Q2)', ' (Q3)', ' (Q4)']

indicator_names = df_processed['Indicator']
for prefix in prefixes:
    indicator_names = indicator_names.str.removeprefix(prefix)
for suffix in suffixes:
    indicator_names = indicator_names.str.removesuffix(suffix)
df_processed['Indicator'] = indicator_names

grouped_indicators = (
    df_processed
    .groupby(['Indicator'])
    .count()
    .sort_values(by='ReportDateTime', ascending=False)
)
grouped_indicators

15708 indicators read


Unnamed: 0_level_0,ReportDateTime,Country
Indicator,Unnamed: 1_level_1,Unnamed: 2_level_1
CPI (YoY),437,437
Manufacturing PMI,393,393
CPI (MoM),381,381
Trade Balance,361,361
Unemployment Rate,310,310
...,...,...
Steel Production (Metric Ton),1,1
7-Year Obligacion Auction,1,1
IGP-10 Inflation Index (MoM),1,1
Gross Debt-to-GDP ratio (MoM),1,1


In [179]:
# Print "<indicator> <row count> , <distinct countries>" for every indicator
# reported by at least MIN_COUNTRIES countries, most frequent indicator first.
MIN_COUNTRIES = 10

# One grouped pass instead of filtering and grouping the whole frame once per
# indicator.
countries_per_indicator = df_processed.groupby('Indicator')['Country'].nunique()

for indicator in grouped_indicators.index:
    countries = countries_per_indicator.loc[indicator]
    if countries >= MIN_COUNTRIES:
        print(indicator, grouped_indicators.loc[indicator, 'Country'], ',', countries)

CPI (YoY) 437 , 31
Manufacturing PMI 393 , 25
CPI (MoM) 381 , 25
Trade Balance 361 , 27
Unemployment Rate 310 , 28
Retail Sales (YoY) 261 , 21
Services PMI 251 , 14
PPI (YoY) 196 , 16
Industrial Production (YoY) 192 , 16
GDP (YoY) 177 , 26
GDP (QoQ) 177 , 25
Retail Sales (MoM) 168 , 13
Industrial Production (MoM) 156 , 12
PPI (MoM) 156 , 12
Consumer Confidence 153 , 12
Interest Rate Decision 87 , 11
Current Account 64 , 10


# Unemployment Rate

In [174]:
def process_monthly_indicator(indicator, alt_indicator=''):
    """Load a monthly, percentage-valued indicator and normalise it.

    Rows are read through ``SqlAlquemyInsertHandler.read_indicator``. Country
    and qualifier prefixes and month / quarter / ``s.a.`` suffixes are
    stripped from the indicator name, rows named ``alt_indicator`` are
    relabeled to ``indicator``, and only rows whose cleaned name equals
    ``indicator`` are returned.

    Parameters
    ----------
    indicator : str
        Canonical indicator name to keep, e.g. ``'CPI (YoY)'``.
    alt_indicator : str, optional
        Alternative name treated as ``indicator``. The default empty string
        disables the relabeling.

    Returns
    -------
    pandas.DataFrame
        Columns ``ReportDateTime``, ``Country``, ``Indicator``, ``Period``
        (first day of the month preceding the report date) and ``Value``
        (``Actual`` with ``%`` and ``,`` removed, divided by 100).

    Also prints the countries of ``all_countries`` with no rows and the mean
    delay between ``Period`` and ``ReportDateTime``.
    """
    sql_handler = SqlAlquemyInsertHandler()
    df = sql_handler.read_indicator(indicator, alt_indicator)

    df_processed = df.loc[:, ['ReportDateTime', 'Country', 'Indicator']].copy()

    prefixes = [
        'Austrian ',
        'Italian ',
        'Chinese ',
        'Dutch ',
        'German ',
        'French ',
        'Monthly ',
        'Spanish ',
        'Quarterly ',
        'National ',
        'Irish ',
        'Belgium ',
        'Turkish ',
        'Electronic Card ',
        'Brazilian '
    ]
    suffixes = [' (Jan)', ' (Feb)', ' (Mar)', ' (Apr)', ' (May)', ' (Jun)',
                ' (Jul)', ' (Aug)', ' (Sep)', ' (Oct)', ' (Nov)', ' (Dec)',
                ' s.a.', ' (Q1)', ' (Q2)', ' (Q3)', ' (Q4)']

    # Strip sequentially, in list order, exactly one pass per entry.
    names = df_processed['Indicator']
    for prefix in prefixes:
        names = names.str.removeprefix(prefix)
    for suffix in suffixes:
        names = names.str.removesuffix(suffix)

    # Assign the result instead of calling replace(..., inplace=True) on a
    # column selection: the chained in-place form does not update the frame
    # under pandas Copy-on-Write. Skipped for an empty alias so that empty
    # names are not relabeled as `indicator`.
    if alt_indicator:
        names = names.replace(alt_indicator, indicator)
    df_processed['Indicator'] = names

    # The report is attributed to the month before the one it was published in.
    periods = pd.PeriodIndex(df_processed['ReportDateTime'], freq='M') - 1
    df_processed['Period'] = periods.to_timestamp()

    df_processed['Value'] = (
        df['Actual']
        .str.rstrip('%')
        .str.replace(',', '', regex=False)
        .astype(float) / 100
    )

    # .copy() so the returned frame is independent of the unfiltered one.
    df_processed = df_processed[df_processed['Indicator'] == indicator].copy()

    reported_countries = set(df_processed['Country'].unique())
    missing_countries = [x for x in all_countries
                         if x not in reported_countries]
    print('Missing countries')
    print(missing_countries)

    print('Avg publish delay: ', (df_processed['ReportDateTime'] -
                                  df_processed['Period']).mean())

    return df_processed

In [154]:
# Load Unemployment Rate releases (values become fractions: percent / 100)
# and preview the first 30 rows.
unemployment_rate = process_monthly_indicator(indicator='Unemployment Rate')
unemployment_rate.head(30)

4160 indicators read
Missing countries
['Belgium', 'Ireland', 'India', 'Saudi Arabia', 'Indonesia', 'Türkiye']
Avg publish delay:  46 days 05:42:57.194672712


Unnamed: 0,ReportDateTime,Country,Indicator,Period,Value
1,2012-01-01 04:00:00,Hong Kong,Unemployment Rate,2011-12-01,0.033
2,2012-01-01 04:00:00,Mexico,Unemployment Rate,2011-12-01,0.0501
3,2012-01-01 04:00:00,Sweden,Unemployment Rate,2011-12-01,0.073
4,2012-01-01 04:00:00,Netherlands,Unemployment Rate,2011-12-01,0.054
5,2012-01-01 04:00:00,Taiwan,Unemployment Rate,2011-12-01,0.0423
6,2012-01-01 04:00:00,Poland,Unemployment Rate,2011-12-01,0.125
7,2012-01-01 04:00:00,Austria,Unemployment Rate,2011-12-01,0.082
9,2012-01-01 04:00:00,Norway,Unemployment Rate,2011-12-01,0.033
10,2012-01-03 03:55:00,Germany,Unemployment Rate,2011-12-01,0.068
11,2012-01-05 04:00:00,Italy,Unemployment Rate,2011-12-01,0.086


In [155]:
# Pivot the unemployment rate to month x country and keep it as df_countries.
df_countries = transform_to_monthly_df_countries(unemployment_rate)
df_countries

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,...,South Korea,Brazil,Saudi Arabia,South Africa,Mexico,Indonesia,Türkiye,Poland,Argentina,Russia
2012-01-01,0.083,,0.084,0.076,,0.031,0.068,0.051,0.055,0.084,...,0.032,0.055,,,0.0466,,,0.132,0.067,0.066
2012-02-01,0.083,0.046,0.084,0.074,0.094,0.031,0.067,0.052,0.06,0.082,...,0.037,0.057,,,0.0536,,,0.135,,0.065
2012-03-01,0.082,0.045,0.083,0.072,,0.031,,0.052,0.059,0.077,...,0.034,0.062,,,0.0501,,,0.133,,0.065
2012-04-01,0.081,0.046,0.082,0.073,,0.031,0.068,0.049,0.062,0.078,...,0.034,0.06,,0.252,0.0489,,,0.129,0.071,0.058
2012-05-01,0.082,0.044,0.082,0.073,0.1,0.032,0.068,0.051,0.062,0.081,...,0.032,0.058,,,0.0478,,,0.126,,0.054
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-06-01,0.036,0.025,0.04,0.054,,0.02,,0.035,0.035,0.062,...,0.026,0.08,,,0.027,,,0.05,,
2023-07-01,0.035,0.027,0.042,0.055,0.072,0.021,0.056,0.037,0.036,0.063,...,0.028,0.079,,0.326,0.029,,,0.05,,0.031
2023-08-01,0.038,0.027,0.043,0.055,,0.021,0.057,0.037,0.036,0.064,...,0.024,0.078,,,0.027,,,0.05,0.062,0.03
2023-09-01,0.038,0.026,0.042,0.055,,0.021,,0.036,0.037,0.064,...,0.026,0.077,,,0.027,,,0.05,,


In [156]:
# For each country column, print the most recent period that holds a value
# (None when the column has no data at all).
for country, column in df_countries.items():
    print(country, column.last_valid_index())

United States 2023-10-01 00:00:00
Japan 2023-09-01 00:00:00
United Kingdom 2023-10-01 00:00:00
Canada 2023-10-01 00:00:00
France 2023-10-01 00:00:00
Switzerland 2023-10-01 00:00:00
Germany 2023-10-01 00:00:00
Australia 2023-10-01 00:00:00
Netherlands 2023-10-01 00:00:00
Sweden 2023-10-01 00:00:00
Spain 2023-09-01 00:00:00
Hong Kong 2023-10-01 00:00:00
Italy 2023-10-01 00:00:00
Singapore 2023-09-01 00:00:00
Belgium None
Norway 2023-09-01 00:00:00
Israel 2023-10-01 00:00:00
Ireland None
New Zealand 2023-09-01 00:00:00
Austria 2023-10-01 00:00:00
Euro Zone 2023-10-01 00:00:00
China 2023-10-01 00:00:00
Taiwan 2023-10-01 00:00:00
India None
South Korea 2023-10-01 00:00:00
Brazil 2023-09-01 00:00:00
Saudi Arabia None
South Africa 2023-10-01 00:00:00
Mexico 2023-09-01 00:00:00
Indonesia None
Türkiye None
Poland 2023-09-01 00:00:00
Argentina 2023-08-01 00:00:00
Russia 2023-10-01 00:00:00


# CPI

In [157]:
# Load CPI (MoM) releases (values become fractions: percent / 100).
cpi_mom = process_monthly_indicator(indicator='CPI (MoM)')
cpi_mom

5366 indicators read
Missing countries
['Australia', 'New Zealand', 'India', 'Indonesia']
Avg publish delay:  44 days 00:05:33.719714964


Unnamed: 0,ReportDateTime,Country,Indicator,Period,Value
0,2012-01-01 04:00:00,United Kingdom,CPI (MoM),2011-12-01,0.0040
2,2012-01-01 04:00:00,South Korea,CPI (MoM),2011-12-01,0.0040
3,2012-01-01 04:00:00,Spain,CPI (MoM),2011-12-01,0.0010
4,2012-01-01 04:00:00,Brazil,CPI (MoM),2011-12-01,0.0050
6,2012-01-01 04:00:00,China,CPI (MoM),2011-12-01,0.0030
...,...,...,...,...,...
5358,2023-11-17 03:00:00,Austria,CPI (MoM),2023-10-01,0.0033
5360,2023-11-17 05:00:00,Euro Zone,CPI (MoM),2023-10-01,0.0010
5361,2023-11-21 03:30:00,Hong Kong,CPI (MoM),2023-10-01,0.0100
5363,2023-11-21 08:30:00,Canada,CPI (MoM),2023-10-01,0.0010


In [158]:
# Pivot CPI (MoM) to month x country.
transform_to_monthly_df_countries(cpi_mom)

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,...,South Korea,Brazil,Saudi Arabia,South Africa,Mexico,Indonesia,Türkiye,Poland,Argentina,Russia
2012-01-01,0.002,,-0.005,0.004,-0.004,-0.004,-0.004,,,-0.009,...,0.004,0.0056,0.0,0.0063,0.0071,,0.0056,0.0072,,0.005
2012-02-01,0.004,,0.006,0.004,0.004,0.003,0.007,,,0.007,...,0.004,0.0045,0.002,0.006,0.002,,0.0056,0.004,,0.004
2012-03-01,0.003,,0.003,0.004,0.008,0.006,0.003,,,0.003,...,0.0,0.0021,0.003,0.011,0.0006,,0.0041,0.0052,0.009,0.006
2012-04-01,0.000,,0.006,0.004,0.001,0.001,0.002,,,0.002,...,0.002,0.0064,0.002,0.004,-0.0031,,0.0152,0.0058,0.008,0.003
2012-05-01,-0.003,,-0.001,-0.001,-0.001,0.0,-0.002,,,-0.001,...,,0.0036,0.002,0.001,-0.0032,,-0.0021,0.0016,0.008,0.005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-06-01,0.002,0.002,0.001,0.001,0.002,0.001,0.003,,-0.003,0.011,...,0.0,-0.0008,0.002,0.002,0.001,,,0.0,0.06,0.004
2023-07-01,0.002,0.004,-0.004,0.006,0.001,-0.001,0.003,,0.01,0.0,...,0.001,0.0012,0.001,0.009,0.0048,,,-0.002,0.062,0.006
2023-08-01,0.006,0.002,0.003,0.004,0.01,0.002,0.003,,0.004,0.001,...,0.01,0.0023,0.0,0.003,0.0055,,,0.0,0.124,0.003
2023-09-01,0.004,0.003,0.005,-0.001,-0.005,-0.001,0.003,,-0.004,0.005,...,0.006,0.0026,0.0,0.006,0.0044,,,-0.004,0.127,0.009


In [159]:
# Load CPI (YoY) releases (values become fractions: percent / 100).
cpi_yoy = process_monthly_indicator(indicator='CPI (YoY)')
cpi_yoy

6858 indicators read
Missing countries
['Indonesia']
Avg publish delay:  44 days 06:42:00.197938144


Unnamed: 0,ReportDateTime,Country,Indicator,Period,Value
1,2012-01-01 04:00:00,Canada,CPI (YoY),2011-12-01,0.0230
2,2012-01-01 04:00:00,Italy,CPI (YoY),2011-12-01,0.0330
3,2012-01-01 04:00:00,Switzerland,CPI (YoY),2011-12-01,-0.0070
4,2012-01-01 04:00:00,Germany,CPI (YoY),2011-12-01,0.0200
5,2012-01-01 04:00:00,Spain,CPI (YoY),2011-12-01,0.0240
...,...,...,...,...,...
6847,2023-11-17 03:00:00,Austria,CPI (YoY),2023-10-01,0.0536
6849,2023-11-17 05:00:00,Euro Zone,CPI (YoY),2023-10-01,0.0290
6850,2023-11-21 03:30:00,Hong Kong,CPI (YoY),2023-10-01,0.0270
6853,2023-11-21 08:30:00,Canada,CPI (YoY),2023-10-01,0.0310


In [180]:
# Pivot CPI (YoY) to month x country. This rebinds df_countries, which an
# earlier cell uses for the unemployment-rate pivot.
df_countries = transform_to_monthly_df_countries(cpi_yoy)
df_countries

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Sweden,...,South Korea,Brazil,Saudi Arabia,South Africa,Mexico,Indonesia,Türkiye,Poland,Argentina,Russia
2012-01-01,0.03,0.001,0.036,0.025,,-0.008,0.021,,0.025,0.019,...,,0.062,0.026,0.0625,0.04,,0.1061,0.041,,0.042
2012-02-01,0.029,0.003,0.034,0.026,,-0.009,0.022,,0.025,0.019,...,0.031,0.0585,0.025,0.061,0.039,,0.1043,0.043,,0.037
2012-03-01,0.029,0.005,0.035,0.019,,-0.01,0.022,,0.025,0.015,...,0.026,0.0524,0.025,0.0602,0.0373,,0.1043,0.039,,0.037
2012-04-01,0.027,0.004,0.03,0.02,,-0.01,0.021,,0.024,0.013,...,0.025,0.051,0.025,0.0611,0.0341,,0.1114,0.04,,0.036
2012-05-01,0.023,0.002,0.028,0.012,,-0.01,0.019,,0.021,0.01,...,,0.0499,0.025,0.0565,0.0385,,0.0828,0.036,,0.036
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-06-01,0.03,0.033,0.079,0.028,0.045,0.017,0.064,0.06,0.057,0.093,...,0.027,0.0316,0.027,0.054,0.0506,,,0.115,1.156,0.032
2023-07-01,0.032,0.033,0.068,0.033,0.043,0.016,0.062,,0.046,0.093,...,0.023,0.0399,0.023,0.047,0.0479,,,0.108,1.134,0.043
2023-08-01,0.037,0.032,0.067,0.04,0.049,0.016,0.061,,0.03,0.075,...,0.034,0.0461,0.02,0.048,0.0464,,,0.101,1.252,0.052
2023-09-01,0.037,0.030,0.067,0.038,0.049,0.017,0.045,0.054,0.002,0.065,...,0.037,0.0519,0.017,0.054,0.0445,,,0.082,1.383,0.06


# Industrial Production

In [161]:
# Load Industrial Production (MoM) releases; rows named
# 'Industrial Output (MoM)' are relabeled to the primary name by the loader.
industrial_production_mom = process_monthly_indicator(indicator='Industrial Production (MoM)', 
                                                      alt_indicator='Industrial Output (MoM)')
industrial_production_mom

1984 indicators read
Missing countries
['Canada', 'Switzerland', 'Australia', 'Netherlands', 'Spain', 'Hong Kong', 'Belgium', 'Norway', 'Ireland', 'New Zealand', 'Austria', 'China', 'Taiwan', 'India', 'Saudi Arabia', 'South Africa', 'Indonesia', 'Poland', 'Argentina', 'Russia']
Avg publish delay:  43 days 13:00:25.645161290


Unnamed: 0,ReportDateTime,Country,Indicator,Period,Value
0,2012-01-01 04:00:00,South Korea,Industrial Production (MoM),2011-12-01,-0.0100
1,2012-01-01 04:00:00,Brazil,Industrial Production (MoM),2011-12-01,0.0270
2,2012-01-01 04:00:00,Singapore,Industrial Production (MoM),2011-12-01,0.0720
3,2012-01-01 04:00:00,Mexico,Industrial Production (MoM),2011-12-01,0.0049
4,2012-01-01 04:00:00,Sweden,Industrial Production (MoM),2011-12-01,-0.0330
...,...,...,...,...,...
1979,2023-11-10 04:00:00,Italy,Industrial Production (MoM),2023-10-01,0.0000
1980,2023-11-10 07:00:00,Mexico,Industrial Production (MoM),2023-10-01,0.0020
1981,2023-11-14 23:30:00,Japan,Industrial Production (MoM),2023-10-01,0.0050
1982,2023-11-15 05:00:00,Euro Zone,Industrial Production (MoM),2023-10-01,-0.0110


In [163]:
# Load Industrial Production (YoY) releases; rows named
# 'Industrial Output (YoY)' are relabeled to the primary name by the loader.
industrial_production_yoy = process_monthly_indicator(indicator='Industrial Production (YoY)', 
                                                      alt_indicator='Industrial Output (YoY)')
industrial_production_yoy

2422 indicators read
Missing countries
['Japan', 'Canada', 'France', 'Australia', 'Netherlands', 'Belgium', 'Norway', 'Israel', 'Ireland', 'New Zealand', 'Saudi Arabia', 'South Africa', 'Indonesia']
Avg publish delay:  44 days 04:16:35.350949628


Unnamed: 0,ReportDateTime,Country,Indicator,Period,Value
0,2012-01-01 04:00:00,United Kingdom,Industrial Production (YoY),2011-12-01,-0.0300
1,2012-01-01 04:00:00,Italy,Industrial Production (YoY),2011-12-01,-0.0230
2,2012-01-01 04:00:00,Euro Zone,Industrial Production (YoY),2011-12-01,-0.0170
3,2012-01-01 04:00:00,Sweden,Industrial Production (YoY),2011-12-01,-0.0550
4,2012-01-01 04:00:00,Poland,Industrial Production (YoY),2011-12-01,0.0767
...,...,...,...,...,...
2417,2023-11-14 21:00:00,China,Industrial Production (YoY),2023-10-01,0.0460
2418,2023-11-15 05:00:00,Euro Zone,Industrial Production (YoY),2023-10-01,-0.0690
2419,2023-11-16 09:15:00,United States,Industrial Production (YoY),2023-10-01,-0.0068
2420,2023-11-17 02:30:00,Switzerland,Industrial Production (YoY),2023-10-01,0.0200


# Retail Sales

In [167]:
# Load Retail Sales (MoM) releases (values become fractions: percent / 100).
retail_sales_mom = process_monthly_indicator(indicator='Retail Sales (MoM)')
retail_sales_mom

2603 indicators read
Missing countries
['Japan', 'France', 'Switzerland', 'Netherlands', 'Spain', 'Hong Kong', 'Belgium', 'Norway', 'Israel', 'Austria', 'China', 'Taiwan', 'India', 'Saudi Arabia', 'South Africa', 'Indonesia', 'Poland', 'Argentina', 'Russia']
Avg publish delay:  46 days 00:35:06.646971935


Unnamed: 0,ReportDateTime,Country,Indicator,Period,Value
1,2012-01-01 04:00:00,Brazil,Retail Sales (MoM),2011-12-01,0.0098
2,2012-01-01 04:00:00,Singapore,Retail Sales (MoM),2011-12-01,-0.0120
3,2012-01-01 04:00:00,New Zealand,Retail Sales (MoM),2011-12-01,0.0000
4,2012-01-01 04:00:00,Mexico,Retail Sales (MoM),2011-12-01,0.0087
5,2012-01-01 04:00:00,Sweden,Retail Sales (MoM),2011-12-01,0.0050
...,...,...,...,...,...
2596,2023-11-13 02:00:00,Türkiye,Retail Sales (MoM),2023-10-01,-0.0070
2597,2023-11-14 16:45:00,New Zealand,Retail Sales (MoM),2023-10-01,-0.0070
2599,2023-11-15 08:30:00,United States,Retail Sales (MoM),2023-10-01,-0.0010
2601,2023-11-17 02:00:00,United Kingdom,Retail Sales (MoM),2023-10-01,-0.0030


In [170]:
# Load Retail Sales (YoY) releases (values become fractions: percent / 100).
retail_sales_yoy = process_monthly_indicator(indicator='Retail Sales (YoY)')
retail_sales_yoy

3196 indicators read
Missing countries
['Canada', 'France', 'Australia', 'Belgium', 'Norway', 'Israel', 'Austria', 'Taiwan', 'India', 'South Korea', 'Saudi Arabia']
Avg publish delay:  46 days 05:06:33.331149689


Unnamed: 0,ReportDateTime,Country,Indicator,Period,Value
1,2012-01-01 04:00:00,United Kingdom,Retail Sales (YoY),2011-12-01,0.0260
2,2012-01-01 04:00:00,Italy,Retail Sales (YoY),2011-12-01,-0.0320
3,2012-01-01 04:00:00,Germany,Retail Sales (YoY),2011-12-01,0.0080
4,2012-01-01 04:00:00,Hong Kong,Retail Sales (YoY),2011-12-01,0.2350
5,2012-01-01 04:00:00,New Zealand,Retail Sales (YoY),2011-12-01,0.0580
...,...,...,...,...,...
3190,2023-11-15 06:00:00,South Africa,Retail Sales (YoY),2023-10-01,0.0090
3191,2023-11-15 08:30:00,United States,Retail Sales (YoY),2023-10-01,0.0248
3193,2023-11-17 02:00:00,United Kingdom,Retail Sales (YoY),2023-10-01,-0.0270
3194,2023-11-22 04:00:00,Poland,Retail Sales (YoY),2023-10-01,0.0480


# PPI

In [175]:
# Load PPI (MoM) releases (values become fractions: percent / 100).
ppi_mom = process_monthly_indicator(indicator='PPI (MoM)')
ppi_mom

2186 indicators read
Missing countries
['United Kingdom', 'Canada', 'Australia', 'Netherlands', 'Spain', 'Hong Kong', 'Singapore', 'Belgium', 'Norway', 'Israel', 'Ireland', 'New Zealand', 'China', 'Taiwan', 'India', 'Saudi Arabia', 'Indonesia', 'Poland', 'Argentina']
Avg publish delay:  47 days 21:47:47.873684210


Unnamed: 0,ReportDateTime,Country,Indicator,Period,Value
0,2012-01-01 04:00:00,South Korea,PPI (MoM),2011-12-01,0.0010
1,2012-01-01 04:00:00,Japan,PPI (MoM),2011-12-01,0.0000
2,2012-01-01 04:00:00,Russia,PPI (MoM),2011-12-01,0.0020
3,2012-01-01 04:00:00,Sweden,PPI (MoM),2011-12-01,-0.0017
4,2012-01-01 04:00:00,Austria,PPI (MoM),2011-12-01,-0.0030
...,...,...,...,...,...
2179,2023-11-14 02:30:00,Switzerland,PPI (MoM),2023-10-01,0.0020
2181,2023-11-15 08:30:00,United States,PPI (MoM),2023-10-01,-0.0050
2183,2023-11-20 02:00:00,Germany,PPI (MoM),2023-10-01,-0.0010
2184,2023-11-20 16:00:00,South Korea,PPI (MoM),2023-10-01,-0.0010


In [176]:
# Load PPI (YoY) releases (values become fractions: percent / 100).
ppi_yoy = process_monthly_indicator(indicator='PPI (YoY)')
ppi_yoy

2587 indicators read
Missing countries
['United Kingdom', 'Canada', 'Netherlands', 'Singapore', 'Belgium', 'Israel', 'Ireland', 'New Zealand', 'Taiwan', 'India', 'Brazil', 'Saudi Arabia', 'Indonesia', 'Argentina']
Avg publish delay:  45 days 18:42:08.437500


Unnamed: 0,ReportDateTime,Country,Indicator,Period,Value
1,2012-01-01 04:00:00,United States,PPI (YoY),2011-12-01,0.033
3,2012-01-01 04:00:00,Italy,PPI (YoY),2011-12-01,0.049
4,2012-01-01 04:00:00,South Korea,PPI (YoY),2011-12-01,0.043
5,2012-01-01 04:00:00,Switzerland,PPI (YoY),2011-12-01,-0.023
6,2012-01-01 04:00:00,Germany,PPI (YoY),2011-12-01,0.035
...,...,...,...,...,...
2581,2023-11-15 08:30:00,United States,PPI (YoY),2023-10-01,0.013
2583,2023-11-20 02:00:00,Germany,PPI (YoY),2023-10-01,-0.110
2584,2023-11-20 16:00:00,South Korea,PPI (YoY),2023-10-01,0.008
2585,2023-11-21 04:00:00,Poland,PPI (YoY),2023-10-01,-0.041
