In [39]:
# Enable IPython's autoreload extension so that edits to imported project
# modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from seminartools.data import read_inflation, read_commodity, read_gdp_growth, read_interest_rate, read_unemployment
import pandas as pd

# Load every raw dataset in its mergeable layout, keyed by a short dataset name.
# The readers are called in the order listed here.
dfs = {
    name: reader(mergeable_format=True)
    for name, reader in (
        ('inflation', read_inflation),
        ('commodity', read_commodity),
        ('gdp_growth', read_gdp_growth),
        ('interest_rate', read_interest_rate),
        ('unemployment', read_unemployment),
    )
}

In [213]:
# Inspect the commodity frame: in the recorded output it is indexed by date
# only and has one column per commodity series.
dfs["commodity"]

Unnamed: 0_level_0,commodity_CRUDE_PETRO,commodity_iNATGAS,commodity_iAGRICULTURE,commodity_iMETMIN,commodity_iPRECIOUSMET
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1977-01-01,0.000000,0.052425,0.144757,0.041940,0.112740
1977-04-01,-0.004000,0.043586,-0.066688,-0.045412,-0.060029
1977-07-01,0.014458,0.005967,-0.145538,0.019138,0.045236
1977-10-01,0.003959,0.005931,0.040048,0.046373,0.065161
1978-01-01,-0.001577,0.156056,0.072482,-0.002346,0.139776
...,...,...,...,...,...
2022-04-01,0.039177,-0.028903,-0.029440,-0.181045,-0.072004
2022-07-01,-0.244692,0.486523,-0.088860,-0.154988,-0.090042
2022-10-01,-0.115091,-0.355721,-0.016280,0.100554,0.093372
2023-01-01,-0.020410,-0.585458,0.002037,0.007418,0.043183


In [214]:
# The four country-level datasets are placed side by side, aligned on their
# index. The commodity series are indexed by date only, so they are attached
# afterwards to every country's rows by matching on the 'date' index level.
df = pd.concat(
    [dfs[name] for name in ('inflation', 'gdp_growth', 'interest_rate', 'unemployment')],
    axis=1,
).join(dfs['commodity'], on='date')
# Display only the complete rows; df itself still holds the rows with missing values.
df.dropna()

Unnamed: 0_level_0,Unnamed: 1_level_0,inflation,gdp_growth,interest_rate,unemployment_rate,commodity_CRUDE_PETRO,commodity_iNATGAS,commodity_iAGRICULTURE,commodity_iMETMIN,commodity_iPRECIOUSMET
country,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Japan,1977-01-01,0.022951,2.187257,6.333333,2.3,0.00000,0.052425,0.144757,0.041940,0.112740
Canada,1977-01-01,0.023904,1.533830,8.166667,8.8,0.00000,0.052425,0.144757,0.041940,0.112740
United States,1977-01-01,0.017231,1.186233,4.700000,8.2,0.00000,0.052425,0.144757,0.041940,0.112740
Japan,1977-04-01,0.027244,0.703788,5.000000,2.0,-0.00400,0.043586,-0.066688,-0.045412,-0.060029
Canada,1977-04-01,0.023346,0.510302,7.666667,8.0,-0.00400,0.043586,-0.066688,-0.045412,-0.060029
...,...,...,...,...,...,...,...,...,...,...
Switzerland,2023-01-01,0.010084,0.309543,1.166667,4.4,-0.02041,-0.585458,0.002037,0.007418,0.043183
Canada,2023-01-01,0.006080,0.610026,4.500000,5.4,-0.02041,-0.585458,0.002037,0.007418,0.043183
Sweden,2023-01-01,0.014955,0.656897,2.833333,7.8,-0.02041,-0.585458,0.002037,0.007418,0.043183
Korea,2023-01-01,0.010538,0.329875,3.500000,3.2,-0.02041,-0.585458,0.002037,0.007418,0.043183


# Figure out which countries each dataset covers

In [215]:
from collections import Counter, defaultdict
from pprint import pprint

def get_country_stats(country_lists: dict[str, list[str]]):
    """
    Summarise how the countries of several datasets overlap.

    Parameters
    ----------
    country_lists
        Mapping from dataset name to the countries that dataset covers.
        A country repeated within one list is counted once.

    Returns
    -------
    counter : Counter
        For each country, the number of datasets that contain it. A country
        is present in every dataset when its count equals
        ``len(country_lists)``.
    unique_countries : dict[str, list[str]]
        Per dataset, the countries that appear in no other dataset.
    missing_countries : dict[str, list[str]]
        Per dataset, the countries that appear in every other dataset but
        not in this one.
    containing_lists : defaultdict[str, list[str]]
        For each country, the names of the datasets that contain it, in the
        iteration order of ``country_lists``.
    """
    n = len(country_lists)

    # Deduplicate while keeping first-seen order, so a country repeated inside
    # one dataset is not mistaken for a country shared across datasets.
    distinct = {
        name: list(dict.fromkeys(countries))
        for name, countries in country_lists.items()
    }

    counter = Counter()
    containing_lists = defaultdict(list)
    for name, countries in distinct.items():
        counter.update(countries)
        for country in countries:
            containing_lists[country].append(name)

    unique_countries = {}
    missing_countries = {}
    for name, countries in distinct.items():
        present = set(countries)
        # Only this dataset contains the country when its overall count is 1.
        unique_countries[name] = [
            country for country in countries if counter[country] == 1
        ]
        # A country absent here but present in all the other datasets is
        # counted exactly n - 1 times (a count of n would imply it is present
        # here as well, so that test could never match).
        missing_countries[name] = [
            country for country in counter
            if counter[country] == n - 1 and country not in present
        ]

    return counter, unique_countries, missing_countries, containing_lists

# Collect the distinct country labels (index level 0) of each country-level
# dataset and compute the overlap statistics.
counter, unique_countries, missing_countries, containing_lists = get_country_stats({
    name: dfs[name].index.get_level_values(0).unique().tolist()
    for name in ('inflation', 'gdp_growth', 'interest_rate', 'unemployment')
})
# A count of 4 means the country occurs in each of the four datasets above.
print("Countries that are contained in all 4 datasets:")
pprint([name for name, hits in counter.items() if hits == 4])
print(f"In total: {sum(1 for hits in counter.values() if hits == 4)} countries")

Countries that are contained in all 4 datasets:
['Portugal',
 'New Zealand',
 'Japan',
 'South Africa',
 'Korea',
 'Sweden',
 'Switzerland',
 'France',
 'Denmark',
 'Netherlands',
 'Türkiye',
 'Germany',
 'Norway',
 'Belgium',
 'United States',
 'Canada',
 'Italy',
 'United Kingdom',
 'Australia',
 'Spain']
In total: 20 countries


In [216]:
# Order the (country, datasets) pairs so that countries covered by the most
# datasets come first. Going through dict(...) keeps the cell re-runnable once
# containing_lists has already been turned into a list of pairs.
containing_lists = sorted(
    dict(containing_lists).items(),
    key=lambda pair: len(pair[1]),
    reverse=True,
)
print("Countries and the datasets they are contained in:")
containing_lists

Countries and the datasets they are contained in:


[('Portugal', ['inflation', 'gdp_growth', 'interest_rate', 'unemployment']),
 ('New Zealand', ['inflation', 'gdp_growth', 'interest_rate', 'unemployment']),
 ('Japan', ['inflation', 'gdp_growth', 'interest_rate', 'unemployment']),
 ('South Africa',
  ['inflation', 'gdp_growth', 'interest_rate', 'unemployment']),
 ('Korea', ['inflation', 'gdp_growth', 'interest_rate', 'unemployment']),
 ('Sweden', ['inflation', 'gdp_growth', 'interest_rate', 'unemployment']),
 ('Switzerland', ['inflation', 'gdp_growth', 'interest_rate', 'unemployment']),
 ('France', ['inflation', 'gdp_growth', 'interest_rate', 'unemployment']),
 ('Denmark', ['inflation', 'gdp_growth', 'interest_rate', 'unemployment']),
 ('Netherlands', ['inflation', 'gdp_growth', 'interest_rate', 'unemployment']),
 ('Türkiye', ['inflation', 'gdp_growth', 'interest_rate', 'unemployment']),
 ('Germany', ['inflation', 'gdp_growth', 'interest_rate', 'unemployment']),
 ('Norway', ['inflation', 'gdp_growth', 'interest_rate', 'unemployment']),

In [198]:
# List every label on index level 0 of the GDP growth data. The recorded output
# includes aggregates such as 'G7' and 'OECD - Total' next to single countries.
dfs["gdp_growth"].index.get_level_values(0).unique().tolist()

['United States',
 'United Kingdom',
 'Australia',
 'Austria',
 'Belgium',
 'Denmark',
 'Finland',
 'France',
 'Germany',
 'Greece',
 'Iceland',
 'Ireland',
 'Italy',
 'Japan',
 'Korea',
 'Luxembourg',
 'Mexico',
 'Netherlands',
 'Norway',
 'Portugal',
 'Spain',
 'Sweden',
 'Switzerland',
 'South Africa',
 'Canada',
 'OECD - Europe',
 'G7',
 'NAFTA',
 'OECD - Total',
 'New Zealand',
 'Indonesia',
 'Costa Rica',
 'Slovak Republic',
 'Argentina',
 'Czechia',
 'Estonia',
 'Hungary',
 'Israel',
 'Latvia',
 'Lithuania',
 'Poland',
 'Slovenia',
 'European Union – 27 countries (from 01/02/2020)',
 'Euro area (20 countries)',
 'Bulgaria',
 'Croatia',
 'Romania',
 'Chile',
 'Brazil',
 'India',
 'Türkiye',
 'G20',
 'Russia',
 'Colombia',
 'Saudi Arabia',
 "China (People's Republic of)"]

# 4. Work on excluding countries from the merged data

In [31]:
from seminartools.data import read_merged
# Load the merged data through the project helper and move the index into
# ordinary columns (country and date appear as columns in the output).
# NOTE: this rebinds `df`, replacing the frame built in the concat/join cell.
df = read_merged().reset_index()
df


Unnamed: 0,country,date,inflation,gdp_growth,interest_rate,unemployment_rate,commodity_CRUDE_PETRO,commodity_iNATGAS,commodity_iAGRICULTURE,commodity_iMETMIN,commodity_iPRECIOUSMET
0,Japan,1977-01-01,0.022951,2.187257,6.333333,2.3,0.00000,0.052425,0.144757,0.041940,0.112740
1,Canada,1977-01-01,0.023904,1.533830,8.166667,8.8,0.00000,0.052425,0.144757,0.041940,0.112740
2,United States,1977-01-01,0.017231,1.186233,4.700000,8.2,0.00000,0.052425,0.144757,0.041940,0.112740
3,Japan,1977-04-01,0.027244,0.703788,5.000000,2.0,-0.00400,0.043586,-0.066688,-0.045412,-0.060029
4,Canada,1977-04-01,0.023346,0.510302,7.666667,8.0,-0.00400,0.043586,-0.066688,-0.045412,-0.060029
...,...,...,...,...,...,...,...,...,...,...,...
2166,Italy,2023-01-01,0.003938,0.552944,3.000000,8.3,-0.02041,-0.585458,0.002037,0.007418,0.043183
2167,Canada,2023-01-01,0.006080,0.610026,4.500000,5.4,-0.02041,-0.585458,0.002037,0.007418,0.043183
2168,Sweden,2023-01-01,0.014955,0.656897,2.833333,7.8,-0.02041,-0.585458,0.002037,0.007418,0.043183
2169,Korea,2023-01-01,0.010538,0.329875,3.500000,3.2,-0.02041,-0.585458,0.002037,0.007418,0.043183


In [32]:
# All distinct country names found in the merged frame.
countries = (df["country"].unique())
countries  # not displayed: only the last expression of a cell is shown
# NOTE(review): `countries` holds every country in df, so this mask is False
# for all rows and the selection below is empty (the recorded output has no
# rows). Presumably `countries` should be an explicit list of countries to
# exclude -- confirm the intent.
select = df.country.apply(lambda x : x  not in countries)
# NOTE(review): rebinds `dfs`, which earlier cells use for the dict of raw datasets.
dfs = df[select].copy()
# The result of this round trip is not assigned, so dfs is left unchanged.
dfs.set_index(['country', 'date']).reset_index()
dfs

Unnamed: 0,country,date,inflation,gdp_growth,interest_rate,unemployment_rate,commodity_CRUDE_PETRO,commodity_iNATGAS,commodity_iAGRICULTURE,commodity_iMETMIN,commodity_iPRECIOUSMET


In [42]:
from seminartools.data import read_merged

# Load the merged data restricted to Japan.
# NOTE(review): the recorded output has an extra `index` column with
# non-consecutive values (0, 3, 7, ...), which the unfiltered read_merged()
# output does not show. Presumably read_merged(only_countries=...) returns a
# frame whose index is no longer (country, date); confirm, and consider
# reset_index(drop=True) if that column is unwanted.
d = read_merged(only_countries = ["Japan"]).reset_index()
d

Unnamed: 0,index,country,date,inflation,gdp_growth,interest_rate,unemployment_rate,commodity_CRUDE_PETRO,commodity_iNATGAS,commodity_iAGRICULTURE,commodity_iMETMIN,commodity_iPRECIOUSMET
0,0,Japan,1977-01-01,0.022951,2.187257,6.333333,2.3,0.000000,0.052425,0.144757,0.041940,0.112740
1,3,Japan,1977-04-01,0.027244,0.703788,5.000000,2.0,-0.004000,0.043586,-0.066688,-0.045412,-0.060029
2,7,Japan,1977-07-01,0.003120,0.681687,4.750000,1.9,0.014458,0.005967,-0.145538,0.019138,0.045236
3,11,Japan,1977-10-01,0.010886,1.356457,4.250000,1.9,0.003959,0.005931,0.040048,0.046373,0.065161
4,14,Japan,1978-01-01,0.004615,1.889185,4.000000,2.5,-0.001577,0.156056,0.072482,-0.002346,0.139776
...,...,...,...,...,...,...,...,...,...,...,...,...
144,2090,Japan,2022-01-01,0.006876,-0.596637,-0.100000,2.7,0.542498,0.146272,0.163679,0.210497,0.093645
145,2105,Japan,2022-04-01,0.009756,1.081749,-0.100000,2.7,0.039177,-0.028903,-0.029440,-0.181045,-0.072004
146,2128,Japan,2022-07-01,0.010628,-0.096010,-0.100000,2.6,-0.244692,0.486523,-0.088860,-0.154988,-0.090042
147,2152,Japan,2022-10-01,0.011252,0.248075,-0.100000,2.4,-0.115091,-0.355721,-0.016280,0.100554,0.093372


# 2. Core inflation (compared with headline inflation)


In [4]:
from seminartools.data import read_merged

# Load the merged data with coreInf=True and flatten the index into columns.
# Presumably the flag makes the `inflation` column hold core instead of
# headline inflation -- TODO confirm in seminartools.data.
df_core = read_merged(coreInf=True).reset_index()
df_core

Unnamed: 0,country,date,inflation,gdp_growth,interest_rate,unemployment_rate,commodity_CRUDE_PETRO,commodity_iNATGAS,commodity_iAGRICULTURE,commodity_iMETMIN,commodity_iPRECIOUSMET
0,United States,1977-01-01,0.012346,1.186233,4.700000,8.2,0.00000,0.052425,0.144757,0.041940,0.112740
1,Japan,1977-01-01,0.016420,2.187257,6.333333,2.3,0.00000,0.052425,0.144757,0.041940,0.112740
2,Canada,1977-01-01,0.018797,1.533830,8.166667,8.8,0.00000,0.052425,0.144757,0.041940,0.112740
3,Japan,1977-04-01,0.030695,0.703788,5.000000,2.0,-0.00400,0.043586,-0.066688,-0.045412,-0.060029
4,Canada,1977-04-01,0.022140,0.510302,7.666667,8.0,-0.00400,0.043586,-0.066688,-0.045412,-0.060029
...,...,...,...,...,...,...,...,...,...,...,...
1317,Germany,2023-01-01,0.012626,0.104301,3.000000,3.0,-0.02041,-0.585458,0.002037,0.007418,0.043183
1318,Switzerland,2023-01-01,0.006717,0.309543,1.166667,4.4,-0.02041,-0.585458,0.002037,0.007418,0.043183
1319,Canada,2023-01-01,0.007967,0.610026,4.500000,5.4,-0.02041,-0.585458,0.002037,0.007418,0.043183
1320,Sweden,2023-01-01,0.025074,0.656897,2.833333,7.8,-0.02041,-0.585458,0.002037,0.007418,0.043183


In [5]:
# Load the merged data with default arguments (presumably headline inflation)
# as the counterpart to df_core, again with the index flattened into columns.
df_headline = read_merged().reset_index()
df_headline

Unnamed: 0,country,date,inflation,gdp_growth,interest_rate,unemployment_rate,commodity_CRUDE_PETRO,commodity_iNATGAS,commodity_iAGRICULTURE,commodity_iMETMIN,commodity_iPRECIOUSMET
0,Japan,1977-01-01,0.022951,2.187257,6.333333,2.3,0.00000,0.052425,0.144757,0.041940,0.112740
1,Canada,1977-01-01,0.023904,1.533830,8.166667,8.8,0.00000,0.052425,0.144757,0.041940,0.112740
2,United States,1977-01-01,0.017231,1.186233,4.700000,8.2,0.00000,0.052425,0.144757,0.041940,0.112740
3,Japan,1977-04-01,0.027244,0.703788,5.000000,2.0,-0.00400,0.043586,-0.066688,-0.045412,-0.060029
4,Canada,1977-04-01,0.023346,0.510302,7.666667,8.0,-0.00400,0.043586,-0.066688,-0.045412,-0.060029
...,...,...,...,...,...,...,...,...,...,...,...
2166,Italy,2023-01-01,0.003938,0.552944,3.000000,8.3,-0.02041,-0.585458,0.002037,0.007418,0.043183
2167,Canada,2023-01-01,0.006080,0.610026,4.500000,5.4,-0.02041,-0.585458,0.002037,0.007418,0.043183
2168,Sweden,2023-01-01,0.014955,0.656897,2.833333,7.8,-0.02041,-0.585458,0.002037,0.007418,0.043183
2169,Korea,2023-01-01,0.010538,0.329875,3.500000,3.2,-0.02041,-0.585458,0.002037,0.007418,0.043183


In [9]:
# Number of distinct countries in the core-inflation frame: 12 in the recorded
# output, versus the 20 countries listed for df_headline.
len(df_core['country'].unique())

12

In [8]:
# Distinct countries in the default (headline) frame, in order of first appearance.
df_headline['country'].unique()

array(['Japan', 'Canada', 'United States', 'Australia', 'New Zealand',
       'Switzerland', 'Italy', 'France', 'Portugal', 'Spain', 'Sweden',
       'Denmark', 'United Kingdom', 'Germany', 'Netherlands', 'Norway',
       'Belgium', 'Korea', 'South Africa', 'Türkiye'], dtype=object)