# Data Integration

In [None]:
import pandas as pd

In [None]:
# Load the US and Thai indicator tables from their CSV files.
us_df, th_df = (
    pd.read_csv(csv_path)
    for csv_path in ('data/us_indicators.csv', 'data/th_indicators.csv')
)

In [None]:
# Parse the day/month/year strings in each frame's 'date' column into
# datetime values (the frames are updated in place).
for frame in (us_df, th_df):
    frame['date'] = pd.to_datetime(frame['date'], format='%d/%m/%Y')

In [None]:
# Show us_df as this cell's output to check the parsed 'date' column.
us_df

In [None]:
# Show th_df as this cell's output to check the parsed 'date' column.
th_df

In [None]:
# Trim us_df so the join types below can be demonstrated: keep only the rows
# whose index label is at most 200 (label-based slices include the end
# point), then renumber the index from 0.
us_df = us_df.loc[:200].reset_index(drop=True)
us_df

## Merge Data

### Concatenate

#### Vertical

In [None]:
# Build one date/cpi table per country, each tagged with a constant
# 'country' column holding that country's code.
us_cpi_df = us_df.loc[:, ['date', 'cpi']].assign(country='us')
th_cpi_df = th_df.loc[:, ['date', 'cpi']].assign(country='th')

In [None]:
# Show the US date/cpi/country table.
us_cpi_df

In [None]:
# Show the Thai date/cpi/country table.
th_cpi_df

In [None]:
# Stack the two country tables row-wise (US rows first, then Thai rows) and
# renumber the resulting index from 0.
pd.concat([us_cpi_df, th_cpi_df], axis=0, ignore_index=True)

#### Horizontal

In [None]:
# Split us_df column-wise by position: the first five columns go to
# fst_us_df, every remaining column goes to snd_us_df.
fst_us_df, snd_us_df = us_df.iloc[:, :5], us_df.iloc[:, 5:]

In [None]:
# Show the first five columns of us_df.
fst_us_df

In [None]:
# Show the remaining columns of us_df (position 5 onward).
snd_us_df

In [None]:
# Put the two column groups back side by side; rows are matched on the index.
pd.concat((fst_us_df, snd_us_df), axis='columns')

### Join

#### Inner Join

In [None]:
# Inner join on 'date': keep only the dates present in both frames.
# Columns that exist in both frames get a country-code suffix; '_us' matches
# the 'us' code used for this frame elsewhere (previously the odd '_en').
us_df.merge(th_df, how='inner', on='date', suffixes=('_us', '_th'))

#### Outer Join

In [None]:
# Outer join on 'date': keep every date found in either frame; columns from
# the frame that lacks a given date are filled with missing values.
# Columns that exist in both frames get a country-code suffix; '_us' matches
# the 'us' code used for this frame elsewhere (previously the odd '_en').
us_df.merge(th_df, how='outer', on='date', suffixes=('_us', '_th'))

#### Left Join

In [None]:
# Left join on 'date': keep every row of us_df and attach the matching th_df
# columns, leaving them missing where th_df has no such date.
# Columns that exist in both frames get a country-code suffix; '_us' matches
# the 'us' code used for this frame elsewhere (previously the odd '_en').
us_df.merge(th_df, how='left', on='date', suffixes=('_us', '_th'))

#### Right Join

In [None]:
# Right join on 'date': keep every row of th_df and attach the matching us_df
# columns, leaving them missing where us_df has no such date.
# Columns that exist in both frames get a country-code suffix; '_us' matches
# the 'us' code used for this frame elsewhere (previously the odd '_en').
us_df.merge(th_df, how='right', on='date', suffixes=('_us', '_th'))

## Data Aggregation

In [None]:
# Group us_df by monetary-policy type. This cell only builds and shows the
# GroupBy object; no aggregation is applied here.
us_df.groupby('type_of_monetary_policy')

In [None]:
# Average policy rate within each monetary-policy type.
(
    us_df
    .groupby('type_of_monetary_policy')['policy_rate']
    .mean()
)

In [None]:
def _most_common(values):
    """Return the most frequent value in *values*, or NaN if there is none.

    ``Series.mode()`` returns an empty Series when every value is missing, so
    indexing it with ``[0]`` would raise for such a group. When several values
    tie, the first one returned by ``mode()`` is used.
    """
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else float('nan')


# One aggregation per column, applied within each monetary-policy type.
agg_rule = {
    'policy_rate': 'mean',
    'neer': 'median',
    'money_supply': 'sum',
    'inflation_target': _most_common,  # Use mode for the string column
}

us_df.groupby('type_of_monetary_policy').agg(agg_rule)

In [None]:
# Same numeric aggregations as before, now computed for each
# (monetary-policy type, inflation target) combination.
agg_rule = dict(policy_rate='mean', neer='median', money_supply='sum')

us_df.groupby(['type_of_monetary_policy', 'inflation_target']).agg(agg_rule)