# Feature Engineering

In [None]:
import pandas as pd

In [None]:
# Load the US indicators CSV into the working DataFrame.
df = pd.read_csv(filepath_or_buffer='data/us_indicators.csv')

In [None]:
# Parse the day/month/year strings in 'date' into datetime values.
df = df.assign(date=pd.to_datetime(df['date'], format='%d/%m/%Y'))

In [None]:
# Display the DataFrame to check the parsed 'date' column.
df

## Feature Creation

### Interaction

In [None]:
# Interaction feature: element-wise product of CPI and the policy rate.
df['cpi_policy_rate'] = df['cpi'].mul(df['policy_rate'])

In [None]:
# Display the DataFrame to check the new 'cpi_policy_rate' column.
df

### Date Extraction

In [None]:
# Extract the calendar month number from the 'date' column.
df = df.assign(month=df['date'].dt.month)

In [None]:
# Display the DataFrame to check the new 'month' column.
df

### Lag

In [None]:
# Lag feature: the previous row's money supply (the first row becomes NaN).
df = df.assign(money_supply_lag=df['money_supply'].shift(periods=1))

In [None]:
# Display the DataFrame to check the new 'money_supply_lag' column.
df

### Change

In [None]:
# Absolute change in CPI relative to the previous row (the first row is NaN).
df['cpi_change'] = df['cpi'].diff(periods=1)

In [None]:
# Display the DataFrame to check the new 'cpi_change' column.
df

In [None]:
# Relative (fractional) change in CPI versus the previous row.
df = df.assign(cpi_change_rate=df['cpi'].pct_change(periods=1))

In [None]:
# Display the DataFrame to check the new 'cpi_change_rate' column.
df

### Rolling

In [None]:
# Rolling statistics of CPI over a 10-row window: mean and standard deviation.
df = df.assign(
    cpi_rolling_mean=df['cpi'].rolling(window=10).mean(),
    cpi_rolling_std=df['cpi'].rolling(window=10).std(),
)

In [None]:
# Show the first 20 rows: with window=10 the first 9 rolling values are NaN,
# so this view includes both the NaN warm-up rows and the first real values.
df.head(20)

## Encoding

### Label Encoding

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
# Learn the distinct 'inflation_target' values, then replace the column with
# their integer codes. The fitted encoder is kept so codes can be mapped back.
label_encoder_inflation = LabelEncoder().fit(df['inflation_target'])
df['inflation_target'] = label_encoder_inflation.transform(df['inflation_target'])

In [None]:
# Encode 'type_of_monetary_policy' as integer codes with its own encoder, so
# the category-to-code mapping stays separate from the other encoded column.
label_encoder_policy = LabelEncoder()
label_encoder_policy.fit(df['type_of_monetary_policy'])
df['type_of_monetary_policy'] = label_encoder_policy.transform(df['type_of_monetary_policy'])

In [None]:
# Display the DataFrame to check the label-encoded columns.
df

### One-hot Encoding

In [None]:
# One-hot encode 'month': the column is replaced by one 'm_<month>' indicator
# column per distinct month value.
df = pd.get_dummies(
    data=df,
    columns=['month'],
    prefix={'month': 'm'},
    prefix_sep='_',
)

In [None]:
# Display the DataFrame to check the one-hot encoded month columns.
df

In [None]:
# Show 'date' next to the one-hot month columns only. Match on the 'm_' prefix
# rather than a substring so unrelated columns that merely contain "m_"
# (e.g. 'term_spread') are not picked up.
df[['date'] + [col for col in df.columns if col.startswith('m_')]]

## Transformation

### Normalization

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Summary statistics of 'neer' before min-max scaling.
df['neer'].describe()

In [None]:
# Min-max normalize 'neer'. The scaler takes a 2-D input, hence the
# double-bracket column selection. The fitted scaler is kept for reuse.
neer_scaler = MinMaxScaler().fit(df[['neer']])
df['neer'] = neer_scaler.transform(df[['neer']])

In [None]:
# Display the DataFrame to check the rescaled 'neer' column.
df

In [None]:
# Summary statistics of 'neer' after min-max scaling (min should be 0, max 1).
df['neer'].describe()

### Scaling

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Summary statistics of 'money_supply' before standardization.
df['money_supply'].describe()

In [None]:
# Standardize 'money_supply' to zero mean and unit variance. The scaler takes
# a 2-D input, hence the double-bracket column selection.
money_supply_scaler = StandardScaler()
money_supply_scaler.fit(df[['money_supply']])
df['money_supply'] = money_supply_scaler.transform(df[['money_supply']])

In [None]:
# Display the DataFrame to check the standardized 'money_supply' column.
df

In [None]:
# Summary statistics of 'money_supply' after standardization: the mean should
# be ~0. describe() reports the sample std (ddof=1) while StandardScaler uses
# the population std, so the std shown is close to, but not exactly, 1.
df['money_supply'].describe()

## Dimensionality Reduction

In [None]:
from sklearn.decomposition import PCA

In [None]:
# Build the PCA input matrix.
# - 'date' is dropped because PCA only accepts numeric input.
# - The lag, change and rolling columns are dropped because they contain NaN,
#   which PCA does not accept.
features = df.drop(
    labels=[
        'date',
        'money_supply_lag',
        'cpi_change',
        'cpi_change_rate',
        'cpi_rolling_mean',
        'cpi_rolling_std',
    ],
    axis='columns',
)

In [None]:
# Display the feature matrix that will be passed to PCA.
features

In [None]:
# PCA picks directions of maximum variance, so columns on larger numeric
# scales would dominate the components. 'features' mixes raw-scale columns
# with already-rescaled and indicator columns, so standardize every column
# (zero mean, unit variance) before projecting onto 2 principal components.
pca = PCA(n_components=2)
pca_data = pca.fit_transform(StandardScaler().fit_transform(features))

In [None]:
# Inspect the PCA output array (one row per observation, 2 components).
pca_data

In [None]:
# Wrap the PCA output array in a DataFrame (default integer column labels).
pca_df = pd.DataFrame(pca_data)

In [None]:
# Display the principal components as a DataFrame.
pca_df

In [None]:
# Put the 'date' column side by side with the principal components (rows are
# aligned on the index), then display the result.
features_df = pd.concat(objs=[df.loc[:, ['date']], pca_df], axis='columns')
features_df