In [1]:
import pandas as pd
import numpy as np
import os

# Printed once the imports above have run, as a visible confirmation for the reader.
print("Setup complete.")

Setup complete.


In [2]:
# Load raw data and put it in chronological order. Every shift()/rolling()
# feature below is positional (row-based), so the rows must follow 'date'.
df = pd.read_csv('../data/raw/eur_gbp_historical.csv')
df['date'] = pd.to_datetime(df['date'])
df = df.sort_values('date')

# 1. Create Lag Features (Looking back)
# shift(n) looks back n rows (observations), not n calendar days.
df['lag_1'] = df['rate'].shift(1)
df['lag_7'] = df['rate'].shift(7)

# 2. Moving Averages (Smooths out the noise)
# NOTE(review): each rolling window ends on, and includes, the current row's
# 'rate'. If a downstream model predicts the same-day rate, these columns leak
# the target and should be built from df['rate'].shift(1) — confirm against
# the modelling notebook.
df['ma_7'] = df['rate'].rolling(window=7).mean()
df['ma_30'] = df['rate'].rolling(window=30).mean()

# 3. Volatility (Risk measure)
# Rolling standard deviation of the rate level over the last 7 rows.
df['volatility_7'] = df['rate'].rolling(window=7).std()

# 4. Calendar Features (Seasonal patterns)
df['day_of_week'] = df['date'].dt.dayofweek
# Flag the last *business* day of each month. The previous calendar-based
# check (dt.is_month_end) never fired for months whose final calendar day falls
# on a weekend, because the series has no weekend rows. BMonthEnd(0) leaves a
# date that is already the business month-end unchanged and rolls any other
# date forward, so equality identifies exactly the business month-end rows.
business_month_end = df['date'] + pd.offsets.BMonthEnd(0)
df['is_month_end'] = (df['date'] == business_month_end).astype(int)

# Drop every row that contains a NaN in any column. This removes the warm-up
# rows of the lag/rolling features (the first 29 rows for the 30-row window)
# and would also remove any row with a missing raw value.
df_cleaned = df.dropna()

print(f"Features created. New shape: {df_cleaned.shape}")
df_cleaned.head()

Features created. New shape: (4169, 13)


Unnamed: 0,date,open,high,low,rate,volume,lag_1,lag_7,ma_7,ma_30,volatility_7,day_of_week,is_month_end
29,2010-02-11,0.8806,0.884,0.87,0.8716,0,0.8807,0.8736,0.874986,0.880377,0.003522,3,0
30,2010-02-12,0.8717,0.8721,0.8658,0.866,0,0.8716,0.8731,0.873971,0.879593,0.004906,4,0
31,2010-02-15,0.869,0.87,0.867,0.8679,0,0.866,0.8714,0.873471,0.878683,0.005368,0,0
32,2010-02-16,0.8679,0.8743,0.8678,0.8716,0,0.8679,0.8735,0.8732,0.877797,0.005414,1,0
33,2010-02-17,0.8718,0.8733,0.8668,0.8676,0,0.8716,0.8766,0.871914,0.876757,0.00554,2,0


In [3]:
# Persist the engineered feature table for downstream notebooks.
output_file = '../data/processed/features_engineered.csv'
processed_dir = '../data/processed'

# exist_ok=True makes this a no-op when the directory is already there,
# so the cell can be re-run safely.
os.makedirs(processed_dir, exist_ok=True)

# index=False: the DataFrame's row labels are not written to the file.
df_cleaned.to_csv(output_file, index=False)

print(f"Success! Processed data saved to {output_file}")

Success! Processed data saved to ../data/processed/features_engineered.csv
