## Preparation

In [58]:
import pandas as pd


# Location of the EUR/AUD historical data export. The file is a CSV even
# though the variable name says "excel".
excel_file_path = '/content/EUR_AUD Historical Data.csv'

# Read the raw table and list its column labels as a quick check of what loaded.
df = pd.read_csv(filepath_or_buffer=excel_file_path)
print(df.columns)

Index(['Date', 'Price (EURAUD)', 'Inflation Rate(%)', 'Oil Prices',
       'World Uncertainty Index', 'Unnamed: 5', 'Unnamed: 6'],
      dtype='object')


In [59]:
# Row-over-row percentage change of the EUR/AUD price, expressed in percent.
# The first row has no predecessor, so its return is NaN.
df['forex_daily_returns'] = df['Price (EURAUD)'].pct_change().mul(100)

# Preview the price alongside the newly derived return column.
display(df.loc[:, ['Date', 'Price (EURAUD)', 'forex_daily_returns']].head())

Unnamed: 0,Date,Price (EURAUD),forex_daily_returns
0,1/1/2021,1.575,
1,1/4/2021,1.5979,1.453968
2,1/5/2021,1.5841,-0.863634
3,1/6/2021,1.5795,-0.290386
4,1/7/2021,1.5792,-0.018993


In [60]:
# Discard the two unnamed columns picked up from the CSV; errors='ignore'
# keeps this cell re-runnable once they have already been removed.
df = df.drop(columns=['Unnamed: 5', 'Unnamed: 6'], errors='ignore')

# Carry the last observed value forward over gaps in these two series.
# Series.ffill() is the supported spelling; fillna(method='ffill') is
# deprecated in pandas and emits a FutureWarning, while yielding the same values.
df['World Uncertainty Index'] = df['World Uncertainty Index'].ffill()
df['Inflation Rate(%)'] = df['Inflation Rate(%)'].ffill()

# Drop every row that still holds a missing value (at minimum the first row,
# whose return is NaN) and keep the result under a separate name.
df_cleaned = df.dropna()

display(df_cleaned.head())

  df['World Uncertainty Index'] = df['World Uncertainty Index'].fillna(method='ffill')
  df['Inflation Rate(%)'] = df['Inflation Rate(%)'].fillna(method='ffill')


Unnamed: 0,Date,Price (EURAUD),Inflation Rate(%),Oil Prices,World Uncertainty Index,forex_daily_returns
1,1/4/2021,1.5979,0.9,50.37,3.45,1.453968
2,1/5/2021,1.5841,0.9,53.16,3.45,-0.863634
3,1/6/2021,1.5795,0.9,53.8,3.45,-0.290386
4,1/7/2021,1.5792,0.9,53.7,3.45,-0.018993
5,1/8/2021,1.5743,0.9,55.51,3.45,-0.310284


In [61]:
# Predictors are the three explanatory columns; the target is the return column.
X = df_cleaned.loc[:, ['Inflation Rate(%)', 'Oil Prices', 'World Uncertainty Index']]
y = df_cleaned.loc[:, 'forex_daily_returns']

# Show the first rows of the feature matrix, then of the target.
display(X.head(), y.head())

Unnamed: 0,Inflation Rate(%),Oil Prices,World Uncertainty Index
1,0.9,50.37,3.45
2,0.9,53.16,3.45
3,0.9,53.8,3.45
4,0.9,53.7,3.45
5,0.9,55.51,3.45


Unnamed: 0,forex_daily_returns
1,1.453968
2,-0.863634
3,-0.290386
4,-0.018993
5,-0.310284


In [62]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
# NOTE(review): train_test_split shuffles rows by default and the rows here are
# date-ordered -- confirm a random (rather than chronological) split is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the dimensions of each partition.
for caption, part in (
    ("X_train shape:", X_train),
    ("X_test shape:", X_test),
    ("y_train shape:", y_train),
    ("y_test shape:", y_test),
):
    print(caption, part.shape)

X_train shape: (704, 3)
X_test shape: (177, 3)
y_train shape: (704,)
y_test shape: (177,)


## Train Decision Tree Model

### Subtask:
Train a Decision Tree Regressor model with the current `X_train` and `y_train` variables.


In [63]:
from sklearn.tree import DecisionTreeRegressor

# Build the regressor with a fixed seed and fit it on the training partition
# in one expression (fit() hands back the estimator itself).
dt_model = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)

print("Decision Tree Regressor model trained successfully.")

Decision Tree Regressor model trained successfully.


## Evaluate Decision Tree Performance

### Subtask:
Evaluate the performance of the Decision Tree model using Mean Squared Error (MSE) and R-squared (R2) score on the test set.


In [64]:
from sklearn.metrics import mean_squared_error, r2_score

# Predict on the held-out rows, then score those predictions with both metrics.
y_pred_dt = dt_model.predict(X_test)
mse_dt, r2_dt = (
    metric(y_test, y_pred_dt) for metric in (mean_squared_error, r2_score)
)

print(f"Decision Tree Mean Squared Error (MSE): {mse_dt:.4f}")
print(f"Decision Tree R-squared (R2) Score: {r2_dt:.4f}")

Decision Tree Mean Squared Error (MSE): 0.4675
Decision Tree R-squared (R2) Score: -0.7748


## Analyze Decision Tree Feature Importance

### Subtask:
Analyze and display the feature importances for the trained Decision Tree Regressor model.


In [65]:
# Pair each training column with the importance the fitted tree assigned to it.
feature_names_dt = X_train.columns
feature_importances_dt = dt_model.feature_importances_

# Tabulate the pairs and rank them from most to least important.
importances_df_dt = (
    pd.DataFrame({'feature': feature_names_dt, 'importance': feature_importances_dt})
    .sort_values(by='importance', ascending=False)
)

print("Decision Tree Feature Importance:")
display(importances_df_dt)

Decision Tree Feature Importance:


Unnamed: 0,feature,importance
1,Oil Prices,0.800733
0,Inflation Rate(%),0.128627
2,World Uncertainty Index,0.07064


## OLS-based Feature Filtering

### Subtask:
Filter out features based on OLS regression results. Specifically, remove 'World Uncertainty Index' from the `X` variables, and then prepare the data for retraining the Decision Tree Regressor.

**Reasoning**:
Based on the OLS regression results and the task requirement, I will create a new feature set `X_filtered` by dropping the 'World Uncertainty Index' column. Then, I will split this new `X_filtered` and the original `y` into training and testing sets to prepare for retraining the Decision Tree model.



In [66]:
# Reduced feature set: 'World Uncertainty Index' is left out; the target is unchanged.
X_filtered = df_cleaned.loc[:, ['Inflation Rate(%)', 'Oil Prices']]
y = df_cleaned.loc[:, 'forex_daily_returns']

# Partition into training and testing sets with the same ratio and seed as before.
X_train_filtered, X_test_filtered, y_train, y_test = train_test_split(
    X_filtered,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the dimensions of each partition.
for caption, part in (
    ("X_train_filtered shape:", X_train_filtered),
    ("X_test_filtered shape:", X_test_filtered),
    ("y_train shape:", y_train),
    ("y_test shape:", y_test),
):
    print(caption, part.shape)

# Show the first rows of the reduced feature matrix, then of the target.
display(X_filtered.head(), y.head())

X_train_filtered shape: (704, 2)
X_test_filtered shape: (177, 2)
y_train shape: (704,)
y_test shape: (177,)


Unnamed: 0,Inflation Rate(%),Oil Prices
1,0.9,50.37
2,0.9,53.16
3,0.9,53.8
4,0.9,53.7
5,0.9,55.51


Unnamed: 0,forex_daily_returns
1,1.453968
2,-0.863634
3,-0.290386
4,-0.018993
5,-0.310284


## Retrain Decision Tree with Filtered Features

### Subtask:
Retrain a Decision Tree Regressor model using the `X_train_filtered` and `y_train` variables.


In [67]:
from sklearn.tree import DecisionTreeRegressor

# Same seeded regressor as before, this time fitted on the reduced feature set
# (fit() hands back the estimator itself, so construction and fitting are chained).
dt_model_filtered = DecisionTreeRegressor(random_state=42).fit(X_train_filtered, y_train)

print("Decision Tree Regressor model retrained successfully with filtered features.")

Decision Tree Regressor model retrained successfully with filtered features.


## Evaluate Retrained Decision Tree Performance

### Subtask:
Evaluate the performance of the retrained Decision Tree model using Mean Squared Error (MSE) and R-squared (R2) score on the filtered test set (`X_test_filtered`, `y_test`).

In [68]:
from sklearn.metrics import mean_squared_error, r2_score

# Predict on the held-out rows of the reduced feature set and score the result.
y_pred_dt_filtered = dt_model_filtered.predict(X_test_filtered)
mse_dt_filtered, r2_dt_filtered = (
    metric(y_test, y_pred_dt_filtered) for metric in (mean_squared_error, r2_score)
)

print(f"Retrained Decision Tree Mean Squared Error (MSE): {mse_dt_filtered:.4f}")
print(f"Retrained Decision Tree R-squared (R2) Score: {r2_dt_filtered:.4f}")

Retrained Decision Tree Mean Squared Error (MSE): 0.4462
Retrained Decision Tree R-squared (R2) Score: -0.6943


## Analyze Retrained Decision Tree Feature Importance

### Subtask:
Analyze and display the feature importances for the retrained Decision Tree Regressor model, which was trained with OLS-filtered features.

In [69]:
# Pair each retained training column with the importance the retrained tree gave it.
feature_names_dt_filtered = X_train_filtered.columns
feature_importances_dt_filtered = dt_model_filtered.feature_importances_

# Tabulate the pairs and rank them from most to least important.
importances_df_dt_filtered = (
    pd.DataFrame(
        {
            'feature': feature_names_dt_filtered,
            'importance': feature_importances_dt_filtered,
        }
    )
    .sort_values(by='importance', ascending=False)
)

print("Retrained Decision Tree Feature Importance:")
display(importances_df_dt_filtered)

Retrained Decision Tree Feature Importance:


Unnamed: 0,feature,importance
1,Oil Prices,0.817313
0,Inflation Rate(%),0.182687


In [70]:
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
import numpy as np

# Rebuild the full three-feature matrix and the target from the cleaned frame.
X = df_cleaned.loc[:, ['Inflation Rate(%)', 'Oil Prices', 'World Uncertainty Index']]
y = df_cleaned.loc[:, 'forex_daily_returns']

# Standardise the features so the L1 penalty treats them on a common scale.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Fit a Lasso without an intercept term on the standardised features.
# NOTE(review): the recorded output shows every coefficient at 0.0000 with
# alpha=1.0 -- the penalty may be too strong to be informative; confirm the
# choice of alpha (and of fit_intercept=False with an uncentred target).
lasso_model = Lasso(alpha=1.0, fit_intercept=False).fit(X_scaled, y)
coefficients = lasso_model.coef_

# List one coefficient per feature, in column order.
print("Lasso Coefficients (without intercept):")
for feature, coef in zip(X.columns, coefficients):
    print(f"{feature}: {coef:.4f}")

Lasso Coefficients (without intercept):
Inflation Rate(%): 0.0000
Oil Prices: -0.0000
World Uncertainty Index: 0.0000


## PCA

## Perform PCA

### Subtask:
Perform Principal Component Analysis (PCA) on the features `Inflation Rate(%)`, `Oil Prices`, and `World Uncertainty Index` to identify principal components and their loading factors.


In [71]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import pandas as pd


# Feature matrix for the decomposition: the three explanatory columns.
X = df_cleaned.loc[:, ['Inflation Rate(%)', 'Oil Prices', 'World Uncertainty Index']]

# Standardise first so no single feature dominates through its scale.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Keep all components (no n_components limit) and fit on the scaled data.
pca = PCA().fit(X_scaled)

# Transpose so that rows are features and columns are principal components.
# NOTE(review): these are the raw component vectors from `components_`;
# confirm whether variance-scaled loadings were intended instead.
loading_factors = pca.components_.T

loading_factors_df = pd.DataFrame(
    loading_factors,
    index=X.columns,
    columns=[f'PC{k}' for k in range(1, pca.n_components_ + 1)],
)

print("PCA Loading Factors:")
display(loading_factors_df)

PCA Loading Factors:


Unnamed: 0,PC1,PC2,PC3
Inflation Rate(%),0.70965,0.220623,0.66912
Oil Prices,0.7043,-0.247631,-0.665312
World Uncertainty Index,0.018912,0.9434,-0.331117
