In [None]:
#featureEngineeringAnalysis.ipynb
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

from common import (
    load_data, feature_engineering
)

In [None]:
# Load the cryptocurrency data
file_path = 'BTC.csv'  # Change the file path as needed
data = load_data(file_path)

In [None]:
# Feature Engineering
data = feature_engineering(data)

In [35]:
# Model inputs: two lag columns, the MA3 column and three calendar columns.
# The value to predict is the Close column.
# NOTE(review): if MA3 is computed over a window that includes the current
# row's Close, it leaks the target into the features — confirm in
# common.feature_engineering.
feature_columns = ['lag1', 'lag2', 'MA3', 'day_of_week', 'month', 'quarter']
X = data[feature_columns]
y = data['Close']

Mean Squared Error: 773219.7190431473
Root Mean Squared Error: 879.329130100412
Feature Importances:  [(0.5026865316771938, 'lag1'), (0.49460623660618397, 'MA3'), (0.002400304022072601, 'lag2'), (0.00013981787132228007, 'day_of_week'), (0.00010838184778940627, 'month'), (5.8727975437974556e-05, 'quarter')]


In [None]:
# Hold out 20% of the rows for testing and train on the remaining 80%.
# shuffle=False splits by position, so the test set is the final 20% of rows.
# (Presumably the rows are in date order, making this a chronological
# hold-out — confirm against load_data.)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)

In [None]:
# Build a 100-tree random forest with a fixed seed and train it on the
# training split. fit() returns the estimator itself, so construction and
# training are chained into a single expression.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict the target for the held-out rows
y_pred = rf_model.predict(X_test)

In [None]:
# Score the held-out predictions: mean squared error and its square root.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# One print call, one metric per line.
print(
    f"Mean Squared Error: {mse}",
    f"Root Mean Squared Error: {rmse}",
    sep="\n",
)

In [None]:
# Feature Importance
# Pair each fitted importance score with its column name and list the pairs
# from most to least important.
importances = rf_model.feature_importances_
feature_names = X.columns
# Cast every score to a built-in float: the scores are NumPy scalars, and on
# NumPy >= 2 their repr inside a tuple is "np.float64(...)", which makes the
# printed list hard to read. The numeric values and ordering are unchanged.
sorted_importances = sorted(
    ((float(score), name) for score, name in zip(importances, feature_names)),
    reverse=True,
)
print("Feature Importances: ", sorted_importances)