In [3]:
import sys
import pandas as pd

# Add the directory two levels up to the import search path so that the
# `src.features.build_features` import in the next cell can be resolved
# (presumably this is the project root -- confirm). The entry is relative,
# so it only works when the working directory is the notebook's own folder.
sys.path.append("../..")

In [5]:
import pandas as pd
import os
# Make sure to import the function from your build_features module
from src.features.build_features import create_features 

# --- Configuration ---
# Both paths are relative and therefore resolved against the current working
# directory at run time.
CLEANED_DATA_PATH = "../../data/processed/btc_daily_cleaned.csv"
FEATURES_DATA_PATH = "../../data/processed/btc_daily_features.csv"


# --- Helpers ---
def _ensure_directory(dir_path):
    """Create *dir_path* (and any missing parents) if it does not exist yet.

    An empty *dir_path* (the target file lives in the working directory) is
    a no-op.
    """
    if dir_path and not os.path.exists(dir_path):
        # exist_ok=True closes the window between the existence check above
        # and the creation: if something else creates the directory in the
        # meantime we must not crash.
        os.makedirs(dir_path, exist_ok=True)
        print(f"Created directory: {dir_path}")


def _load_cleaned_data(csv_path):
    """Read the cleaned CSV at *csv_path* into a DataFrame indexed by 'Date'.

    Returns:
        The loaded DataFrame, or ``None`` if the file is missing or cannot be
        read. Failures are reported on stdout instead of being raised, so the
        calling script can finish gracefully.
    """
    print(f"Loading cleaned data from: {csv_path}")
    # Keep the try body limited to the read itself so that only genuine
    # loading problems are reported as such.
    try:
        frame = pd.read_csv(csv_path, index_col='Date', parse_dates=True)
    except FileNotFoundError:
        print(f"Error: {csv_path} not found. Please ensure cleaning step was run and file exists.")
        return None
    except Exception as e:  # top-level script boundary: report, don't crash
        print(f"An error occurred loading the cleaned data: {e}")
        return None

    print("Cleaned data loaded successfully.")
    print(f"Original cleaned DataFrame shape: {frame.shape}")
    return frame


def _save_features(features, csv_path):
    """Write *features* to *csv_path* and print a short summary.

    Returns:
        ``True`` if the file was written, ``False`` if writing failed (the
        error is reported on stdout).
    """
    print(f"\nSaving DataFrame with features to: {csv_path}")
    try:
        features.to_csv(csv_path)
    except Exception as e:  # top-level script boundary: report, don't crash
        print(f"An error occurred while saving features: {e}")
        return False

    # The summary is printed outside the try block so that a display problem
    # is never mis-reported as a failed save.
    print("DataFrame with features saved successfully.")
    print(f"Final DataFrame shape with features: {features.shape}")
    print("\n--- First 5 rows of DataFrame with Features ---")
    print(features.head())
    return True


# --- Main Execution ---
# The orchestration stays at top level (rather than inside a function) so that
# PROCESSED_DIR, df_cleaned and df_with_features remain available as
# module-level names after the script/cell has run.
if __name__ == "__main__":
    print("--- Running Feature Engineering Script ---")

    # Ensure the processed directory exists for saving features
    PROCESSED_DIR = os.path.dirname(FEATURES_DATA_PATH)
    _ensure_directory(PROCESSED_DIR)

    # 1. Load the cleaned data
    df_cleaned = _load_cleaned_data(CLEANED_DATA_PATH)

    # 2. Create features if data was loaded successfully
    if df_cleaned is None:
        print("Skipping feature engineering: Cleaned data could not be loaded.")
    elif df_cleaned.empty:
        # The file was read but holds no rows; say so instead of claiming
        # that loading failed.
        print("Skipping feature engineering: Cleaned data is empty.")
    else:
        # You can choose which library to use by passing the argument:
        # e.g., df_with_features = create_features(df_cleaned, technical_indicator_library='talib')
        df_with_features = create_features(df_cleaned, technical_indicator_library='pandas')

        # 3. Save the DataFrame with features
        if df_with_features is not None and not df_with_features.empty:
            _save_features(df_with_features, FEATURES_DATA_PATH)
        else:
            print("Feature engineering returned an empty DataFrame. Skipping save.")

    print("\n--- Feature Engineering Script Finished ---")

--- Running Feature Engineering Script ---
Loading cleaned data from: ../../data/processed/btc_daily_cleaned.csv
Cleaned data loaded successfully.
Original cleaned DataFrame shape: (4027, 5)

--- Starting Feature Engineering ---
Calculating Daily Returns...
  - Calculated 'daily_return'.
Calculating Technical Indicators using 'pandas'...
  - Calculated indicators using Pandas.
Creating Lagged Features...
  - Created lagged features.
Creating Interaction Features...
  - Created 'Price_vs_SMA10' feature.
  - Created 'Volume_vs_Avg_Vol10' feature.
Handling NaNs introduced by feature engineering...
  - Removed 19 rows with NaNs.
  - DataFrame shape after dropping NaNs: (4008, 23)
Feature engineering completed.

Saving DataFrame with features to: ../../data/processed/btc_daily_features.csv
DataFrame with features saved successfully.
Final DataFrame shape with features: (4008, 23)

--- First 5 rows of DataFrame with Features ---
                                 Open        High         Low  