# 02_feature_engineering.ipynb  
_Create technical features and visualize distributions_

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Create the output folders up front (no error if they already exist).
# Both paths are relative, so they resolve against the current working directory.
for folder in ("data/processed", "reports/figures"):
    os.makedirs(folder, exist_ok=True)

## 1. Load cleaned data

In [2]:
clean_path = "data/processed/tesla_cleaned.csv"

# Switch to the project root only when that is actually needed. An unconditional
# os.chdir('..') is not idempotent: re-running this cell would climb one level
# higher each time and silently break every relative path used below.
if not os.path.exists(clean_path) and os.path.exists(os.path.join("..", clean_path)):
    os.chdir("..")  # Change directory to the project root

# The output folders must exist relative to the directory we ended up in,
# otherwise the savefig/to_csv calls further down fail.
os.makedirs("data/processed", exist_ok=True)
os.makedirs("reports/figures", exist_ok=True)

df = pd.read_csv(clean_path, parse_dates=["Date"]).set_index("Date")
# NOTE(review): the file carries an 'Unnamed: 0' row-counter column (visible in the
# head() output). It is kept here and therefore ends up in the saved feature table;
# confirm whether downstream notebooks expect it before dropping it.
df.head()

Unnamed: 0_level_0,Unnamed: 0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-02,0,14.858,14.883333,14.217333,14.620667,71466000
2015-01-05,1,14.303333,14.433333,13.810667,14.006,80527500
2015-01-06,2,14.004,14.28,13.614,14.085333,93928500
2015-01-07,3,14.223333,14.318667,13.985333,14.063333,44526000
2015-01-08,4,14.187333,14.253333,14.000667,14.041333,51637500


## 2. Feature engineering
Compute moving averages, rolling volatility, and returns.

In [3]:
def add_features(df, windows=(5, 10, 20), return_horizon=30):
    """Return a copy of ``df`` extended with technical indicators built from ``Close``.

    Added columns, in this order:
      * ``MA_<w>``   - rolling mean of ``Close`` for every ``w`` in ``windows``
      * ``Vol_<w>``  - rolling standard deviation of ``Close`` for every ``w`` in ``windows``
      * ``Return_1D`` - fractional change of ``Close`` versus the previous row
      * ``Return_<return_horizon>D`` - fractional change of ``Close`` versus
        ``return_horizon`` rows earlier

    Window sizes and the horizon are counted in rows, not calendar days.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; must contain a ``Close`` column. It is not modified.
    windows : iterable of int, default (5, 10, 20)
        Rolling window lengths used for the moving averages and volatilities.
    return_horizon : int, default 30
        Number of rows to look back for the longer-horizon return.

    Returns
    -------
    pandas.DataFrame
        New frame with the extra columns. Every row holding a NaN in *any* column
        is removed, which drops the warm-up rows of the rolling/return computations
        and also rows that already had missing values in the input.

    Raises
    ------
    KeyError
        If ``df`` has no ``Close`` column.
    """
    if 'Close' not in df.columns:
        raise KeyError("add_features requires a 'Close' column")

    windows = tuple(windows)  # allow generators; we iterate twice below
    df_feat = df.copy()
    close = df_feat['Close']

    # Moving averages
    for w in windows:
        df_feat[f'MA_{w}'] = close.rolling(w).mean()
    # Rolling volatility (std dev)
    for w in windows:
        df_feat[f'Vol_{w}'] = close.rolling(w).std()
    # Row-over-row returns
    df_feat['Return_1D'] = close.pct_change()
    # Longer-horizon return
    df_feat[f'Return_{return_horizon}D'] = close.pct_change(return_horizon)

    # Return a new frame instead of mutating in place
    return df_feat.dropna()

# Build the feature table from the cleaned prices; the shape shows how many rows
# and columns remain after the NaN rows have been dropped.
df_feat = add_features(df)
df_feat.shape

# Documentation:
# `add_features` returns a copy of the DataFrame extended with technical indicators
# computed from the `Close` column:
# - Moving averages over 5, 10 and 20 trading days (rows): the average closing price
#   over each window. Useful for identifying trends and smoothing out price fluctuations.
# - Rolling volatility (standard deviation) over 5, 10 and 20 trading days (rows): measures
#   how much the closing price varies within each window, which helps to assess risk.
# - Daily return (`Return_1D`): the fractional change in the closing price from one row
#   to the next. Useful for understanding short-term price movements.
# - 30-day return (`Return_30D`): the fractional change in the closing price over 30 rows.
#   Useful for understanding medium-term trends.
# Rows containing a NaN (such as the warm-up rows of the rolling windows and of the
# 30-row return) are dropped before the frame is returned.

(2244, 14)

## 3. Visualize rolling statistics

In [4]:
def plot_rolling(df, window, out_dir='reports/figures'):
    """Plot ``Close`` with its rolling mean and rolling std, and save the figure as PNG.

    The figure is written to ``<out_dir>/02_rolling_<window>.png`` and then closed;
    nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``Close`` column; its index is used as the x-axis.
    window : int
        Rolling window length, in rows.
    out_dir : str, default 'reports/figures'
        Folder that receives the PNG. It is created if it does not exist.
    """
    # Make sure the target folder exists relative to the *current* working
    # directory, so savefig does not fail on a missing path.
    os.makedirs(out_dir, exist_ok=True)

    close = df['Close']
    rolled = close.rolling(window)

    fig, ax = plt.subplots(figsize=(12,4))
    try:
        ax.plot(df.index, close, label='Close')
        ax.plot(df.index, rolled.mean(), label=f'MA{window}')
        ax.plot(df.index, rolled.std(), label=f'STD{window}')
        ax.set_title(f'Rolling Mean & Std (window={window} days)')
        ax.set_xlabel('Date')
        ax.set_ylabel('Price')
        ax.legend()
        fig.savefig(f'{out_dir}/02_rolling_{window}.png', bbox_inches='tight')
    finally:
        # Always release the figure, even if plotting or saving raised
        plt.close(fig)

# One saved figure per window length: 5, 20 and 60 rows
for window in (5, 20, 60):
    plot_rolling(df, window)

## 4. Visualize feature distributions

In [7]:
# Engineered columns to inspect: moving averages, volatilities, then the two returns
features = (
    [f'MA_{n}' for n in (5, 10, 20)]
    + [f'Vol_{n}' for n in (5, 10, 20)]
    + ['Return_1D', 'Return_30D']
)

# One histogram (50 bins, KDE overlay) per feature, saved to its own PNG
for column in features:
    values = df_feat[column]
    fig, ax = plt.subplots(figsize=(6,4))
    sns.histplot(values, bins=50, kde=True, ax=ax)
    ax.set_title(f'Distribution of {column}')
    target = f'reports/figures/02_dist_{column}.png'
    fig.savefig(target, bbox_inches='tight')
    plt.close(fig)  # release the figure so the loop does not accumulate open figures

## 5. Save engineered features

In [8]:
out_feat = "data/processed/tesla_features.csv"

# Turn the Date index back into an ordinary column, then write without a pandas index
features_flat = df_feat.reset_index()
features_flat.to_csv(out_feat, index=False)

print(f"Engineered features saved to {out_feat}")

Engineered features saved to data/processed/tesla_features.csv
