In [6]:


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pandas_ta as ta
import os

# Tickers analysed throughout the notebook
STOCKS = [
    "AAPL",
    "AMZN",
    "GOOG",
    "META",
    "MSFT",
    "NVDA",
]

# Relative locations of the input CSVs and of the generated figures
DATA_PATH = "../data/raw/"
FIG_PATH = "../reports/figures/task2/"
os.makedirs(FIG_PATH, exist_ok=True)  # create the figure directory if it is missing

# Global matplotlib look for every figure produced below
plt.style.use("ggplot")

def load_stock(symbol, data_path=None):
    """Load one stock's price history from ``<data_path>/<symbol>.csv``.

    Column names are lower-cased, a ``date`` column of datetimes is ensured
    (parsed in place from ``date``, or derived from ``timestamp`` when no
    ``date`` column exists), and the rows are returned sorted by ``date``
    with a fresh 0..n-1 index.

    Parameters
    ----------
    symbol : str
        File stem of the CSV to read, e.g. ``"AAPL"``.
    data_path : str, optional
        Directory holding the CSV. Defaults to the module-level ``DATA_PATH``.

    Returns
    -------
    pandas.DataFrame
        The loaded frame, sorted by ``date``.

    Raises
    ------
    KeyError
        If the file has neither a ``date`` nor a ``timestamp`` column.
    """
    if data_path is None:
        data_path = DATA_PATH
    csv_path = os.path.join(data_path, f"{symbol}.csv")

    df = pd.read_csv(csv_path)
    # Standardize column names
    df.columns = [c.lower() for c in df.columns]

    # Ensure a datetime "date" column; fail with a readable message instead
    # of the bare KeyError('date') that sort_values would otherwise raise.
    if "date" in df.columns:
        df["date"] = pd.to_datetime(df["date"])
    elif "timestamp" in df.columns:
        df["date"] = pd.to_datetime(df["timestamp"])
    else:
        raise KeyError(
            f"{csv_path}: expected a 'date' or 'timestamp' column, "
            f"found {list(df.columns)}"
        )

    return df.sort_values("date").reset_index(drop=True)

# Build the symbol -> price-history mapping, one CSV per ticker
data = dict(zip(STOCKS, map(load_stock, STOCKS)))

# -----------------------------------------
# Technical Indicators Function
# -----------------------------------------

def _first_matching(frame, matches, position=None):
    """Return the first column of ``frame`` whose name satisfies ``matches``.

    When no column name matches, fall back to the column at ``position`` if
    one is given and exists; otherwise return NaN.
    """
    for name in frame.columns:
        if matches(str(name)):
            return frame[name]
    if position is not None and frame.shape[1] > position:
        return frame.iloc[:, position]
    return np.nan


def _series_or_nan(result):
    """Map a ``None`` indicator result to NaN so the column stays numeric."""
    # NOTE(review): pandas_ta appears to return None instead of a Series when
    # the input is shorter than the window — confirm against the installed version.
    return np.nan if result is None else result


def compute_indicators(df):
    """Return a copy of ``df`` with technical-indicator columns added.

    Reads the ``close`` column and adds:

    - ``sma_20``, ``ema_20``: 20-period simple / exponential moving averages
    - ``rsi_14``: 14-period RSI
    - ``macd``, ``macd_signal``, ``macd_hist``: MACD with pandas_ta defaults
    - ``bb_upper``, ``bb_middle``, ``bb_lower``: Bollinger Bands (length 20, std 2)
    - ``returns``: ``close.pct_change()``
    - ``volatility_20``: 20-row rolling standard deviation of ``returns``

    Indicator columns are filled with NaN when pandas_ta does not return a
    usable result. The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``close`` column.

    Returns
    -------
    pandas.DataFrame
    """
    df = df.copy()
    close = df["close"]

    # --- SMA & EMA ---
    df["sma_20"] = _series_or_nan(ta.sma(close, length=20))
    df["ema_20"] = _series_or_nan(ta.ema(close, length=20))

    # --- RSI ---
    df["rsi_14"] = _series_or_nan(ta.rsi(close, length=14))

    # --- MACD ---
    # Columns are located by name prefix so the lookup does not depend on the
    # exact parameter suffix. The positional fallback follows pandas_ta's
    # column order (MACD line, histogram, signal): signal is at position 2 and
    # the histogram at position 1, not the other way round.
    macd = ta.macd(close)
    if isinstance(macd, pd.DataFrame):
        df["macd"] = _first_matching(
            macd, lambda c: c.startswith("MACD_"), position=0
        )
        df["macd_signal"] = _first_matching(
            macd, lambda c: c.startswith("MACDs_"), position=2
        )
        df["macd_hist"] = _first_matching(
            macd, lambda c: c.startswith("MACDh_"), position=1
        )
    else:
        df["macd"] = df["macd_signal"] = df["macd_hist"] = np.nan

    # --- Bollinger Bands ---
    # Matched by substring so both "BBU_20_2.0"-style names and spelled-out
    # "upper"/"mid"/"lower" names are accepted; NaN when nothing matches.
    bb = ta.bbands(close, length=20, std=2)
    if isinstance(bb, pd.DataFrame):
        df["bb_upper"] = _first_matching(
            bb, lambda c: "BBU" in c or "upper" in c.lower()
        )
        df["bb_middle"] = _first_matching(
            bb, lambda c: "BBM" in c or "mid" in c.lower()
        )
        df["bb_lower"] = _first_matching(
            bb, lambda c: "BBL" in c or "lower" in c.lower()
        )
    else:
        df["bb_upper"] = df["bb_middle"] = df["bb_lower"] = np.nan

    # --- Daily returns ---
    df["returns"] = df["close"].pct_change()

    # --- 20-day rolling volatility ---
    df["volatility_20"] = df["returns"].rolling(20).std()

    return df


# Replace each loaded frame with its indicator-enriched copy
for symbol in STOCKS:
    enriched = compute_indicators(data[symbol])
    data[symbol] = enriched

# -----------------------------------------
# Plot Helper Function
# -----------------------------------------

def save_plot(symbol, df, col, title):
    """Plot ``df[col]`` against ``df["date"]`` and save the figure as a PNG.

    The image is written to ``<FIG_PATH><symbol>_<col>.png``. The figure is
    always closed afterwards, even when plotting or saving raises, so figures
    do not accumulate when this is called in a loop.

    Parameters
    ----------
    symbol : str
        Ticker shown in the title and used in the file name.
    df : pandas.DataFrame
        Frame providing the ``date`` column and the column named by ``col``.
    col : str
        Column to plot; also used as the y-axis label and in the file name.
    title : str
        Human-readable description appended to the symbol in the title.
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.plot(df["date"], df[col])
        # \u2014 is an em dash; the previous literal was a mis-encoded copy of it.
        ax.set_title(f"{symbol} \u2014 {title}")
        ax.set_xlabel("Date")
        ax.set_ylabel(col)
        fig.tight_layout()
        fig.savefig(f"{FIG_PATH}{symbol}_{col}.png")
    finally:
        plt.close(fig)

# (column, title) pairs rendered for every stock, in this order
PLOT_SPECS = (
    ("close", "Closing Price"),
    ("sma_20", "20-Day SMA"),
    ("ema_20", "20-Day EMA"),
    ("rsi_14", "RSI (14)"),
    ("macd", "MACD"),
    ("macd_signal", "MACD Signal Line"),
    ("bb_upper", "Bollinger Bands (Upper)"),
    ("volatility_20", "20-Day Rolling Volatility"),
)

# One PNG per (stock, indicator) combination
for symbol in STOCKS:
    df = data[symbol]
    for col, title in PLOT_SPECS:
        save_plot(symbol, df, col, title)

# -----------------------------------------
# Comparative Summary Table
# -----------------------------------------

def _summary_row(symbol, frame):
    """Collect the headline statistics of one stock as a single table row."""
    return {
        "Stock": symbol,
        "Mean Daily Return": frame["returns"].mean(),
        "Volatility (20D)": frame["volatility_20"].mean(),
        "Avg RSI": frame["rsi_14"].mean(),
        "MACD Last": frame["macd"].iloc[-1],
    }


# One row per ticker, in STOCKS order
summary = [_summary_row(symbol, data[symbol]) for symbol in STOCKS]

summary_df = pd.DataFrame(summary)
summary_df


Unnamed: 0,Stock,Mean Daily Return,Volatility (20D),Avg RSI,MACD Last
0,AAPL,0.001289,0.016551,56.218984,1.559539
1,AMZN,0.001303,0.019922,54.49069,2.782032
2,GOOG,0.00091,0.015837,54.478373,1.84282
3,META,0.001082,0.02218,53.444982,8.193282
4,MSFT,0.000996,0.015278,55.075317,2.65364
5,NVDA,0.001877,0.026299,54.612575,0.697532


Rank Stocks by Performance

In [7]:
# Highest mean daily return first
ranked = summary_df.sort_values(by="Mean Daily Return", ascending=False)
ranked

Unnamed: 0,Stock,Mean Daily Return,Volatility (20D),Avg RSI,MACD Last
5,NVDA,0.001877,0.026299,54.612575,0.697532
1,AMZN,0.001303,0.019922,54.49069,2.782032
0,AAPL,0.001289,0.016551,56.218984,1.559539
3,META,0.001082,0.02218,53.444982,8.193282
4,MSFT,0.000996,0.015278,55.075317,2.65364
2,GOOG,0.00091,0.015837,54.478373,1.84282


Export Summary Table

In [8]:
# Write the unranked summary table to disk without the index column
summary_path = "../reports/task2_summary.csv"
summary_df.to_csv(path_or_buf=summary_path, index=False)

# Echo the destination as the cell output
summary_path

'../reports/task2_summary.csv'