# MA/RSI Basics 📈
Objective: Build a minimal, reusable template to compute and visualize basic trend (SMA/EMA) and momentum (RSI) indicators on ETF OHLCV data, using your existing daily_data / minute_data dictionaries.

## 1. Setup

In [28]:
import os,sys
import duckdb
from pathlib import Path
import pandas as pd
import json
# import backtrader as bt

In [29]:
PROJECT_ROOT = Path.cwd().parents[0]

if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

print(f"Project Root: {PROJECT_ROOT}")

Project Root: /Users/luyanda/workspace/QuantTrade


In [30]:
from utils.charts import render_lightweight_chart
from utils.duck import to_bt_daily_duckdb, to_bt_minute_duckdb
from utils.features import add_mas_duckdb, add_rsi_duckdb, add_emas_duckdb

In [31]:
# DB_DAILY = PROJECT_ROOT / "data" / "processed" / "dolt" / "stocks.duckdb"
# print(f"DB_DAILY: {DB_DAILY}")
# con_daily = duckdb.connect(str(DB_DAILY))
# tables = [t[0] for t in con_daily.execute("SHOW TABLES").fetchall()]
# print("📋 Tables:", tables)

In [32]:
DB_MINUTE = PROJECT_ROOT / "data" / "processed" / "alpaca" / "price_minute_alpaca.duckdb"
print(f"DB_MINUTE: {DB_MINUTE}")
con_minute = duckdb.connect(str(DB_MINUTE))
tables = [t[0] for t in con_minute.execute("SHOW TABLES").fetchall()]
print("📋 Tables:", tables)

DB_MINUTE: /Users/luyanda/workspace/QuantTrade/data/processed/alpaca/price_minute_alpaca.duckdb
📋 Tables: ['alpaca_minute']


In [33]:
ETFS = ["SPY", "QQQ"]
CHARTS_DIR = PROJECT_ROOT / "charts" 
IND_MA_WINDOWS = [20, 50, 200]
IND_EMA_WINDOWS = [20]
IND_RSI_PERIOD = 14
IND_RSI_BOUNDS = (30,70)

## 2. Helpers

## 3. Data Ingestion

In [34]:
# --- Ingest latest daily data ---
# daily_data = {
#     sym: to_bt_daily_duckdb(con_daily, sym, table="ohlcv", date_col="date", symbol_col="act_symbol")
#     for sym in ETFS
# }

# for symbol in ETFS:
#     print(daily_data[symbol].tail(1))


In [35]:
# --- Ingest latest minute-level data ---
minute_data = {sym: to_bt_minute_duckdb(con_minute, "alpaca_minute", sym) for sym in ETFS}

for symbol in ETFS:
    print(minute_data[symbol].tail(1))

                       open    high     low   close  volume  trade_count  \
datetime                                                                   
2025-08-13 14:36:00  644.34  644.34  644.34  644.34   100.0          1.0   

                       vwap  
datetime                     
2025-08-13 14:36:00  644.34  
                       open    high     low   close  volume  trade_count  \
datetime                                                                   
2025-08-13 14:55:00  582.45  582.45  582.45  582.45   340.0          2.0   

                       vwap  
datetime                     
2025-08-13 14:55:00  582.45  


## 4. Data Quality Checks

U.S. Market (SPY, QQQ)
Assuming regular NYSE/Nasdaq trading hours:

| **Session**     | **Hours (ET)**   | **Duration** |
| --------------- | ---------------- | ------------ |
| Regular session | 09:30 – 16:00 ET | 6.5 hours    |
|                 |                  | 390 minutes  |

Expect around 390 rows per ETF

In [36]:
for symbol in ETFS:
    df = minute_data[symbol]
    print(f"\n🔍 {symbol}")
    print(f"  • Rows: {len(df)}")
    print(f"  • Date Range: {df.index.min().date()} → {df.index.max().date()}")
    print(f"  • Timezone-aware: {df.index.tz is not None}")
    # print(f"  • Missing 'close': {df['close'].isna().sum()}")

    # --- Drop timezone if needed ---
    df = df.copy()
    if df.index.tz is not None:
        df.index = df.index.tz_localize(None)

    # --- Identify all available intraday dates ---
    df["date"] = df.index.normalize()
    available_dates = df["date"].unique()

    # --- Construct full expected range (business days) ---
    expected_dates = pd.date_range(
        start=df.index.min().normalize(),
        end=df.index.max().normalize(),
        freq='B'
    )

    # --- Missing trading days entirely ---
    missing_dates = sorted(set(expected_dates) - set(available_dates))
    print(f"  • Missing Intraday Dates: {len(missing_dates)}")
    # if missing_dates:
    #     print("    Example:", missing_dates[:5])

    # --- Check for partial trading days (fewer than 390 rows) ---
    counts = df.groupby("date").size()
    partial_days = counts[counts < 390]
    print(f"  • Partial Intraday Days (<390 rows): {len(partial_days)}")
    # if not partial_days.empty:
    #     print("    Example:", partial_days.head())



🔍 SPY
  • Rows: 192320
  • Date Range: 2023-08-09 → 2025-08-13
  • Timezone-aware: False
  • Missing Intraday Dates: 22
  • Partial Intraday Days (<390 rows): 279

🔍 QQQ
  • Rows: 187087
  • Date Range: 2023-08-09 → 2025-08-13
  • Timezone-aware: False
  • Missing Intraday Dates: 22
  • Partial Intraday Days (<390 rows): 306


## 5. Feature Engineering

In [37]:
minute_data_ema = add_emas_duckdb(minute_data, con_minute, [20])
print(minute_data_ema["SPY"].tail(1))

                       open    high     low   close  volume  trade_count  \
datetime                                                                   
2025-08-13 14:36:00  644.34  644.34  644.34  644.34   100.0          1.0   

                       vwap       ema20  
datetime                                 
2025-08-13 14:36:00  644.34  642.720931  


In [38]:
df_1 = add_mas_duckdb(minute_data_ema, con_minute, windows=[20, 50,200], price_col="close")
print(df_1["SPY"].tail(1))

                       open    high     low   close  volume  trade_count  \
datetime                                                                   
2025-08-13 14:36:00  644.34  644.34  644.34  644.34   100.0          1.0   

                       vwap       ema20     ma20      ma50       ma200  
datetime                                                                
2025-08-13 14:36:00  644.34  642.720931  642.645  642.4728  642.115025  


In [39]:
print(df_2)

{'QQQ':                        open    high     low   close  volume  trade_count  \
datetime                                                                   
2023-08-09 14:27:00  372.80  372.80  372.80  372.80   220.0          2.0   
2023-08-09 14:29:00  372.64  372.64  372.64  372.64   200.0          1.0   
2023-08-09 14:37:00  373.07  373.07  373.07  373.07   100.0          1.0   
2023-08-09 14:45:00  372.54  372.54  372.54  372.54   124.0          2.0   
2023-08-09 14:52:00  372.62  372.62  372.62  372.62   466.0          2.0   
...                     ...     ...     ...     ...     ...          ...   
2025-08-13 14:31:00  581.85  581.85  581.85  581.85   391.0          3.0   
2025-08-13 14:34:00  582.03  582.03  582.03  582.03   360.0          2.0   
2025-08-13 14:40:00  582.14  582.14  582.14  582.14   100.0          1.0   
2025-08-13 14:45:00  582.21  582.21  582.21  582.21   220.0          3.0   
2025-08-13 14:55:00  582.45  582.45  582.45  582.45   340.0          2.0   

   

In [40]:
df_2 = add_rsi_duckdb(df_1, con_minute, period=14)
print(df_2["SPY"].tail(1))

                       open    high     low   close  volume  trade_count  \
datetime                                                                   
2025-08-13 14:36:00  644.34  644.34  644.34  644.34   100.0          1.0   

                       vwap       ema20     ma20      ma50       ma200  \
datetime                                                                 
2025-08-13 14:36:00  644.34  642.720931  642.645  642.4728  642.115025   

                         rsi14  
datetime                        
2025-08-13 14:36:00  82.116788  


In [41]:
# render_lightweight_chart(
#     df_2["SPY"],
#     symbol="SPY",
#     out_html=CHARTS_DIR/"spy_ma_ema_rsi.html",
#     ma_windows=IND_MA_WINDOWS,
#     rsi_period=IND_RSI_PERIOD,            
#     rsi_bounds=None,
#     timeframes=["1m","5m","15m","1h","1d"],
#     default_tf="5m",
#     watermark_text="SPY — {tf}",
#     watermark_opacity=0.07,
# )

# IND_MA_WINDOWS = [20, 50, 200]
# IND_EMA_WINDOWS = [20]
# IND_RSI_PERIOD = 14

In [45]:
render_lightweight_chart(
    df_2["SPY"],
    symbol="SPY",
    out_html=CHARTS_DIR/"etf_basics_SPY.html",
    theme="dark",
    ma_windows=IND_MA_WINDOWS,  
    ema_windows=IND_EMA_WINDOWS,          
    rsi_period=IND_RSI_PERIOD,             
    rsi_bounds=IND_RSI_BOUNDS,
    timeframes=["1d", "1h", "15m"],
    default_tf="1d",
    watermark_text="SPY — {tf}",
)

TypeError: render_lightweight_chart() got an unexpected keyword argument 'ema_windows'

In [None]:
print(df_2["SPY"].columns)

Index(['open', 'high', 'low', 'close', 'volume', 'trade_count', 'vwap',
       'ema20', 'ma20', 'ma50', 'ma200', 'rsi14'],
      dtype='object')


In [None]:
# class PandasDataWithMAs(bt.feeds.PandasData):
#     # add new indicator lines available as data.ma20, data.ma50
#     lines = ('ma20', 'ma50',)
#     # map by column name (expects columns 'ma20' and 'ma50' to exist in df)
#     params = (
#         ('ma20', -1),   # -1 tells Backtrader to pick the column with the same name
#         ('ma50', -1),
#     )

# # Use it like:
# df = minute_data_ma["SPY"]  # already has ma20, ma50 from your DuckDB helper
# datafeed = PandasDataWithMAs(dataname=df[['open','high','low','close','volume','ma20','ma50']])
# cerebro = bt.Cerebro()
# cerebro.adddata(datafeed)

# class UsePrecomputed(bt.Strategy):
#     def __init__(self):
#         # now you can access self.data.ma20[0], self.data.ma50[0] in signals
#         pass