# In [None]:
# ============================================================
# FINAL HYBRID LIVE STOCK PREDICTION ENGINE (WITH SEPARATE PRETRAINING)
# Daily model: 2 years of daily bars, 60-day input window, pretrained separately
# Minute model: last 6 days (1m), pretrained separately
# 7-day prediction: using daily model (instantly after daily pretraining)
# Live prediction: hybrid with pretrained LSTMs (rolling window)
# ============================================================

import yfinance as yf
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import time
from datetime import datetime, timedelta

# -----------------------------
# CONFIG
# -----------------------------
# Symbol downloaded from Yahoo Finance via yfinance.
TICKER = "AAPL"

# Window lengths (number of rows fed to each LSTM per sample).
SEQ_LEN_MIN = 60
SEQ_LEN_DAY = 60

# Training hyper-parameters shared by all three training loops.
EPOCHS = 8
BATCH_SIZE = 32

# CSV file the minute-bar table is written to.
MIN_DB_FILE = "minute_database.csv"

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# ============================================================
# 1. FETCH DAILY DATA (PAST 2 YEARS)
# ============================================================
# Download two years of daily bars for TICKER (auto_adjust=False, no progress bar).
daily_df = yf.download(
    TICKER,
    period="2y",
    interval="1d",
    auto_adjust=False,
    progress=False
)

# Fail fast: an empty frame would otherwise only surface later as NaN
# normalization statistics or an obscure DataLoader/sampler error.
if daily_df is None or daily_df.empty:
    raise RuntimeError(f"No daily data returned for {TICKER}; cannot continue.")

# Strip any timezone from the index so all timestamps in the script are naive.
daily_df.index = daily_df.index.tz_localize(None)

# When the columns come back as a MultiIndex, keep only the first level
# (the field names selected below).
if isinstance(daily_df.columns, pd.MultiIndex):
    daily_df.columns = daily_df.columns.get_level_values(0)

# Fix the column order: the datasets and the forecast code index Close as column 3.
daily_df = daily_df[['Open','High','Low','Close','Volume']]
print("‚úÖ Daily data loaded:", daily_df.shape)

# ============================================================
# 2. FETCH MINUTE DATA (LAST 6 FULL DAYS, NO TODAY)
# ============================================================
# Request window: [start_day, end_day + 1 day), i.e. it stops at midnight of
# today so no bar from the current session is included.
# NOTE(review): datetime.now() is the machine's local date, not the
# exchange's — confirm this is intended when running in another timezone.
today = datetime.now().date()
end_day = today - timedelta(days=1)
start_day = end_day - timedelta(days=6)

minute_df = yf.download(
    TICKER,
    start=start_day,
    end=end_day + timedelta(days=1),
    interval="1m",
    auto_adjust=False,
    progress=False
)

# Fail fast: without minute bars the normalization statistics are NaN and
# an empty CSV would be written.
if minute_df is None or minute_df.empty:
    raise RuntimeError(
        f"No 1-minute data returned for {TICKER} between {start_day} and {end_day}."
    )

# Strip any timezone from the index so it compares cleanly with the live
# bars, which get the same treatment in the prediction loop.
minute_df.index = minute_df.index.tz_localize(None)

# When the columns come back as a MultiIndex, keep only the first level.
if isinstance(minute_df.columns, pd.MultiIndex):
    minute_df.columns = minute_df.columns.get_level_values(0)

# Fix the column order (Close is column 3) and persist the table.
minute_df = minute_df[['Open','High','Low','Close','Volume']]
minute_df.to_csv(MIN_DB_FILE)

print("‚úÖ Minute DB (6 days) saved:", minute_df.shape)

# ============================================================
# 3. NORMALIZATION (SEPARATE)
# ============================================================
# Per-column extrema of the daily frame (used by norm_day / denorm_day_close).
day_min = daily_df.min()
day_max = daily_df.max()

# Per-column extrema of the minute frame (used by norm_min / denorm_min_close).
min_min = minute_df.min()
min_max = minute_df.max()

def norm_day(x):
    """Min-max scale ``x`` using the per-column extrema of the daily frame.

    The 1e-8 term keeps the division defined when a column is constant.
    """
    shifted = x - day_min
    value_range = day_max - day_min + 1e-8
    return shifted / value_range

def norm_min(x):
    """Min-max scale ``x`` using the per-column extrema of the minute frame.

    The 1e-8 term keeps the division defined when a column is constant.
    """
    shifted = x - min_min
    value_range = min_max - min_min + 1e-8
    return shifted / value_range

def denorm_day_close(x):
    """Map a normalized daily Close value back to a price."""
    close_low = day_min["Close"]
    close_high = day_max["Close"]
    return x * (close_high - close_low) + close_low

def denorm_min_close(x):
    """Map a normalized minute Close value back to a price."""
    close_low = min_min["Close"]
    close_high = min_max["Close"]
    return x * (close_high - close_low) + close_low

# ============================================================
# 4. DATASETS
# ============================================================
class DailyDataset(torch.utils.data.Dataset):
    """Sliding-window dataset over the normalized daily frame.

    Sample ``idx`` is the pair ``(rows[idx : idx + seq_len], close)`` where
    ``close`` is column 3 (Close) of the row right after the window.

    Args:
        df: Daily frame; it is normalized with ``norm_day``.
        seq_len: Window length. Defaults to ``SEQ_LEN_DAY`` when omitted.
    """

    def __init__(self, df, seq_len=None):
        self.seq_len = SEQ_LEN_DAY if seq_len is None else seq_len
        self.data = norm_day(df).values

    def __len__(self):
        # The last valid window starts at len(data) - seq_len - 1 (its target
        # is the final row), so there are len(data) - seq_len samples.
        # Clamp at zero so short inputs give an empty dataset instead of a
        # negative length.
        return max(0, len(self.data) - self.seq_len)

    def __getitem__(self, idx):
        # Reject out-of-range indices explicitly; a negative index would
        # otherwise produce a silently wrong (empty or misaligned) window.
        if not 0 <= idx < len(self):
            raise IndexError(f"index {idx} out of range for {len(self)} samples")
        window_end = idx + self.seq_len
        x = self.data[idx:window_end]
        y = self.data[window_end][3]  # Close
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

class MinDataset(torch.utils.data.Dataset):
    """Sliding-window dataset over the normalized minute frame.

    Sample ``idx`` is the pair ``(rows[idx : idx + seq_len], close)`` where
    ``close`` is column 3 (Close) of the row right after the window.

    Args:
        df: Minute frame; it is normalized with ``norm_min``.
        seq_len: Window length. Defaults to ``SEQ_LEN_MIN`` when omitted.
    """

    def __init__(self, df, seq_len=None):
        self.seq_len = SEQ_LEN_MIN if seq_len is None else seq_len
        self.data = norm_min(df).values

    def __len__(self):
        # The last valid window starts at len(data) - seq_len - 1 (its target
        # is the final row), so there are len(data) - seq_len samples.
        # Clamp at zero so short inputs give an empty dataset instead of a
        # negative length.
        return max(0, len(self.data) - self.seq_len)

    def __getitem__(self, idx):
        # Reject out-of-range indices explicitly; a negative index would
        # otherwise produce a silently wrong (empty or misaligned) window.
        if not 0 <= idx < len(self):
            raise IndexError(f"index {idx} out of range for {len(self)} samples")
        window_end = idx + self.seq_len
        x = self.data[idx:window_end]
        y = self.data[window_end][3]  # Close
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

class HybridDataset(torch.utils.data.Dataset):
    """Minute windows paired with one fixed daily context window.

    Sample ``idx`` is ``(day_x, min_x, y)``:
      * ``day_x`` – the last ``seq_len_day`` normalized daily rows (the same
        for every sample),
      * ``min_x`` – normalized minute rows ``idx : idx + seq_len_min``,
      * ``y``     – column 3 (Close) of the minute row right after the window.

    Args:
        min_df: Minute frame; normalized with ``norm_min``.
        day_df: Daily frame; normalized with ``norm_day``.
        seq_len_min: Minute window length. Defaults to ``SEQ_LEN_MIN``.
        seq_len_day: Daily window length. Defaults to ``SEQ_LEN_DAY``.
    """

    def __init__(self, min_df, day_df, seq_len_min=None, seq_len_day=None):
        self.seq_len_min = SEQ_LEN_MIN if seq_len_min is None else seq_len_min
        seq_len_day = SEQ_LEN_DAY if seq_len_day is None else seq_len_day
        self.min_data = norm_min(min_df).values
        self.day_data = norm_day(day_df).values
        # The daily context never changes between samples, so slice and
        # convert it once instead of on every __getitem__ call.
        self.day_window = torch.tensor(
            self.day_data[-seq_len_day:], dtype=torch.float32
        )

    def __len__(self):
        # Clamp at zero so short inputs give an empty dataset instead of a
        # negative length.
        return max(0, len(self.min_data) - self.seq_len_min)

    def __getitem__(self, idx):
        # Reject out-of-range indices explicitly; a negative index would
        # otherwise produce a silently wrong (empty or misaligned) window.
        if not 0 <= idx < len(self):
            raise IndexError(f"index {idx} out of range for {len(self)} samples")
        window_end = idx + self.seq_len_min
        min_x = self.min_data[idx:window_end]
        y = self.min_data[window_end][3]  # Close
        return (
            self.day_window,
            torch.tensor(min_x, dtype=torch.float32),
            torch.tensor(y, dtype=torch.float32)
        )

# Shuffled mini-batch loader over daily windows (daily pretraining).
daily_loader = torch.utils.data.DataLoader(
    dataset=DailyDataset(daily_df), batch_size=BATCH_SIZE, shuffle=True
)

# Shuffled mini-batch loader over minute windows (minute pretraining).
min_loader = torch.utils.data.DataLoader(
    dataset=MinDataset(minute_df), batch_size=BATCH_SIZE, shuffle=True
)

# Shuffled mini-batch loader yielding (day_x, min_x, y) triples (hybrid training).
hybrid_loader = torch.utils.data.DataLoader(
    dataset=HybridDataset(minute_df, daily_df), batch_size=BATCH_SIZE, shuffle=True
)

# ============================================================
# 5. SEPARATE MODELS
# ============================================================
class DailyModel(nn.Module):
    """Single-layer LSTM (32 hidden units) followed by a linear head.

    ``forward`` takes a batch-first sequence tensor and returns one value
    per sequence, computed from the LSTM's final hidden state.
    """

    def __init__(self, input_dim):
        super().__init__()
        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=32, batch_first=True)
        self.fc = nn.Linear(in_features=32, out_features=1)

    def forward(self, x):
        # nn.LSTM returns (output, (h_n, c_n)); only h_n is needed here.
        final_hidden = self.lstm(x)[1][0]
        last_layer = final_hidden[-1]
        return self.fc(last_layer)

class MinuteModel(nn.Module):
    """Single-layer LSTM (64 hidden units) followed by a linear head.

    ``forward`` takes a batch-first sequence tensor and returns one value
    per sequence, computed from the LSTM's final hidden state.
    """

    def __init__(self, input_dim):
        super().__init__()
        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=64, batch_first=True)
        self.fc = nn.Linear(in_features=64, out_features=1)

    def forward(self, x):
        # nn.LSTM returns (output, (h_n, c_n)); only h_n is needed here.
        final_hidden = self.lstm(x)[1][0]
        last_layer = final_hidden[-1]
        return self.fc(last_layer)

# ============================================================
# 6. HYBRID MODEL (USING PRETRAINED LSTMS)
# ============================================================
class HybridModel(nn.Module):
    """Fuse the final hidden states of a daily and a minute LSTM.

    Args:
        day_lstm: LSTM applied to the daily window.
        min_lstm: LSTM applied to the minute window.

    ``forward(day_x, min_x)`` concatenates the last hidden state of each
    LSTM and maps it through a small MLP to one value per sample.
    """

    def __init__(self, day_lstm, min_lstm):
        super().__init__()
        self.day_lstm = day_lstm
        self.min_lstm = min_lstm
        # Size the combiner from the LSTMs themselves rather than assuming
        # 32 + 64, so the model keeps working if either width is changed.
        fused_dim = day_lstm.hidden_size + min_lstm.hidden_size
        self.fc = nn.Sequential(
            nn.Linear(fused_dim, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, day_x, min_x):
        _, (h_day, _) = self.day_lstm(day_x)
        _, (h_min, _) = self.min_lstm(min_x)
        # h[-1] is the last entry of each final hidden state; join them
        # along the feature axis.
        h = torch.cat([h_day[-1], h_min[-1]], dim=1)
        return self.fc(h)

# ============================================================
# 7. PRETRAIN DAILY MODEL
# ============================================================
print("\nPretraining daily model...\n")

daily_model = DailyModel(daily_df.shape[1]).to(device)
optimizer_day = optim.Adam(daily_model.parameters(), lr=0.001)
# MSE loss; this same loss_fn object is reused by the later training loops.
loss_fn = nn.MSELoss()

daily_model.train()
for e in range(EPOCHS):
    total_loss = 0.0
    for x, y in daily_loader:
        x, y = x.to(device), y.to(device)
        # Drop only the trailing feature axis: a bare squeeze() would also
        # collapse the batch axis when the final batch holds a single
        # sample, leaving a 0-d prediction against a 1-d target.
        pred = daily_model(x).squeeze(-1)
        loss = loss_fn(pred, y)
        optimizer_day.zero_grad()
        loss.backward()
        optimizer_day.step()
        total_loss += loss.item()
    # Report the mean per-batch loss for the epoch.
    print(f"Epoch {e+1}/{EPOCHS} | Daily Loss: {total_loss/len(daily_loader):.6f}")

print("‚úÖ Daily pretraining complete")

# ============================================================
# 8. 7-DAY PREDICTION USING DAILY MODEL (INSTANTLY AFTER DAILY PRETRAINING)
# ============================================================
print("\nüîÆ 7-Day Predictions (using daily model):\n")

daily_model.eval()
current_seq = norm_day(daily_df.tail(SEQ_LEN_DAY)).values.copy()
last_date = daily_df.index[-1]

for i in range(7):
    x = torch.tensor(current_seq, dtype=torch.float32).unsqueeze(0).to(device)
    with torch.no_grad():
        pred_norm = daily_model(x).item()
    pred_price = denorm_day_close(pred_norm)
    future_date = last_date + timedelta(days=i+1)
    print(f"Day {i+1} ({future_date.date()}): {pred_price:.2f}")

    # Roll forward: append new row with predicted close (copy other features from last, approx)
    new_row = current_seq[-1].copy()
    new_row[3] = pred_norm  # Update close
    # Approximate other features (e.g., Open=prev Close, High/Low/Vol=avg of last few)
    new_row[0] = current_seq[-1][3]  # Open ‚âà prev Close
    new_row[1] = max(current_seq[-5:, 1].max(), pred_norm)  # High approx
    new_row[2] = min(current_seq[-5:, 2].min(), pred_norm)  # Low approx
    new_row[4] = current_seq[-5:, 4].mean()  # Vol avg
    current_seq = np.vstack((current_seq[1:], new_row))

# ============================================================
# 9. PRETRAIN MINUTE MODEL
# ============================================================
print("\nPretraining minute model...\n")

min_model = MinuteModel(minute_df.shape[1]).to(device)
optimizer_min = optim.Adam(min_model.parameters(), lr=0.001)

min_model.train()
for e in range(EPOCHS):
    total_loss = 0.0
    for x, y in min_loader:
        x, y = x.to(device), y.to(device)
        # Drop only the trailing feature axis: a bare squeeze() would also
        # collapse the batch axis when the final batch holds a single
        # sample, leaving a 0-d prediction against a 1-d target.
        pred = min_model(x).squeeze(-1)
        loss = loss_fn(pred, y)
        optimizer_min.zero_grad()
        loss.backward()
        optimizer_min.step()
        total_loss += loss.item()
    # Report the mean per-batch loss for the epoch.
    print(f"Epoch {e+1}/{EPOCHS} | Minute Loss: {total_loss/len(min_loader):.6f}")

print("‚úÖ Minute pretraining complete")

# ============================================================
# 10. TRAIN HYBRID MODEL (FREEZE LSTMS, TRAIN COMBINER)
# ============================================================
# Build the hybrid model on top of the two pretrained LSTM modules (the
# module objects are passed in directly, not copied).
hybrid_model = HybridModel(daily_model.lstm, min_model.lstm).to(device)

# Freeze pretrained LSTMs so only the combiner head is trained.
for param in hybrid_model.day_lstm.parameters():
    param.requires_grad = False
for param in hybrid_model.min_lstm.parameters():
    param.requires_grad = False

# Give the optimizer only the parameters that are still trainable.
optimizer_hybrid = optim.Adam(
    [p for p in hybrid_model.parameters() if p.requires_grad], lr=0.001
)

print("\nTraining hybrid combiner...\n")

for e in range(EPOCHS):
    total_loss = 0.0
    for day_x, min_x, y in hybrid_loader:
        day_x, min_x, y = day_x.to(device), min_x.to(device), y.to(device)
        # Drop only the trailing feature axis: a bare squeeze() would also
        # collapse the batch axis when the final batch holds a single
        # sample, leaving a 0-d prediction against a 1-d target.
        pred = hybrid_model(day_x, min_x).squeeze(-1)
        loss = loss_fn(pred, y)
        optimizer_hybrid.zero_grad()
        loss.backward()
        optimizer_hybrid.step()
        total_loss += loss.item()
    # Report the mean per-batch loss for the epoch.
    print(f"Epoch {e+1}/{EPOCHS} | Hybrid Loss: {total_loss/len(hybrid_loader):.6f}")

print("‚úÖ Hybrid training complete")

# ============================================================
# 11. LIVE PREDICTION LOOP (USING HYBRID MODEL, TRUE 1-MIN GAP, ROLLING WINDOW)
# ============================================================
print("\nüöÄ Live hybrid prediction started...\n")

hybrid_model.eval()
minute_db = minute_df.copy()
last_processed_time = minute_db.index.max()

while True:
    today_df = yf.download(
        TICKER,
        period="1d",
        interval="1m",
        auto_adjust=False,
        progress=False
    )

    today_df.index = today_df.index.tz_localize(None)
    if isinstance(today_df.columns, pd.MultiIndex):
        today_df.columns = today_df.columns.get_level_values(0)

    today_df = today_df[['Open','High','Low','Close','Volume']]
    new_rows = today_df[today_df.index > last_processed_time]

    if new_rows.empty:
        time.sleep(30)
        continue

    ts = new_rows.index[0]
    row = new_rows.iloc[0]

    if len(minute_db) >= SEQ_LEN_MIN:
        min_window = norm_min(minute_db.tail(SEQ_LEN_MIN))  # Rolling last 60 min
        day_window = norm_day(daily_df.tail(SEQ_LEN_DAY))

        min_x = torch.tensor(min_window.values, dtype=torch.float32).unsqueeze(0).to(device)
        day_x = torch.tensor(day_window.values, dtype=torch.float32).unsqueeze(0).to(device)

        with torch.no_grad():
            pred_norm = hybrid_model(day_x, min_x).item()

        pred_price = denorm_min_close(pred_norm)
        print(f"üîÆ {ts} | Predicted: {pred_price:.2f}")

    # Append REAL minute (updates the rolling window)
    minute_db.loc[ts] = row
    minute_db.to_csv(MIN_DB_FILE)

    print(f"üìç {ts} | Real: {row['Close']:.2f}")

    last_processed_time = ts
    time.sleep(60)