## 🔧 Setup: Check GPU and Install Dependencies

In [None]:
# Report the PyTorch build and, when a CUDA device is visible, its name and
# total memory, so the user knows up front whether training will run on GPU.
import torch
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU device: {torch.cuda.get_device_name(0)}")
    # total_memory is reported in bytes; divide by 1e9 to show gigabytes.
    print(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
else:
    # The warning emoji was stored as mis-decoded bytes; restored here.
    print("⚠️ GPU not available. Training will be slower on CPU.")

In [None]:
# Install required packages.
# The leading `!` is notebook shell-escape syntax, so this cell only runs inside
# an IPython/Colab session; `-q` keeps pip's output short.
!pip install -q torch pandas numpy yfinance scikit-learn transformers matplotlib seaborn tqdm
# NOTE(review): a failed `!pip` command does not appear to stop the cell, so this
# message may print even when the install did not succeed — confirm.
print("‚úÖ All packages installed successfully!")

## 📂 Option 1: Clone from GitHub (Recommended)

In [None]:
# Optional: fetch the project from GitHub instead of uploading files by hand.
# To use it, uncomment the two lines below and replace YOUR_USERNAME with the
# account that owns the repository. As written, this cell only prints a hint.
# !git clone https://github.com/YOUR_USERNAME/stock_price_prediction.git
# %cd stock_price_prediction
print("Skip this cell if uploading files manually")

## 📂 Option 2: Upload Project Files to Colab

In [None]:
# Upload project files if not using GitHub
from google.colab import files
import os

# Lay out the folders the rest of the notebook writes into: project sources,
# model checkpoints, and the on-disk news cache. Existing folders are kept.
for _project_dir in ('src', 'data/checkpoints', 'data/raw/news_cache'):
    os.makedirs(_project_dir, exist_ok=True)

print("Upload your Python files (model.py, train.py, etc.) when prompted...")
# Uncomment to open Colab's upload dialog:
# uploaded = files.upload()

## 📦 Define All Required Modules Inline (Standalone Version)

In [None]:
# Configuration
import os

# The NewsAPI key is read from the environment so that no credential is stored
# in the notebook itself. Set it before running this cell, for example:
#   os.environ["NEWS_API_KEY"] = "<your key>"
NEWS_API_KEY = os.environ.get("NEWS_API_KEY", "")
if not NEWS_API_KEY:
    print("[WARN] NEWS_API_KEY is not set; NewsAPI requests are expected to fail "
          "and news sentiment will fall back to 0.0")
# Number of past rows of Open/High/Low/Close/Volume flattened into each sample.
HISTORY_DAYS = 5
NEWSAPI_ENDPOINT = "https://newsapi.org/v2/everything"
# Not read by any code visible in this notebook.
INCLUDE_GLOBAL_SENTIMENT = False

In [None]:
# Data Gathering Module
import yfinance as yf
import pandas as pd
import numpy as np
import requests
import json
from datetime import datetime, timedelta
from pathlib import Path
from transformers import pipeline

# Folder holding cached NewsAPI responses; created up front (with parents) so
# later cache writes do not fail on a missing directory.
CACHE_DIR = Path("data") / "raw" / "news_cache"
CACHE_DIR.mkdir(parents=True, exist_ok=True)

# Module-wide handle to the sentiment classifier; None until first requested.
_sentiment_pipeline = None

def get_sentiment_pipeline():
    """Return the shared sentiment classifier, building it on the first call.

    The classifier is created once via ``transformers.pipeline`` with a pinned
    model name and revision, then reused by every later call.
    """
    global _sentiment_pipeline
    if _sentiment_pipeline is not None:
        return _sentiment_pipeline
    _sentiment_pipeline = pipeline(
        "sentiment-analysis",
        model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
        revision="714eb0f",
    )
    return _sentiment_pipeline

def fetch_newsapi_headlines(query: str, from_date: str, to_date: str, page_size: int = 20):
    """Return article titles for *query* from NewsAPI, with an on-disk cache.

    Args:
        query: Search term sent as the ``q`` parameter (the ticker, in this notebook).
        from_date: Start date string sent as ``from``.
        to_date: End date string sent as ``to``.
        page_size: Maximum number of articles requested.

    Returns:
        A list of non-empty title strings. An empty list is returned when the
        request fails or the API reports an error; such failures are NOT cached,
        so a later call can retry instead of being stuck with an empty result.
    """
    # Keep path separators in the query from pointing outside the cache folder.
    safe_query = query.replace("/", "_").replace("\\", "_")
    cache_file = CACHE_DIR / f"{safe_query}_{from_date}_{to_date}.json"
    if cache_file.exists():
        with open(cache_file, "r", encoding="utf-8") as f:
            return json.load(f)

    params = {
        "q": query,
        "from": from_date,
        "to": to_date,
        "language": "en",
        "pageSize": page_size,
        "sortBy": "publishedAt",
        "apiKey": NEWS_API_KEY,
    }
    try:
        # Without a timeout a stalled connection would hang the notebook.
        resp = requests.get(NEWSAPI_ENDPOINT, params=params, timeout=10)
        payload = resp.json()
    except (requests.RequestException, ValueError) as exc:
        # Only the exception type is printed: the full message can contain the
        # request URL, which carries the API key as a query parameter.
        print(f"[WARN] NewsAPI request failed for {query}: {type(exc).__name__}")
        return []

    if payload.get("status") != "ok":
        print(f"[WARN] NewsAPI error for {query}: {payload}")
        return []

    # Skip articles whose title is missing, null, or empty.
    articles = payload.get("articles") or []
    headlines = [a["title"] for a in articles if a.get("title")]
    with open(cache_file, "w", encoding="utf-8") as f:
        json.dump(headlines, f, ensure_ascii=False, indent=2)
    return headlines

def compute_sentiment_score(headlines):
    """Average signed sentiment over *headlines*; 0.0 when there are none.

    Each headline is classified individually. A label containing "pos" adds the
    classifier's confidence, one containing "neg" subtracts it, and anything
    else (including a headline the classifier fails on) counts as 0.0.

    Returns:
        The mean of the per-headline signed scores as a Python float.
    """
    if not headlines:
        return 0.0

    classify = get_sentiment_pipeline()

    def _signed(text):
        # Any failure while scoring one headline is treated as neutral.
        try:
            top = classify(text)[0]
            tag = top["label"].lower()
            confidence = top["score"]
            if "pos" in tag:
                return confidence
            if "neg" in tag:
                return -confidence
            return 0.0
        except Exception:
            return 0.0

    return float(np.mean([_signed(text) for text in headlines]))

def _flatten_price_columns(frame):
    """Return *frame* with single-level columns, keeping only the first level.

    Some yfinance versions return two-level columns even for one ticker.
    Assumes level 0 holds the field names ("Open", "Close", ...) — TODO confirm
    against the installed yfinance version.
    """
    if isinstance(frame.columns, pd.MultiIndex):
        frame = frame.copy()
        frame.columns = frame.columns.get_level_values(0)
    return frame


def gather_data(ticker: str, days_back=60):
    """Download prices for *ticker* and build supervised samples.

    Each sample concatenates, in this order:
      * the previous ``HISTORY_DAYS`` rows of Open/High/Low/Close/Volume, flattened;
      * company and global sentiment for the current row (global is always 0.0);
      * three placeholder macro values (constants plus seeded Gaussian noise);
      * the VIX close for the current row (0.0 when unavailable).
    The target is the Close of the row that follows the current row.

    Args:
        ticker: Symbol passed to ``yf.download`` and used as the news query.
        days_back: Calendar days of price history to request.

    Returns:
        ``(X, y)`` as float32 arrays; ``X`` has one row per sample.

    Raises:
        ValueError: If no price data is returned, or there are too few rows to
            build even one sample.
    """
    end = datetime.today()
    start_stock = end - timedelta(days=days_back)
    NEWS_DAYS = 20  # news is only fetched for roughly the last 20 days
    start_news = end - timedelta(days=NEWS_DAYS)

    df = yf.download(ticker, start=start_stock, end=end, progress=False, auto_adjust=False)
    if df.empty:
        raise ValueError(f"No data found for {ticker}")
    df = _flatten_price_columns(df)

    n_rows = len(df)
    if n_rows < HISTORY_DAYS + 2:
        # One sample needs HISTORY_DAYS rows of history, the current row, and
        # one following row for the target.
        raise ValueError(
            f"Not enough data for {ticker}: got {n_rows} rows, "
            f"need at least {HISTORY_DAYS + 2}"
        )

    vix = _flatten_price_columns(
        yf.download("^VIX", start=start_stock, end=end, progress=False, auto_adjust=False)
    )
    if vix.empty or "Close" not in vix.columns:
        df["vix_index"] = np.nan  # replaced by 0.0 when features are built
    else:
        df["vix_index"] = vix["Close"].reindex(df.index).ffill()

    sentiments = []
    for dt in df.index:
        date_str = dt.strftime("%Y-%m-%d")
        comp_score, global_score = 0.0, 0.0
        if not dt < start_news:
            try:
                company_news = fetch_newsapi_headlines(ticker, date_str, date_str)
                comp_score = compute_sentiment_score(company_news)
            except Exception as exc:
                # Sentiment is best-effort, but Ctrl-C must still interrupt the loop,
                # so only Exception (not BaseException) is caught here.
                print(f"[WARN] Sentiment unavailable for {ticker} on {date_str}: "
                      f"{type(exc).__name__}")
                comp_score = 0.0
        sentiments.append((comp_score, global_score))
    df["sentiment_comp"] = [s[0] for s in sentiments]
    df["sentiment_global"] = [s[1] for s in sentiments]

    # Placeholder macro features: fixed baselines plus small seeded noise, not
    # real economic data. Seeding keeps them identical from run to run.
    np.random.seed(42)
    df["interest_rate"] = 5.0 + np.random.normal(0, 0.1, len(df))
    df["inflation_rate"] = 2.5 + np.random.normal(0, 0.05, len(df))
    df["gdp_growth"] = 1.8 + np.random.normal(0, 0.03, len(df))

    # Pull the columns out once instead of slicing the frame on every iteration.
    ohlcv = df[["Open", "High", "Low", "Close", "Volume"]].to_numpy(dtype=np.float64)
    context = df[
        ["sentiment_comp", "sentiment_global", "interest_rate", "inflation_rate", "gdp_growth"]
    ].to_numpy(dtype=np.float32)
    vix_values = df["vix_index"].fillna(0.0).to_numpy(dtype=np.float32)
    close = ohlcv[:, 3].astype(np.float32)

    X, y = [], []
    for i in range(HISTORY_DAYS, n_rows - 1):
        window = ohlcv[i - HISTORY_DAYS:i].ravel()
        X.append(np.concatenate([window, context[i], vix_values[i:i + 1]]))
        y.append(close[i + 1])
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)

print("✅ Data gathering module loaded")

In [None]:
# Preprocessing Module
from sklearn.preprocessing import StandardScaler

def scale_features(X, y=None):
    """Standardize features, and optionally targets, with freshly fitted scalers.

    Args:
        X: Feature matrix handed to ``StandardScaler.fit_transform``.
        y: Optional target array; reshaped to a single column before scaling.

    Returns:
        ``(X_scaled, scaler_X)`` when *y* is None, otherwise
        ``(X_scaled, y_scaled, scaler_X, scaler_y)``.
    """
    feature_scaler = StandardScaler()
    features = feature_scaler.fit_transform(X)
    if y is None:
        return features, feature_scaler

    target_scaler = StandardScaler()
    targets = target_scaler.fit_transform(y.reshape(-1, 1))
    return features, targets, feature_scaler, target_scaler

print("‚úÖ Preprocessing module loaded")

In [None]:
# Dataset Module
import torch
from torch.utils.data import Dataset

class StockDataset(Dataset):
    """Tensor-backed dataset of (features, target) pairs.

    Args:
        X: Array-like of per-sample feature rows.
        y: Array-like of targets, either one value per sample or one column
            per sample.

    Each item's target is returned flattened to one dimension. With a single
    target value per sample that is shape ``(1,)``, so default batching gives
    ``(batch, 1)`` and lines up with a model that outputs ``(batch, 1)``.
    Returning ``(1, 1)`` instead would batch to ``(batch, 1, 1)``, which
    element-wise losses broadcast against ``(batch, 1)`` into an all-pairs
    comparison.
    """

    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        # reshape(-1) also turns a scalar target (1-D `y`) into shape (1,).
        return self.X[idx], self.y[idx].reshape(-1)

print("✅ Dataset module loaded")

In [None]:
# Advanced Model Architecture
import torch
import torch.nn as nn
import torch.nn.functional as F

class AdvancedStockPredictor(nn.Module):
    """Regressor stacking a projection, bidirectional LSTM, self-attention,
    residual blocks and an MLP head that emits one value per sample.

    Args:
        input_dim: Size of each input feature vector.
        hidden_dim: Width of the projection and of each LSTM direction.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout rate used throughout (halved in the last head layers).

    Note:
        ``forward`` inserts a sequence axis of length 1 before the LSTM, so the
        recurrent and attention layers see a single time step per sample.
        The attention layer uses 8 heads, so ``hidden_dim * 2`` must be
        divisible by 8.
    """

    def __init__(self, input_dim, hidden_dim=256, num_layers=3, dropout=0.3):
        super().__init__()

        self.input_dim = input_dim
        self.hidden_dim = hidden_dim

        # The bidirectional LSTM concatenates both directions, doubling the width.
        wide = hidden_dim * 2
        half = hidden_dim // 2
        quarter = hidden_dim // 4
        # Inter-layer LSTM dropout is only meaningful with more than one layer.
        lstm_dropout = dropout if num_layers > 1 else 0.0

        # Sub-modules are created in a fixed order so parameter names and
        # initialization order stay stable.
        self.input_projection = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
        )

        self.lstm = nn.LSTM(
            hidden_dim,
            hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=lstm_dropout,
            bidirectional=True,
        )

        self.attention = nn.MultiheadAttention(
            embed_dim=wide,
            num_heads=8,
            dropout=dropout,
            batch_first=True,
        )

        self.residual_blocks = nn.ModuleList(
            [ResidualBlock(wide, dropout) for _ in range(3)]
        )

        self.output_network = nn.Sequential(
            nn.Linear(wide, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, half),
            nn.LayerNorm(half),
            nn.ReLU(),
            nn.Dropout(dropout / 2),
            nn.Linear(half, quarter),
            nn.ReLU(),
            nn.Dropout(dropout / 2),
            nn.Linear(quarter, 1),
        )

        self.apply(self._init_weights)

    def _init_weights(self, module):
        """Xavier-initialize Linear and LSTM weights and zero their biases."""
        if isinstance(module, nn.Linear):
            nn.init.xavier_uniform_(module.weight)
            if module.bias is not None:
                nn.init.constant_(module.bias, 0)
            return
        if not isinstance(module, nn.LSTM):
            return
        for name, param in module.named_parameters():
            if 'weight' in name:
                nn.init.xavier_uniform_(param)
            elif 'bias' in name:
                nn.init.constant_(param, 0)

    def forward(self, x):
        """Map a batch of feature vectors to one prediction each.

        Assumes ``x`` is ``(batch, input_dim)`` — the length-1 sequence axis is
        added here and removed again after attention.
        """
        projected = self.input_projection(x).unsqueeze(1)
        recurrent, _ = self.lstm(projected)
        attended, _ = self.attention(recurrent, recurrent, recurrent)
        features = attended.squeeze(1)
        for block in self.residual_blocks:
            features = block(features)
        return self.output_network(features)

class ResidualBlock(nn.Module):
    """Two-layer feed-forward block with a skip connection.

    Computes ``dropout(relu(block(x) + x))``; the output has the same shape as
    the input.

    Args:
        dim: Feature width of both the input and the output.
        dropout: Dropout rate used inside the block and on its output.
    """

    def __init__(self, dim, dropout=0.3):
        super().__init__()
        layers = [
            nn.Linear(dim, dim),
            nn.LayerNorm(dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(dim, dim),
            nn.LayerNorm(dim),
        ]
        self.block = nn.Sequential(*layers)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        transformed = self.block(x)
        # Skip connection: add the input back before the non-linearity.
        activated = F.relu(transformed + x)
        return self.dropout(activated)

print("‚úÖ Model architecture loaded")

In [None]:
# Training Module with Early Stopping
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from tqdm.auto import tqdm

class EarlyStopping:
    """Track a score to minimize and signal when it has stopped improving.

    Args:
        patience: Consecutive non-improving calls tolerated before stopping.
        min_delta: Amount by which a score must undercut the best so far to
            count as an improvement.

    A CPU copy of the model's ``state_dict`` is kept for the best score seen
    and can be restored with ``load_best_model``.
    """

    def __init__(self, patience=10, min_delta=1e-4):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.best_model_state = None

    @staticmethod
    def _snapshot(model):
        """Copy every entry of the model's state dict onto the CPU."""
        return {key: value.cpu().clone() for key, value in model.state_dict().items()}

    def __call__(self, score, model):
        """Record *score*; return True once patience has been exhausted."""
        is_first = self.best_score is None
        if is_first or score < self.best_score - self.min_delta:
            self.best_score = score
            self.best_model_state = self._snapshot(model)
            if not is_first:
                self.counter = 0
            return self.early_stop

        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True
        return self.early_stop

    def load_best_model(self, model):
        """Load the best recorded weights into *model*, if any were recorded."""
        if self.best_model_state is None:
            return
        model.load_state_dict(self.best_model_state)

def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over *loader* and return the sample-weighted mean loss.

    With an *optimizer* the model is trained (gradients clipped to norm 1.0);
    without one it is evaluated with gradients disabled.
    """
    training = optimizer is not None
    model.train(training)
    running = 0.0
    with torch.set_grad_enabled(training):
        for X_batch, y_batch in loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            y_pred = model(X_batch)
            # Force targets to the prediction's shape: a (B, 1, 1) target against
            # a (B, 1) prediction would broadcast into an all-pairs loss.
            loss = criterion(y_pred, y_batch.reshape(y_pred.shape))
            if training:
                optimizer.zero_grad()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()
            running += loss.item() * X_batch.size(0)
    return running / len(loader.dataset)


def train_deep_learning_model(
    X, y, epochs=200, batch_size=64, lr=1e-3,
    hidden_dim=256, num_layers=3, dropout=0.3,
    train_split=0.7, val_split=0.15, patience=15
):
    """Train an AdvancedStockPredictor with chronological splits and early stopping.

    The data is split in order (no shuffling) into train, validation and test
    parts; scalers are fitted on the training part only. Training uses MSE loss,
    AdamW, a reduce-on-plateau learning-rate schedule, and early stopping on the
    validation loss. The best weights are restored before the test evaluation.

    Args:
        X: Feature matrix, one row per sample.
        y: Target vector, one value per sample.
        epochs: Maximum number of epochs.
        batch_size: Mini-batch size for all loaders.
        lr: Initial learning rate.
        hidden_dim, num_layers, dropout: Passed to the model constructor.
        train_split: Fraction of samples used for training.
        val_split: Fraction used for validation; the remainder is the test set.
        patience: Early-stopping patience in epochs.

    Returns:
        ``(model, history, (scaler_X, scaler_y), metrics)`` where ``history``
        holds per-epoch ``train_loss``, ``val_loss`` and ``lr`` lists and
        ``metrics`` holds test predictions/targets in original units plus
        ``mse``, ``mae``, ``rmse`` and ``mape``.

    Raises:
        ValueError: If the split fractions leave no room for a test set.
    """
    if train_split <= 0.0 or val_split <= 0.0 or train_split + val_split >= 1.0:
        raise ValueError(
            "train_split and val_split must be positive and sum to less than 1.0"
        )

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"\n{'='*60}")
    print(f"Training on: {device}")
    if device.type == 'cuda':
        print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"{'='*60}\n")

    # Chronological splits: shuffle=False keeps later samples out of training.
    n_samples = len(X)
    indices = np.arange(n_samples)
    test_size = 1.0 - train_split - val_split
    train_val_idx, test_idx = train_test_split(
        indices, test_size=test_size, random_state=42, shuffle=False
    )
    # Validation share expressed relative to the train+val portion.
    val_size_adjusted = val_split / (train_split + val_split)
    train_idx, val_idx = train_test_split(
        train_val_idx, test_size=val_size_adjusted, random_state=42, shuffle=False
    )

    print(f"Data Split:")
    print(f"  Train: {len(train_idx)} ({len(train_idx)/n_samples*100:.1f}%)")
    print(f"  Val:   {len(val_idx)} ({len(val_idx)/n_samples*100:.1f}%)")
    print(f"  Test:  {len(test_idx)} ({len(test_idx)/n_samples*100:.1f}%)\n")

    X_train, y_train = X[train_idx], y[train_idx]
    X_val, y_val = X[val_idx], y[val_idx]
    X_test, y_test = X[test_idx], y[test_idx]

    # Fit scalers on the training part only, then reuse them for val and test.
    X_train_scaled, y_train_scaled, scaler_X, scaler_y = scale_features(X_train, y_train)
    X_val_scaled = scaler_X.transform(X_val)
    y_val_scaled = scaler_y.transform(y_val.reshape(-1, 1))
    X_test_scaled = scaler_X.transform(X_test)
    y_test_scaled = scaler_y.transform(y_test.reshape(-1, 1))

    train_dataset = StockDataset(X_train_scaled, y_train_scaled)
    val_dataset = StockDataset(X_val_scaled, y_val_scaled)
    test_dataset = StockDataset(X_test_scaled, y_test_scaled)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    model = AdvancedStockPredictor(
        input_dim=X.shape[1], hidden_dim=hidden_dim,
        num_layers=num_layers, dropout=dropout
    ).to(device)

    total_params = sum(p.numel() for p in model.parameters())
    print(f"Model Parameters: {total_params:,}\n")

    criterion = nn.MSELoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-5)
    # `verbose` is deprecated on PyTorch schedulers (and may be rejected by newer
    # releases), so learning-rate changes are reported manually in the loop.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=5
    )
    early_stopping = EarlyStopping(patience=patience)

    history = {'train_loss': [], 'val_loss': [], 'lr': []}

    print("Starting training...\n")
    for epoch in tqdm(range(epochs), desc="Training"):
        train_loss = _run_epoch(model, train_loader, criterion, device, optimizer)
        val_loss = _run_epoch(model, val_loader, criterion, device)

        lr_before = optimizer.param_groups[0]['lr']
        history['train_loss'].append(train_loss)
        history['val_loss'].append(val_loss)
        history['lr'].append(lr_before)

        scheduler.step(val_loss)
        lr_after = optimizer.param_groups[0]['lr']
        if lr_after != lr_before:
            print(f"\nEpoch {epoch+1}: reducing learning rate to {lr_after:.4e}")

        if (epoch + 1) % 10 == 0:
            print(f"\nEpoch {epoch+1}/{epochs} | Train: {train_loss:.6f} | Val: {val_loss:.6f}")

        if early_stopping(val_loss, model):
            print(f"\nEarly stopping at epoch {epoch+1}")
            break

    # Restore the weights from the best validation epoch before testing.
    early_stopping.load_best_model(model)
    model = model.to(device)

    model.eval()
    predictions, targets = [], []
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            y_pred = model(X_batch.to(device))
            predictions.append(y_pred.cpu().numpy().ravel())
            targets.append(y_batch.numpy().ravel())

    predictions = np.concatenate(predictions)
    targets = np.concatenate(targets)

    # Report metrics in original units, not in scaled space.
    pred_orig = scaler_y.inverse_transform(predictions.reshape(-1, 1)).flatten()
    target_orig = scaler_y.inverse_transform(targets.reshape(-1, 1)).flatten()

    mse = np.mean((pred_orig - target_orig) ** 2)
    mae = np.mean(np.abs(pred_orig - target_orig))
    rmse = np.sqrt(mse)
    mape = np.mean(np.abs((target_orig - pred_orig) / target_orig)) * 100

    print(f"\n{'='*60}")
    print("Test Set Performance")
    print(f"{'='*60}")
    print(f"MSE:  {mse:.4f}")
    print(f"MAE:  {mae:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"MAPE: {mape:.2f}%")
    print(f"{'='*60}")

    return model, history, (scaler_X, scaler_y), {
        'predictions': pred_orig,
        'targets': target_orig,
        'mse': mse, 'mae': mae, 'rmse': rmse, 'mape': mape
    }

print("✅ Training module loaded")

## 📊 Gather and Prepare Data

In [None]:
# Select the stock ticker and how much history to request, then build the
# feature matrix X and target vector y used by the training cell.
TICKER = "AAPL"  # Change to any stock ticker
DAYS_BACK = 200  # Calendar days requested; more history gives more samples

print(f"Gathering data for {TICKER}...")
X, y = gather_data(TICKER, days_back=DAYS_BACK)

# The check-mark emoji was stored as mis-decoded bytes; restored here.
print("\n✅ Data gathered successfully!")
print(f"Features shape: {X.shape}")
print(f"Target shape: {y.shape}")
print(f"Total samples: {len(X)}")

## 🚀 Train Deep Learning Model

In [None]:
# Train the advanced model on the gathered data. The returned objects are used
# by the plotting, saving and prediction cells that follow.
model, history, scalers, test_metrics = train_deep_learning_model(
    X, y,
    epochs=200,           # Maximum epochs (early stopping may end training sooner)
    batch_size=64,        # Mini-batch size
    lr=1e-3,              # Initial learning rate
    hidden_dim=256,       # Hidden layer size
    num_layers=3,         # LSTM layers
    dropout=0.3,          # Dropout rate
    train_split=0.7,      # 70% training
    val_split=0.15,       # 15% validation (the remaining 15% is the test set)
    patience=15           # Early stopping patience
)

# The check-mark emoji was stored as mis-decoded bytes; restored here.
print("\n✅ Training completed!")

## 📈 Visualize Training Progress

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# One row of three panels: loss curves, learning-rate schedule, and a
# predicted-vs-actual scatter for the held-out test set.
sns.set_style('whitegrid')
fig, (ax_loss, ax_lr, ax_fit) = plt.subplots(1, 3, figsize=(15, 5))

# Panel 1: training and validation loss per epoch
ax_loss.plot(history['train_loss'], label='Train Loss', linewidth=2)
ax_loss.plot(history['val_loss'], label='Val Loss', linewidth=2)
ax_loss.set_xlabel('Epoch', fontsize=12)
ax_loss.set_ylabel('Loss (MSE)', fontsize=12)
ax_loss.set_title('Training Progress', fontsize=14, fontweight='bold')
ax_loss.legend(fontsize=10)
ax_loss.grid(True, alpha=0.3)

# Panel 2: learning rate per epoch on a log axis, so halvings are evenly spaced
ax_lr.plot(history['lr'], color='green', linewidth=2)
ax_lr.set_xlabel('Epoch', fontsize=12)
ax_lr.set_ylabel('Learning Rate', fontsize=12)
ax_lr.set_title('Learning Rate Schedule', fontsize=14, fontweight='bold')
ax_lr.set_yscale('log')
ax_lr.grid(True, alpha=0.3)

# Panel 3: test predictions against actual prices, with the y = x reference line
actual_prices = test_metrics['targets']
diagonal = [actual_prices.min(), actual_prices.max()]
ax_fit.scatter(actual_prices, test_metrics['predictions'], alpha=0.6)
ax_fit.plot(diagonal, diagonal, 'r--', linewidth=2, label='Perfect Prediction')
ax_fit.set_xlabel('Actual Price', fontsize=12)
ax_fit.set_ylabel('Predicted Price', fontsize=12)
ax_fit.set_title('Test Set: Predictions vs Actual', fontsize=14, fontweight='bold')
ax_fit.legend(fontsize=10)
ax_fit.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

print("\nTest Set Metrics:")
print(f"RMSE: ${test_metrics['rmse']:.2f}")
print(f"MAE:  ${test_metrics['mae']:.2f}")
print(f"MAPE: {test_metrics['mape']:.2f}%")

## 💾 Save Model and Scalers

In [None]:
import pickle
from pathlib import Path

# Create output directory
output_dir = Path('trained_models')
output_dir.mkdir(exist_ok=True)

# Save model weights only (state_dict). Rebuilding the model later requires the
# same constructor arguments that were used for training.
model_path = output_dir / 'advanced_stock_model.pth'
torch.save(model.state_dict(), model_path)
print(f"✅ Model saved to {model_path}")

# Save scalers so new inputs can be scaled exactly as during training.
# NOTE: pickle files must only be loaded from trusted sources.
scaler_X, scaler_y = scalers
scaler_x_path = output_dir / 'scaler_X.pkl'
scaler_y_path = output_dir / 'scaler_y.pkl'
with open(scaler_x_path, 'wb') as f:
    pickle.dump(scaler_X, f)
with open(scaler_y_path, 'wb') as f:
    pickle.dump(scaler_y, f)
print("✅ Scalers saved")

# Download to the local machine when running in Colab. Elsewhere the import is
# unavailable, so the files are simply left in `output_dir` instead of crashing.
try:
    from google.colab import files
except ImportError:
    print(f"\nNot running in Colab; files are available in {output_dir}/")
else:
    print("\nDownloading files...")
    files.download(str(model_path))
    files.download(str(scaler_x_path))
    files.download(str(scaler_y_path))
    print("\n✅ All files ready for download!")

## 🔮 Make Predictions on New Data

In [None]:
def predict_next_day(model, scalers, ticker, device='cpu'):
    """Predict a closing price for *ticker* from the most recent feature sample.

    Args:
        model: Trained model; it is moved to *device* so that model and input
            always sit on the same device.
        scalers: ``(scaler_X, scaler_y)`` as returned by training.
        ticker: Symbol passed to ``gather_data``.
        device: Device used for inference.

    Returns:
        The prediction in original price units, as a float.

    Raises:
        ValueError: If no feature sample could be built for *ticker*.

    NOTE(review): ``gather_data`` pairs the sample at row i with the Close at
    row i+1 and stops at the second-to-last row, so its last sample corresponds
    to the most recent downloaded Close rather than the session after it —
    confirm this is the intended prediction horizon.
    """
    scaler_X, scaler_y = scalers

    # Gather latest data and keep only the newest sample (as a 1-row batch).
    X_latest, _ = gather_data(ticker, days_back=60)
    if len(X_latest) == 0:
        raise ValueError(f"No feature samples available for {ticker}")
    X_last = X_latest[-1:]

    # Scale with the training-time scaler, then predict.
    X_scaled = scaler_X.transform(X_last)
    # Without this, a GPU-trained model called with the default device='cpu'
    # fails on a device mismatch.
    model = model.to(device)
    X_tensor = torch.tensor(X_scaled, dtype=torch.float32).to(device)

    model.eval()
    with torch.no_grad():
        y_pred_scaled = model(X_tensor)
        y_pred = scaler_y.inverse_transform(y_pred_scaled.cpu().numpy())

    return float(y_pred[0, 0])

# Example prediction
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
predicted_price = predict_next_day(model, scalers, TICKER, device)
print(f"\n🔮 Predicted next day closing price for {TICKER}: ${predicted_price:.2f}")

## 📊 Final Summary

### Key Improvements Over Simple Model:

1. **Advanced Architecture**
   - ✅ LSTM layers for temporal pattern recognition
   - ✅ Multi-head attention mechanism
   - ✅ Residual connections for deeper networks

2. **Proper Data Management**
   - ✅ 70/15/15 train/val/test split
   - ✅ No data leakage (scalers fit on training only)
   - ✅ Temporal ordering preserved

3. **Regularization Techniques**
   - ✅ Dropout layers (0.3 rate)
   - ✅ Early stopping (patience=15)
   - ✅ L2 weight decay (1e-5)
   - ✅ Gradient clipping
   - ✅ Layer normalization

4. **Training Optimizations**
   - ✅ AdamW optimizer
   - ✅ Learning rate scheduling
   - ✅ GPU acceleration
   - ✅ Batch processing

### Next Steps:
- Experiment with different hyperparameters
- Try different stock tickers
- Extend to multi-day predictions
- Add more advanced features
- Implement ensemble methods