In [5]:
import pandas as pd
import numpy as np
import torch
from torch import nn, optim
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from ta.momentum import RSIIndicator
from ta.volatility import BollingerBands
from ta.trend import SMAIndicator
from ta.volume import OnBalanceVolumeIndicator

# 1. Compute Technical Indicators
def compute_technical_indicators(df, rsi_window=14, ma_window=20, bb_window=20):
    """Return a copy of ``df`` extended with technical-indicator columns.

    The indicators are computed with the ``ta`` library from the ``Close``
    and ``Volume`` columns and stored as ``RSI``, ``MA``, ``BB_High``,
    ``BB_Low`` and ``OBV``.

    Args:
        df: DataFrame containing at least ``Close`` and ``Volume`` columns.
        rsi_window: Window passed to ``RSIIndicator``.
        ma_window: Window passed to ``SMAIndicator``.
        bb_window: Window passed to ``BollingerBands``.

    Returns:
        A new DataFrame with the indicator columns appended and every row
        that contains a NaN in any column removed. The caller's frame is
        left untouched.
    """
    # Work on a copy so the raw frame stays reusable by other cells.
    enriched = df.copy()
    close = enriched['Close']

    enriched['RSI'] = RSIIndicator(close=close, window=rsi_window).rsi()
    enriched['MA'] = SMAIndicator(close=close, window=ma_window).sma_indicator()

    bands = BollingerBands(close=close, window=bb_window)
    enriched['BB_High'] = bands.bollinger_hband()
    enriched['BB_Low'] = bands.bollinger_lband()

    enriched['OBV'] = OnBalanceVolumeIndicator(
        close=close, volume=enriched['Volume']
    ).on_balance_volume()

    # dropna() looks at every column, not only the indicators added above.
    return enriched.dropna()

# 2. Generate Labels: Buy (1) or Short (0)
def generate_labels(df, horizon=90):
    """Return a copy of ``df`` with ``Future_Close`` and ``Signal`` columns.

    ``Future_Close`` is the ``Close`` value ``horizon`` rows later and
    ``Signal`` is 1 (buy) when that future close is strictly greater than
    the current close, otherwise 0 (short).

    Args:
        df: DataFrame with a ``Close`` column.
        horizon: Number of ROWS to look ahead (not calendar days). Must be
            at least 1. Defaults to 90.

    Returns:
        A new DataFrame with ``Future_Close`` and ``Signal`` appended (in
        that order) and every row containing a NaN removed, which drops the
        final ``horizon`` rows. The caller's frame is left untouched.

    Raises:
        ValueError: If ``horizon`` is smaller than 1.

    Note:
        ``Future_Close`` stays in the returned frame; it holds the future
        price, so it must not be passed to a model as an input feature.
    """
    if horizon < 1:
        raise ValueError(f"horizon must be >= 1, got {horizon}")

    # Copy first so the label columns never leak into the caller's frame.
    labelled = df.copy()
    labelled['Future_Close'] = labelled['Close'].shift(-horizon)
    # Rows whose future close is NaN get a provisional 0 here and are
    # removed by the dropna() below.
    labelled['Signal'] = np.where(labelled['Future_Close'] > labelled['Close'], 1, 0)
    return labelled.dropna()

# 3. Preprocess Data
def preprocess_data(df, scaler=None):
    """Scale the model's feature columns and return the scaled copy.

    Args:
        df: DataFrame containing the columns ``RSI``, ``MA``, ``BB_High``,
            ``BB_Low``, ``OBV``, ``Close`` and ``Volume``.
        scaler: Optional already-fitted scaler exposing ``transform``. When
            given, it is applied as-is (use this for validation/test data
            so they are scaled with the training statistics). When omitted,
            a new ``MinMaxScaler`` is fitted on ``df``.

    Returns:
        ``(scaled_df, scaler)`` where ``scaled_df`` is a new DataFrame with
        the feature columns replaced by their scaled values and ``scaler``
        is the scaler that was used. The caller's frame is left untouched.
    """
    features = ['RSI', 'MA', 'BB_High', 'BB_Low', 'OBV', 'Close', 'Volume']
    # Copy so the unscaled values remain available to the caller.
    scaled = df.copy()
    if scaler is None:
        scaler = MinMaxScaler()
        scaled[features] = scaler.fit_transform(scaled[features])
    else:
        # Transform only: never re-fit a scaler that was handed in.
        scaled[features] = scaler.transform(scaled[features])
    return scaled, scaler

# 4. Dataset and DataLoader
class StockDataset(torch.utils.data.Dataset):
    """Sliding-window dataset over a feature/label DataFrame.

    Sample ``i`` consists of the ``lookback`` consecutive rows starting at
    row ``i`` (every column except the last) as the input window, and the
    last-column value of the row immediately after that window as the
    target.

    Args:
        data: DataFrame whose last column is the label and whose other
            columns are numeric features.
        lookback: Number of rows in each input window.
    """

    def __init__(self, data, lookback=252):
        self.data = data
        self.lookback = lookback

    def __len__(self):
        # A frame with no more rows than `lookback` yields no samples;
        # clamp at 0 because len() rejects negative values.
        return max(0, len(self.data) - self.lookback)

    def __getitem__(self, idx):
        """Return ``(window, label)`` as float32 / long tensors.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        size = len(self)
        # Support Python-style negative indexing instead of silently
        # returning a wrong slice.
        position = idx + size if idx < 0 else idx
        if not 0 <= position < size:
            raise IndexError(f"index {idx} is out of range for dataset of size {size}")

        stop = position + self.lookback
        x = self.data.iloc[position:stop, :-1].values
        y = self.data.iloc[stop, -1]
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.long)

# 5. Define the Model
class StockPredictor(nn.Module):
    """LSTM encoder followed by a small feed-forward head with two logits.

    Args:
        input_dim: Number of features per time step.
    """

    def __init__(self, input_dim):
        super().__init__()
        # Sequence encoder: batch-first input, 64 hidden units.
        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=64, batch_first=True)
        # Classification head: 64 -> 32 -> 2 (Buy or Short).
        head_layers = [
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 2),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Map a batch of windows ``(batch, seq_len, input_dim)`` to logits ``(batch, 2)``."""
        _sequence_out, (hidden_state, _cell_state) = self.lstm(x)
        # Only the final hidden state of the last LSTM layer feeds the head.
        final_hidden = hidden_state[-1]
        return self.fc(final_hidden)

# 6. Training and Evaluation
def train_model(model, train_loader, val_loader, epochs=20, lr=0.001):
    """Train ``model`` with Adam and cross-entropy, validating after each epoch.

    Args:
        model: Module mapping a batch ``x`` to class logits.
        train_loader: Iterable of ``(x, y)`` training batches.
        val_loader: Iterable of ``(x, y)`` validation batches, forwarded to
            ``evaluate_model``.
        epochs: Number of passes over ``train_loader``.
        lr: Adam learning rate.

    Returns:
        A list with one ``(train_loss, val_loss)`` tuple per epoch. The
        train loss is the mean of the per-batch losses, so it is on the
        same scale as the validation loss reported next to it.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    history = []
    for epoch in range(epochs):
        # evaluate_model() switches to eval mode, so re-enable training
        # mode at the start of every epoch.
        model.train()
        running_loss = 0.0
        num_batches = 0
        for x, y in train_loader:
            optimizer.zero_grad()
            outputs = model(x)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1
        # Average per batch (previously the raw sum was printed beside an
        # averaged validation loss); NaN signals an empty loader.
        train_loss = running_loss / num_batches if num_batches else float('nan')
        val_loss = evaluate_model(model, val_loader, criterion)
        print(f"Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")
        history.append((train_loss, val_loss))
    return history

def evaluate_model(model, val_loader, criterion):
    """Return the mean per-batch loss of ``model`` over ``val_loader``.

    The model is switched to eval mode and gradients are disabled while
    the batches are processed; the model is left in eval mode afterwards.

    Args:
        model: Module mapping a batch ``x`` to predictions.
        val_loader: Iterable of ``(x, y)`` batches. It does not need to
            support ``len()``.
        criterion: Callable ``criterion(outputs, y)`` returning a scalar
            tensor.

    Returns:
        The unweighted mean of the per-batch losses as a float, or NaN if
        ``val_loader`` yields no batches.
    """
    model.eval()
    total_loss = 0.0
    num_batches = 0
    with torch.no_grad():
        for x, y in val_loader:
            outputs = model(x)
            total_loss += criterion(outputs, y).item()
            # Count while iterating so an empty or length-less loader
            # cannot trigger a ZeroDivisionError / TypeError below.
            num_batches += 1
    if num_batches == 0:
        return float('nan')
    return total_loss / num_batches

# 7. Main Workflow
def main(csv_path='your_stock_data.csv'):
    """Run the full pipeline: load, engineer features, label, split, scale, train.

    Args:
        csv_path: Path of a CSV file with at least ``Close`` and ``Volume``
            columns.

    Returns:
        The trained ``StockPredictor``.

    Raises:
        ValueError: If either split has too few rows to form a single
            look-back window.
    """
    # Columns fed to the model; must match the list scaled by preprocess_data().
    feature_cols = ['RSI', 'MA', 'BB_High', 'BB_Low', 'OBV', 'Close', 'Volume']
    label_col = 'Signal'

    # Load data and derive indicators / labels
    df = pd.read_csv(csv_path)
    df = compute_technical_indicators(df)
    df = generate_labels(df)

    # Keep only the model inputs plus the label (label last, as StockDataset
    # expects). This excludes 'Future_Close', which would leak the answer,
    # and any extra CSV column such as a non-numeric date.
    model_frame = df[feature_cols + [label_col]]

    # Chronological split BEFORE scaling so the scaler never sees test rows.
    train_data, test_data = train_test_split(model_frame, test_size=0.2, shuffle=False)
    train_data, scaler = preprocess_data(train_data.copy())
    test_data = test_data.copy()
    test_data[feature_cols] = scaler.transform(test_data[feature_cols])

    train_dataset = StockDataset(train_data)
    test_dataset = StockDataset(test_data)
    for split_name, frame, dataset in (
        ('train', train_data, train_dataset),
        ('test', test_data, test_dataset),
    ):
        # Fail early with a readable message instead of inside the DataLoader.
        if len(frame) <= dataset.lookback:
            raise ValueError(
                f"{split_name} split has {len(frame)} rows; "
                f"more than {dataset.lookback} are required for one window"
            )

    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

    # Initialize and train the model
    input_dim = len(feature_cols)
    model = StockPredictor(input_dim)
    train_model(model, train_loader, val_loader)
    return model



ModuleNotFoundError: No module named 'torch'

In [4]:
# %pip (unlike !pip) installs into the environment of the running kernel.
# Versions are pinned to the ones this notebook's recorded install log resolved.
%pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1

Collecting torch
  Downloading torch-2.5.1-cp312-cp312-manylinux1_x86_64.whl.metadata (28 kB)
Collecting torchvision
  Downloading torchvision-0.20.1-cp312-cp312-manylinux1_x86_64.whl.metadata (6.1 kB)
Collecting torchaudio
  Downloading torchaudio-2.5.1-cp312-cp312-manylinux1_x86_64.whl.metadata (6.4 kB)
Collecting filelock (from torch)
  Downloading filelock-3.16.1-py3-none-any.whl.metadata (2.9 kB)
Collecting typing-extensions>=4.8.0 (from torch)
  Using cached typing_extensions-4.12.2-py3-none-any.whl.metadata (3.0 kB)
Collecting networkx (from torch)
  Downloading networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Collecting fsspec (from torch)
  Downloading fsspec-2024.10.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylin

In [1]:
import pandas as pd
import numpy as np

# Generate synthetic stock data
def create_dummy_stock_data(n_days=1500, start_price=100.0, seed=42):
    """Build a synthetic daily price/volume frame for testing the pipeline.

    ``Close`` is an additive Gaussian random walk around ``start_price``
    with a per-step standard deviation of 2. It is not constrained to stay
    positive. ``Volume`` is a uniform random integer in ``[100, 1000)``.

    Args:
        n_days: Number of business-day rows to generate (1500 is roughly
            six years).
        start_price: Level the random walk starts from.
        seed: Value passed to ``np.random.seed`` for reproducibility.

    Returns:
        DataFrame with columns ``Date``, ``Close`` and ``Volume``.
    """
    np.random.seed(seed)
    dates = pd.date_range(start="2015-01-01", periods=n_days, freq="B")  # Business days

    # The offset must sit OUTSIDE the cumulative sum: cumsum(noise + 100)
    # adds ~100 on every step and produces a steep ramp, not a random walk.
    daily_moves = np.random.randn(n_days) * 2
    close_prices = start_price + np.cumsum(daily_moves)
    volumes = np.random.randint(100, 1000, size=n_days)  # Random volume data

    return pd.DataFrame({
        "Date": dates,
        "Close": close_prices,
        "Volume": volumes
    })

# Create the dummy data and save it to a CSV
df_dummy = create_dummy_stock_data()
# index=False keeps the positional index out of the file so a plain
# pd.read_csv() gets back exactly the Date / Close / Volume columns.
df_dummy.to_csv('dummy_stock_data.csv', index=False)
print("Dummy stock data created and saved to 'dummy_stock_data.csv'")


Dummy stock data created and saved to 'dummy_stock_data.csv'
