In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import yfinance as yf
import pandas as pd
import math
from scipy.stats import norm
import tensorflow as tf
from tensorflow.keras.layers import Input, LSTM, Dense, Bidirectional, Dropout, Conv1D, MaxPooling1D, Flatten, Attention
from tensorflow.keras.models import Model
from ta.volume import MFIIndicator
from ta.utils import dropna
from statsmodels.tsa.seasonal import seasonal_decompose
# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [12]:
# Kaggle function

def make_model():
    inp = Input(shape=(128, 10))
    x = Bidirectional(LSTM(128, return_sequences=True))(inp)
    x = Bidirectional(LSTM(32, return_sequences=True))(x)
    x = Attention(128)(x)
    # A intermediate full connected (Dense) can help to deal with nonlinears outputs
    x = Dense(64, activation="relu")(x)
    x = Dense(9, activation="softmax")(x)
    model = Model(inputs=inp, outputs=x)
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [2]:
# My function
def make_model():
    # 1. Input layer for stock data (sequence_length, features)
    inp = Input(shape=(128, 10))  # Example: 128 timesteps, 10 features (adjust as needed)

    # 2. Bidirectional LSTM layers for sequence learning
    x = Bidirectional(LSTM(128, return_sequences=True))(inp)
    x = Bidirectional(LSTM(64, return_sequences=True))(x)

    # 3. Dropout to prevent overfitting
    x = Dropout(0.2)(x)

    # 4. 1D Convolutional layer for feature extraction
    x = Conv1D(filters=64, kernel_size=3, activation="relu", padding="same")(x)

    # 5. MaxPooling to downsample
    x = MaxPooling1D(pool_size=2)(x)

    # 6. Flatten layer to transition to fully connected layers
    x = Flatten()(x)

    # 7. Fully connected layers for high-level learning
    x = Dense(128, activation="relu")(x)
    x = Dense(64, activation="relu")(x)

    # 8. Output layer - Single neuron with tanh activation to produce values between -1 and 1
    output = Dense(1, activation="tanh")(x)

    # 9. Compile model with Adam optimizer
    model = Model(inputs=inp, outputs=output)
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])

    return model

# Create the model
model = make_model()
model.summary()


In [11]:
def calculate_rsi(data, window=14):
    delta = data['Close'].diff(1)
    gain = delta.where(delta > 0, 0.0)
    loss = -delta.where(delta < 0, 0.0)
    
    avg_gain = gain.rolling(window=window, min_periods=1).mean()
    avg_loss = loss.rolling(window=window, min_periods=1).mean()
    
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    rsi = rsi.fillna(50)  # Default to neutral if not enough data
    return rsi

In [3]:
def get_stock_data(ticker, start='2024-01-01', end='2025-03-24'):
    stock = yf.download(ticker, start=start, end=end)
    return stock

In [None]:


def data_preparation(data):
    data = dropna(data)  # Drop NA values from the dataframe
    data[['Log_Price']] = np.log(data['Close'])  # Log transformation for stationarity
    data[['Log_Volume']] = np.log(data['Volume'].replace(0,np.nan))  # Log transformation for volume, add 1 to avoid log(0)
    data['Log Volume'].fillna(data['Log Volume'].rolling(window=5, min_periods=1).mean(), inplace=True)

    data['Log Price Diff'] = data['Log Price'].diff()

    data['Percent Change'] = data['Close'].pct_change()  # Calculate percentage change for the close price

    #data['Dividend Yield'] = data['Dividends'] / data['Close']  * 100

    data['RSI'] = calculate_rsi(data)  # Calculate RSI 
    

    data['MFI'] = MFIIndicator(high=data['High'], low=data['Low'], close=data['Close'], volume=data['Volume'], window=14, fillna=True).mfi()
    
    data['Log Volume Diff'] = data['Log_Volume'].diff()  # Log volume difference for stationarity
    
    # Beta calculation (S&P500)
    market = yf.download('SPY', start=data.index[0], end=data.index[-1])['Close']
    data['Market Return'] = market.pct_change()
    data['Stock Return'] = data['Close'].pct_change()
    data['Beta'] = data['Stock Return'].rolling(30).cov(data['Market Return']) / data['Market Return'].rolling(30).var()
    
    data['PE Ratio'] = data['Close'] / data['Earnings'] if 'Earnings' in data.columns else np.nan
     # Example PE ratio calculation, ensure 'Earnings' column exists in your data

    data['MA50'] = data['Close'].rolling(50).mean()
    data['Log Diff MA50'] = np.log(data['Close']) - np.log(data['MA50'])

    data['MA200'] = data['Close'].rolling(200).mean()
    data['Log Diff MA200'] = np.log(data['Close']) - np.log(data['MA200'])

    decomposition = seasonal_decompose(data['Close'], model='additive', period=252)  # Assuming yearly seasonality
    data['Seasonality'] = decomposition.seasonal

    feature_columns = ['Log Price Diff', 'Percent Change', 'RSI', 'MFI', 'Log Volume Diff',
                       'Beta', 'PE Ratio', 'Log Diff MA50', 'Log Diff MA200', 'Seasonality']
    
    return data[feature_columns]

apple = get_stock_data('AAPL')
apple['Volume']
prepped_data = data_preparation(apple)
    #data['RSI'] = calculate_rsi(data)
    #data['SMA_20'] = data['Close'].rolling(window=20).mean()
    #data['SMA_50'] = data['Close'].rolling(window=50).mean()
    #data.dropna(inplace=True)
    
    # Normalize the data
    #data = (data - data.mean()) / data.std()
    

In [6]:
apple = get_stock_data('AAPL')
apple['Volume']
prepped_data = data_preparation(apple)


[*********************100%***********************]  1 of 1 completed


KeyError: 'Log Volume'