In [None]:
import pandas as pd
import yfinance as yf
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense

In [None]:
# Tickers to model. Each one starts with an empty placeholder frame that is
# replaced once its price history has been downloaded.
stocks = ['META', 'GOOGL', 'UBS']
stocks_df = {ticker: pd.DataFrame() for ticker in stocks}

In [None]:
# Date window requested for every ticker.
start_date, end_date = '2010-01-01', '2023-04-16'

# Download each ticker's price history with yfinance and store the returned
# frame in the stocks_df dict under its ticker symbol.
# NOTE(review): later cells expect 'Open', 'Close' and 'Adj Close' columns in
# these frames -- confirm the installed yfinance version still returns them.
for ticker in stocks:
    stock = yf.download(ticker, start=start_date, end=end_date)
    stocks_df[ticker] = stock

In [None]:
def scale_values(df):
    """
    Min-max scale every column of ``df`` in place.

    Parameters:
        -- df: pandas DataFrame of numeric columns; overwritten with the
           scaled values

    Returns:
        -- scaler: the fitted MinMaxScaler, so callers can map scaled values
           (e.g. model predictions) back to original units with
           ``scaler.inverse_transform``. Callers that ignore the return value
           behave exactly as before.
    """
    scaler = MinMaxScaler()
    cols = df.columns
    # Each column is scaled independently using its own min and max.
    df[cols] = scaler.fit_transform(df[cols])
    return scaler

In [None]:
def create_lag(df):
    """
    Add the previous three closing values as lag features.

    Writes columns 'Close_Lag1' .. 'Close_Lag3' into ``df`` in place, where
    'Close_LagN' is 'Close' shifted down by N rows (so its first N rows are NaN).
    """
    close = df['Close']
    for lag in (1, 2, 3):
        df[f'Close_Lag{lag}'] = close.shift(lag)

In [None]:
def create_moving_avg(df, period):
    """
    Add simple and exponential moving averages of 'Close' to ``df`` in place.

    Parameters:
        -- df: pandas DataFrame with a 'Close' column
        -- period: int, rolling window of the SMA and span of the EMA

    The new columns are named 'SMA<period>' and 'EMA<period>'.
    """
    close = df['Close']
    suffix = str(period)
    simple_avg = close.rolling(window=period).mean()
    exponential_avg = close.ewm(span=period, adjust=False).mean()
    df['SMA' + suffix] = simple_avg
    df['EMA' + suffix] = exponential_avg

In [None]:
def create_RSI(df, period=14):
    """
    Add a relative strength index (RSI) column computed from 'Close'.

    The RSI is 100 - 100 / (1 + rs), where rs is the ratio of the rolling
    mean gain to the rolling mean loss over ``period`` rows.

    Parameters:
        -- df: pandas DataFrame with a 'Close' column; modified in place
        -- period: int, rolling window length (default 14, which writes the
           column 'RSI_14')

    The result is stored in the column 'RSI_<period>'.
    """
    delta = df['Close'].diff()
    # The leading NaN produced by diff() fails both comparisons, so the first
    # row counts as zero gain and zero loss.
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)
    avg_gain = gain.rolling(window=period).mean()
    avg_loss = loss.rolling(window=period).mean()
    # A window with no losses gives an infinite rs and therefore an RSI of 100;
    # a window with neither gains nor losses gives 0/0 = NaN.
    rs = avg_gain / avg_loss
    df['RSI_' + str(period)] = 100 - (100 / (1 + rs))

In [None]:
def select_features(df):
    """
    Remove the 'Open' and 'Adj Close' columns from ``df`` in place.

    Raises KeyError if either column is not present.
    """
    unused_columns = ['Open', 'Adj Close']
    df.drop(columns=unused_columns, inplace=True)

In [None]:
def clean_df(df):
    """
    Run the whole feature-preparation pipeline on ``df`` in place.

    Steps, in order: min-max scale the columns present on entry, add three
    'Close' lags, add the 5-period SMA/EMA, add the RSI, drop every row that
    contains a NaN, and finally drop 'Open' and 'Adj Close'.
    """
    # Scaling runs first, so only the columns present on entry go through the scaler.
    scale_values(df)
    create_lag(df)
    create_moving_avg(df, 5)
    # NOTE(review): the RSI is added after scaling and is written as
    # 100 - 100 / (1 + rs), so unlike the other columns it is not min-max
    # scaled -- confirm this is intended.
    create_RSI(df)
    # The lag, rolling and RSI columns begin with NaN rows; any row with a NaN is removed.
    df.dropna(inplace=True)
    select_features(df)

In [None]:
# Prepare every frame in stocks_df in place. Not safe to re-run on its own:
# clean_df ends by dropping 'Open' and 'Adj Close', so a second pass over the
# same frames would find those columns already gone.
for df in stocks_df.values():
    clean_df(df)

In [None]:
# Per-ticker container for the train / validation / test splits.
data = {
    'META': None,
    'GOOGL': None,
    'UBS': None
}

for key in data.keys():

    # Target is the closing value; every other remaining column is a feature.
    y = stocks_df[key]['Close']
    X = stocks_df[key].drop('Close', axis=1)

    # The rows are a time series that is later cut into windows of consecutive
    # rows, so the split must keep chronological order. train_test_split
    # shuffles by default, which would scramble the windows and leak future
    # rows into training; shuffle=False takes the test (then validation) rows
    # from the end of the series instead.
    X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.15, shuffle=False)
    X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.15, shuffle=False)

    data[key] = {
        "X_trn" : X_train,
        "X_val" : X_val,
        "X_tst" : X_test,
        "y_trn" : y_train,
        "y_val" : y_val,
        "y_tst" : y_test
    }

In [None]:
def create_sequences(X, y, time_steps=1):
    """
    Creates input-output pairs using sliding window approach.

    Window ``i`` holds rows ``i .. i + time_steps - 1`` of ``X`` and is paired
    with the target at position ``i + time_steps``.

    Parameters:
        -- X: array-like (numpy array or pandas DataFrame), input data
        -- y: array-like (numpy array or pandas Series), target data
        -- time_steps: int, size of the sliding window

    Returns:
        -- X_seq: numpy array, input sequences
        -- y_seq: numpy array, output sequences
           (both are empty when there are not more than ``time_steps`` rows)
    """
    # Work on plain arrays so every lookup below is positional. Indexing a
    # pandas Series with an integer is a label lookup, which returns the wrong
    # row (or raises KeyError) when the index is not 0..n-1.
    X = np.asarray(X)
    y = np.asarray(y)

    n_windows = len(X) - time_steps
    X_seq = [X[i:i + time_steps] for i in range(n_windows)]
    y_seq = [y[i + time_steps] for i in range(n_windows)]
    return np.array(X_seq), np.array(y_seq)

In [None]:
# Number of consecutive rows in each input window.
time_steps = 7

# Replace every ticker's train / validation / test split with its windowed
# version produced by create_sequences.
for key, value in data.items():
    X_train_seq, y_train_seq = create_sequences(value['X_trn'], value['y_trn'], time_steps)
    X_val_seq, y_val_seq = create_sequences(value['X_val'], value['y_val'], time_steps)
    X_test_seq, y_test_seq = create_sequences(value['X_tst'], value['y_tst'], time_steps)

    value.update({
        'X_trn': X_train_seq,
        'X_val': X_val_seq,
        'X_tst': X_test_seq,
        'y_trn': y_train_seq,
        'y_val': y_val_seq,
        'y_tst': y_test_seq,
    })

In [None]:
for key, value in data.items():

    # Size of the last axis required by the reshape below.
    # NOTE(review): presumably the number of feature columns left in X -- confirm.
    num_features = 9

    # Make sure every input and output split is a numpy array.
    for split_name in ('X_trn', 'X_val', 'X_tst', 'y_trn', 'y_val', 'y_tst'):
        value[split_name] = np.array(value[split_name])

    # Reshape the input splits to 3D arrays (samples, time steps, features).
    for split_name in ('X_trn', 'X_val', 'X_tst'):
        split_array = value[split_name]
        value[split_name] = np.reshape(
            split_array,
            (split_array.shape[0], split_array.shape[1], num_features),
        )

In [None]:
# Display the GOOGL frame as the cell output for a visual check.
stocks_df['GOOGL']