In [None]:
# Read Data
import pandas as pd
import numpy as np
# Tickers to load; one cleaned CSV is read per ticker.
ticker_names = [
    "MSFT", "AAPL", "NVDA", "AMZN", "GOOG",
    "GOOGL", "META", "TSM", "LLY", "AVGO",
    "NVO", "TSLA", "JPM", "V", "WMT",
    "UNH", "MA", "XOM", "ASML", "JNJ",
]

# Rows are split in file order: the first 60% train, the next 20% cross-validation,
# and whatever remains is the test split.
TRAIN_FRACTION = 0.6
CV_FRACTION = 0.2


def stack_per_ticker(arrays):
    """Combine per-ticker arrays into one array indexed by ticker.

    When every ticker has the same shape the result is the regular stacked
    array that ``np.array(arrays)`` produces. When tickers differ in length
    (which makes ``np.array`` raise on current NumPy releases) the result is a
    1-D object array whose i-th element is the i-th ticker's own array, so
    ``result[i]`` and ``for ticker in result`` behave the same in both cases.

    Parameters
    ----------
    arrays : list of np.ndarray
        One array per ticker.

    Returns
    -------
    np.ndarray
        Stacked array, or a 1-D object array when the shapes are ragged.
    """
    if len({a.shape for a in arrays}) <= 1:
        return np.array(arrays)
    ragged = np.empty(len(arrays), dtype=object)
    for idx, arr in enumerate(arrays):
        ragged[idx] = arr
    return ragged


dfs = []
X = []  # per ticker: (num_sample, num_features) values from CSV columns 1..3
Y = []  # per ticker: (num_sample,) values from CSV column 4

for name in ticker_names:
    df = pd.read_csv(f'../DATA/Cleaned_Datas/cleaned_{name}_prices.csv')
    dfs.append(df)
    X.append(df.iloc[:, 1:4].values)
    Y.append(df.iloc[:, 4].values)

X = stack_per_ticker(X)  # ticker * sample * features
Y = stack_per_ticker(Y)  # ticker * sample

X_trains, Y_trains = [], []  # ticker * sample (* features for X)
X_cvs, Y_cvs = [], []
X_tests, Y_tests = [], []

training_sizes = []
cv_sizes = []
test_sizes = []

# X and Y rows of a ticker come from the same DataFrame, so they share one
# length and therefore one set of split boundaries.
for ticker_x, ticker_y in zip(X, Y):
    train_size = int(len(ticker_x) * TRAIN_FRACTION)
    cv_size = int(len(ticker_x) * CV_FRACTION)
    test_size = len(ticker_x) - train_size - cv_size
    cv_end = train_size + cv_size

    training_sizes.append(train_size)
    cv_sizes.append(cv_size)
    test_sizes.append(test_size)

    X_train, X_cv, X_test = ticker_x[:train_size], ticker_x[train_size:cv_end], ticker_x[cv_end:]
    Y_train, Y_cv, Y_test = ticker_y[:train_size], ticker_y[train_size:cv_end], ticker_y[cv_end:]

    X_trains.append(X_train)
    X_cvs.append(X_cv)
    X_tests.append(X_test)
    Y_trains.append(Y_train)
    Y_cvs.append(Y_cv)
    Y_tests.append(Y_test)

X_trains = stack_per_ticker(X_trains)  # 3d arr: ticker * sample * features
Y_trains = stack_per_ticker(Y_trains)  # 2d arr: ticker * sample
X_cvs = stack_per_ticker(X_cvs)  # 3d arr: ticker * sample * features
Y_cvs = stack_per_ticker(Y_cvs)  # 2d arr: ticker * sample
X_tests = stack_per_ticker(X_tests)  # 3d arr: ticker * sample * features
Y_tests = stack_per_ticker(Y_tests)  # 2d arr: ticker * sample

print(Y_trains[0].shape)


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Activation, Dropout
from tensorflow.keras.losses import mse, mae
from tensorflow.keras.optimizers.legacy import Adam
from sklearn.preprocessing import MinMaxScaler




time_steps = 7  # a week of steps
input_dim = 3  # OHL


def make_windows(features, targets, window):
    """Turn row-wise samples into overlapping sequences for the LSTM.

    Each sequence holds ``window`` consecutive feature rows, and its target is
    the target of the LAST row in that sequence, so the row-wise pairing of
    features and targets is kept and only the preceding history is added.

    Parameters
    ----------
    features : np.ndarray, shape (num_sample, num_features)
    targets : np.ndarray, shape (num_sample, ...) aligned row-for-row with ``features``
    window : int
        Number of consecutive rows per sequence.

    Returns
    -------
    (np.ndarray, np.ndarray)
        Sequences of shape (num_sample - window + 1, window, num_features) and
        the matching targets.

    Raises
    ------
    ValueError
        If there are fewer than ``window`` rows, so no sequence can be formed.
    """
    n_windows = len(features) - window + 1
    if n_windows <= 0:
        raise ValueError(
            f"need at least {window} rows to build a sequence, got {len(features)}"
        )
    sequences = np.stack([features[start:start + window] for start in range(n_windows)])
    return sequences, targets[window - 1:]


# Fitted (scaler_x, scaler_y) per ticker. The models are trained on scaled
# targets, so scaler_y is needed to map predictions back to original units.
scalers = {}

for i in range(len(ticker_names)):
    # Drop graph/layer state left by the previous ticker's model so memory
    # does not grow across the loop.
    tf.keras.backend.clear_session()

    x_t = X_trains[i]
    y_t = Y_trains[i]
    x_cv = X_cvs[i]
    y_cv = Y_cvs[i]

    # Fit both scalers on the training split only and reuse them for the
    # cross-validation split.
    scaler_x = MinMaxScaler(feature_range=(0, 1))
    scaler_y = MinMaxScaler(feature_range=(0, 1))
    scaler_x.fit(x_t)
    # MinMaxScaler works on 2-D input, so the 1-D targets become one column.
    scaler_y.fit(y_t.reshape(-1, 1))
    x_t_scaled = scaler_x.transform(x_t)
    x_cv_scaled = scaler_x.transform(x_cv)
    y_t_scaled = scaler_y.transform(y_t.reshape(-1, 1))
    y_cv_scaled = scaler_y.transform(y_cv.reshape(-1, 1))
    scalers[ticker_names[i]] = (scaler_x, scaler_y)

    # The LSTM below declares input_shape=(time_steps, input_dim), so the
    # 2-D scaled samples are regrouped into (num_sequence, time_steps, input_dim).
    x_t_seq, y_t_seq = make_windows(x_t_scaled, y_t_scaled, time_steps)
    x_cv_seq, y_cv_seq = make_windows(x_cv_scaled, y_cv_scaled, time_steps)

    model = Sequential([
        LSTM(units=256, activation='relu', return_sequences=True, input_shape=(time_steps, input_dim)),
        Dropout(0.1),
        LSTM(units=128, activation='relu', return_sequences=False),
        Dropout(0.1),
        Dense(units=64, activation='relu'),
        Dropout(0.1),
        Dense(units=1, activation='linear'),
    ])

    # Compile the model
    model.compile(optimizer=Adam(learning_rate=0.01), loss='mean_squared_error')
    model.fit(x_t_seq, y_t_seq, validation_data=(x_cv_seq, y_cv_seq), epochs=100, batch_size=64)
    model.save(f'{ticker_names[i]}.h5')
