In [10]:
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Input, Dense, LeakyReLU
from tensorflow.keras.models import Model
from tensorflow.keras.utils import set_random_seed

In [11]:
# Step 1: Fetch daily TSLA price history from Yahoo Finance.
# Training window: 2020-01-01 through the end of 2022.
df = yf.download(tickers='TSLA', start='2020-01-01', end='2023-01-01')
# Held-out window starting 2023-12-01 (used later as the test set).
df_test = yf.download(tickers='TSLA', start='2023-12-01', end='2024-01-01')

# Keep a CSV snapshot of each raw download in the working directory.
df.to_csv(path_or_buf='TSLA_historical.csv')
df_test.to_csv(path_or_buf='TSLA_historical_Dec2023_Jan2024.csv')

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [12]:
# Step 2: Reduce both frames to the model's input features.
# Only these three columns are fed to the autoencoder; the original index is
# dropped and replaced by a plain 0..n-1 RangeIndex.
feature_columns = ['Open', 'Close', 'Volume']

df = df.loc[:, feature_columns]
df = df.reset_index(drop=True)

df_test = df_test.loc[:, feature_columns]
df_test = df_test.reset_index(drop=True)

# Quick look at the first training rows.
print(df.head())

        Open      Close     Volume
0  28.299999  28.684000  142981500
1  29.366667  29.534000  266677500
2  29.364668  30.102667  151995000
3  30.760000  31.270666  268231500
4  31.580000  32.809334  467164500


In [13]:
# Report (rows, columns) of the training frame after feature selection.
train_shape = df.shape
print(f"Shape of the train set: {train_shape}")

Shape of the train set: (756, 3)


In [14]:
# Step 3: Min-max normalisation.
# The scaler learns each column's min/max from the training frame only, and
# the same fitted scaler is then applied to the test frame.
scaler = MinMaxScaler()
scaler.fit(df)

df_features = scaler.transform(df)
df_features_test = scaler.transform(df_test)

# Show the scaled arrays (train first, then test).
print(df_features)
print(df_features_test)

[[0.00859013 0.01192745 0.12838504]
 [0.01135001 0.01413016 0.26820505]
 [0.01134484 0.01560381 0.13857346]
 ...
 [0.2208854  0.22967419 0.21665309]
 [0.24686279 0.25328203 0.21761706]
 [0.24572433 0.25680637 0.1451095 ]]
[[0.53859091 0.55650421 0.10373432]
 [0.54534399 0.54808209 0.08443503]
 [0.54047969 0.55621915 0.12272152]
 [0.56389557 0.55790356 0.10968302]
 [0.56035086 0.56637752 0.08787413]
 [0.557039   0.56948722 0.08316938]
 [0.56342986 0.55886241 0.07744279]
 [0.55258869 0.55178781 0.07452015]
 [0.54130767 0.55769624 0.13212062]
 [0.55949701 0.58817138 0.14855922]
 [0.58534504 0.59452035 0.12017789]
 [0.59199461 0.59084054 0.09835724]
 [0.59121839 0.60416044 0.08741645]
 [0.59879946 0.57803891 0.10816926]
 [0.58713031 0.59711177 0.09064564]
 [0.59970506 0.59203257 0.07217071]
 [0.59383168 0.60257963 0.06498461]
 [0.603819   0.61509624 0.08714177]
 [0.61755803 0.59369108 0.09519609]
 [0.59540999 0.58151141 0.08049632]]


In [15]:
# Step 4: Autoencoder Model Implementation
# Architecture:
#   input -> Dense(64) -> LeakyReLU -> Dense(32) -> LeakyReLU
#         -> Dense(16, tanh)                       (latent space)
#         -> Dense(32) -> LeakyReLU -> Dense(64) -> LeakyReLU
#         -> Dense(n_features, sigmoid)            (reconstruction)

# Seed the random number generators before any layer is created. Dense layers
# draw their initial weights randomly, so without this every
# Restart Kernel -> Run All yields a different model. Keras documents
# set_random_seed as seeding Python, NumPy and the backend framework.
SEED = 42
set_random_seed(SEED)

# NOTE: `input` shadows the Python builtin of the same name. The name is kept
# because the later cells that construct `autoencoder` and `encoder` use it.
input = Input(shape=df_features.shape[1:])

# Encoder: progressively narrower dense layers with LeakyReLU activations.
enc = Dense(64)(input)
enc = LeakyReLU()(enc)
enc = Dense(32)(enc)
enc = LeakyReLU()(enc)

# Latent space with tanh activation
latent_space = Dense(16, activation="tanh")(enc)

# Decoder: mirror image of the encoder.
dec = Dense(32)(latent_space)
dec = LeakyReLU()(dec)
dec = Dense(64)(dec)
dec = LeakyReLU()(dec)

# Output layer to match the number of features. Sigmoid keeps the
# reconstruction in [0, 1], the range MinMaxScaler maps the training data to.
dec = Dense(units=df_features.shape[1], activation="sigmoid")(dec)

In [16]:
# Step 5: Wrap the graph in a Model and configure training.
# The full autoencoder maps the input tensor to its reconstruction `dec`.
autoencoder = Model(inputs=input, outputs=dec)

# Mean squared error is used both as the optimised loss and as the reported
# metric; Adam is used with its default settings.
autoencoder.compile(
    loss="mse",
    optimizer="adam",
    metrics=["mse"],
)

In [17]:
# Step 6: Train the autoencoder
# The network learns to reconstruct its own input, so the scaled training
# features are passed as both x and y.
# Per the Keras docs, validation_split holds out the LAST 25% of the rows,
# selected before shuffling (presumably the most recent trading days, assuming
# the downloaded rows are in date order).
# The returned History object is kept so the per-epoch loss / val_loss values
# stay available via history.history, and so the cell does not echo a bare
# object repr as its output.
history = autoencoder.fit(
    df_features,
    df_features,
    epochs=50,
    batch_size=32,
    validation_split=0.25,
)

Epoch 1/50
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 0.0964 - mse: 0.0964 - val_loss: 0.0525 - val_mse: 0.0525
Epoch 2/50
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0617 - mse: 0.0617 - val_loss: 0.0114 - val_mse: 0.0114
Epoch 3/50
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0304 - mse: 0.0304 - val_loss: 0.0016 - val_mse: 0.0016
Epoch 4/50
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0082 - mse: 0.0082 - val_loss: 0.0019 - val_mse: 0.0019
Epoch 5/50
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0063 - mse: 0.0063 - val_loss: 0.0013 - val_mse: 0.0013
Epoch 6/50
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0055 - mse: 0.0055 - val_loss: 7.1077e-04 - val_mse: 7.1077e-04
Epoch 7/50
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 

<keras.src.callbacks.history.History at 0x1f1194b99a0>

In [18]:
# Step 7: Build the encoder half as its own model.
# It reuses the layers of the autoencoder graph (same input tensor, output
# taken at the 16-unit latent layer), so it exposes the latent features.
encoder = Model(inputs=input, outputs=latent_space)

In [19]:
# Step 8: Encode the scaled test set.
# Running the encoder (not the full autoencoder) yields the latent-space
# representation of each test row rather than a reconstruction.
test_au_features = encoder.predict(x=df_features_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step


In [20]:
# Step 9: Report the dimensions of the encoded test features
# (one row per test sample, one column per latent unit).
encoded_shape = test_au_features.shape
print("Encoded test set features shape:", encoded_shape)

Encoded test set features shape: (20, 16)
