In [7]:
import pandas as pd
import numpy as np
from datetime import datetime
import joblib

from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam


In [8]:
# --- 1. Load Data ---
CSV_FILE        = "crypto_price_data.csv"
SAVE_MODEL_PATH = "mvp_lstm_model.h5"
SAVE_SCALER_PATH = "mvp_scaler.pkl"

print("Loading data from CSV...")
df = pd.read_csv(CSV_FILE)

Loading data from CSV...


In [9]:
# --- 2. Sort by Ticker, Date ---
df.sort_values(by=["Ticker", "Date"], inplace=True)
df.reset_index(drop=True, inplace=True)

In [10]:
# --- 3. Exclude Last 60 Days Per Ticker ---
LOOK_BACK = 60
df_list = []
for ticker in df["Ticker"].unique():
    sub = df[df["Ticker"] == ticker].copy()
    # If there's not enough data to drop 60 days, skip ticker
    if len(sub) <= LOOK_BACK:
        print(f"Skipping {ticker}: Not enough data to remove last 60 days.")
        continue
    
    # Drop the last 60 rows for this ticker
    sub = sub.iloc[:-LOOK_BACK]
    df_list.append(sub)

# Merge back
if not df_list:
    raise ValueError("No data available after excluding the last 60 days.")

df_train = pd.concat(df_list).reset_index(drop=True)
print("Data shape after excluding last 60 days:", df_train.shape)

Data shape after excluding last 60 days: (99264, 215)


In [11]:
# --- 4. Scale Data Per Ticker ---
scaler_dict = {}
df_scaled_list = []

for ticker in df_train["Ticker"].unique():
    sub = df_train[df_train["Ticker"] == ticker].copy()
    scaler = MinMaxScaler(feature_range=(0, 1))
    sub["CloseScaled"] = scaler.fit_transform(sub[["Close"]])
    scaler_dict[ticker] = scaler
    df_scaled_list.append(sub)

df_scaled = pd.concat(df_scaled_list).reset_index(drop=True)

# Save the scaler dictionary
joblib.dump(scaler_dict, SAVE_SCALER_PATH)
print(f"Scalers saved to {SAVE_SCALER_PATH}")


Scalers saved to mvp_scaler.pkl


In [12]:
# --- 5. Create Sequences for Multi-step Prediction ---
PREDICTION_HORIZONS = [1, 7, 30]  # Next-day, next-week, next-month

def create_sequences(df, look_back=LOOK_BACK, horizons=PREDICTION_HORIZONS):
    X_list, y_list, tickers_list = [], [], []
    
    for ticker in df["Ticker"].unique():
        sub = df[df["Ticker"] == ticker].copy().reset_index(drop=True)
        close_scaled = sub["CloseScaled"].values
        
        # Generate sequences
        for i in range(len(sub) - look_back - max(horizons)):
            X_seq = close_scaled[i : i + look_back].reshape(-1, 1)  # shape (60, 1)
            # Multi-step targets
            y_seq = [close_scaled[i + look_back + h - 1] for h in horizons]
            
            X_list.append(X_seq)
            y_list.append(y_seq)
            tickers_list.append(ticker)
    
    X_arr = np.array(X_list)
    y_arr = np.array(y_list)
    return X_arr, y_arr, tickers_list

print("Creating sequences...")
X, y, tickers_seq = create_sequences(df_scaled)

print(f"Full dataset: X.shape={X.shape}, y.shape={y.shape}")

Creating sequences...
Full dataset: X.shape=(94580, 60, 1), y.shape=(94580, 3)


In [13]:

# --- 6. Train/Validation Split (80/20) ---
# We'll do a simple random split at the sequence level.
# Time-series best practice is to keep chronological order,
# but since the last 60 days are already excluded, we can do a simple ratio.

split_index = int(len(X) * 0.8)
X_train, X_val = X[:split_index], X[split_index:]
y_train, y_val = y[:split_index], y[split_index:]

print(f"Train size: X_train={X_train.shape}, y_train={y_train.shape}")
print(f"Val size: X_val={X_val.shape}, y_val={y_val.shape}")


Train size: X_train=(75664, 60, 1), y_train=(75664, 3)
Val size: X_val=(18916, 60, 1), y_val=(18916, 3)


In [14]:
# --- 7. Build LSTM Model ---
model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(LOOK_BACK, 1)),
    Dropout(0.2),
    LSTM(64, return_sequences=False),
    Dropout(0.2),
    Dense(len(PREDICTION_HORIZONS))  # 3 outputs: EOD, EOW, EOM
])

model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
model.summary()

  super().__init__(**kwargs)


In [15]:
# --- 8. Train Model ---
EPOCHS = 10
BATCH_SIZE = 32

print("Training model...")
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    shuffle=False  # keep sequence order
)

Training model...
Epoch 1/10
[1m2365/2365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 21ms/step - loss: 0.0116 - val_loss: 0.0032
Epoch 2/10
[1m2365/2365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 21ms/step - loss: 0.0052 - val_loss: 0.0029
Epoch 3/10
[1m2365/2365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 21ms/step - loss: 0.0048 - val_loss: 0.0028
Epoch 4/10
[1m2365/2365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 21ms/step - loss: 0.0047 - val_loss: 0.0028
Epoch 5/10
[1m2365/2365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 21ms/step - loss: 0.0046 - val_loss: 0.0027
Epoch 6/10
[1m2365/2365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 21ms/step - loss: 0.0045 - val_loss: 0.0026
Epoch 7/10
[1m2365/2365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 21ms/step - loss: 0.0044 - val_loss: 0.0027
Epoch 8/10
[1m2365/2365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 21ms/step - loss: 0.0044 - val_los

In [16]:
# --- 9. Save Model ---
model.save(SAVE_MODEL_PATH)
print(f"Model saved to {SAVE_MODEL_PATH}")



Model saved to mvp_lstm_model.h5
