In [1]:
# 1. IMPORT LIBRARIES
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt

# Scaling + metrics
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

# LSTM (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Technical indicators
import ta

ModuleNotFoundError: No module named 'pandas'

In [None]:
# 2. DEFINE PROJECT PATHS
import os

# The project root defaults to the original author's location, but it can be
# redirected by setting the CRYPTO_PROJECT_ROOT environment variable so the
# notebook can run on another machine without editing code.
PROJECT_ROOT = Path(
    os.environ.get(
        "CRYPTO_PROJECT_ROOT",
        r"C:\Users\moham\Downloads\scrape\text\crypto_project",
    )
)

DATA_PROCESSED = PROJECT_ROOT / "data" / "processed"
MODEL_DIR = PROJECT_ROOT / "models"
OUTPUT_FIGS = PROJECT_ROOT / "outputs" / "figures"

# Create the output folders up front (no-op when they already exist).
for out_dir in (MODEL_DIR, OUTPUT_FIGS):
    out_dir.mkdir(exist_ok=True, parents=True)

# Input files read by the modelling cells.
ETH_FILE = DATA_PROCESSED / "eth_cleaned.csv"
NEWS_FILE = DATA_PROCESSED / "daily_news_aggregated.csv"

In [None]:
# 1. BASE PATH + FILE NAMES
# Reuse the locations defined in the project-paths cell instead of repeating
# the absolute path; they refer to the same directory and the same two files.
base_path = DATA_PROCESSED
eth_file = ETH_FILE
news_file = NEWS_FILE


# 2. LOAD DATASETS
eth_df = pd.read_csv(eth_file)
daily_news = pd.read_csv(news_file)


# 3. CLEAN + PARSE DATES
# errors="coerce" turns unparseable dates into NaT; NaT fails both comparisons
# in the range filter below, so such rows are excluded there.
eth_df["date"] = pd.to_datetime(eth_df["date"], errors="coerce")
daily_news["date"] = pd.to_datetime(daily_news["date"], errors="coerce")


# 4. DEFINE DATE RANGE
start_date = pd.to_datetime("2023-01-12")
end_date   = pd.to_datetime("2025-10-16")

eth_df_filtered = eth_df[(eth_df["date"] >= start_date) & (eth_df["date"] <= end_date)].copy()
daily_news_filtered = daily_news[(daily_news["date"] >= start_date) & (daily_news["date"] <= end_date)].copy()

eth_df_filtered = eth_df_filtered.sort_values("date").reset_index(drop=True)
daily_news_filtered = daily_news_filtered.sort_values("date").reset_index(drop=True)


# 5. MERGE ETH + NEWS
# Left merge keeps every price row; days without news get NaN news columns,
# which are replaced by 0 for the two news features used by the model.
eth_news_merged = pd.merge(
    eth_df_filtered,
    daily_news_filtered,
    on="date",
    how="left"
)

eth_news_merged["news_count"] = eth_news_merged["news_count"].fillna(0)
eth_news_merged["mean_sentiment"] = eth_news_merged["mean_sentiment"].fillna(0)


# 6. COMPUTE TECHNICAL INDICATORS
import ta

eth_news_merged["rsi"] = ta.momentum.RSIIndicator(close=eth_news_merged["eth_close"], window=14).rsi()

macd = ta.trend.MACD(close=eth_news_merged["eth_close"])
eth_news_merged["macd"] = macd.macd()
eth_news_merged["macd_signal"] = macd.macd_signal()
eth_news_merged["macd_diff"] = macd.macd_diff()

eth_news_merged["sma_20"] = eth_news_merged["eth_close"].rolling(window=20).mean()
eth_news_merged["sma_50"] = eth_news_merged["eth_close"].rolling(window=50).mean()

bb = ta.volatility.BollingerBands(close=eth_news_merged["eth_close"], window=20, window_dev=2)
eth_news_merged["bb_upper"] = bb.bollinger_hband()
eth_news_merged["bb_lower"] = bb.bollinger_lband()
eth_news_merged["bb_middle"] = bb.bollinger_mavg()


# 7. TARGET VARIABLE
# Row t's target is the close of row t+1; the final row therefore has no target.
eth_news_merged["target_next_close"] = eth_news_merged["eth_close"].shift(-1)

# Drop only rows that miss a value the model actually needs (indicator warm-up
# rows and the final target-less row). A bare dropna() would also delete any
# day on which some unrelated CSV column is NaN -- e.g. extra news columns left
# empty by the left merge -- and punch gaps into the time series.
required_cols = [
    "eth_close",
    "rsi", "macd", "macd_signal", "macd_diff",
    "sma_20", "sma_50", "bb_upper", "bb_lower", "bb_middle",
    "news_count", "mean_sentiment",
    "target_next_close",
]
eth_news_merged = eth_news_merged.dropna(subset=required_cols).reset_index(drop=True)


# 8. PREPARE FEATURES + SCALING
features = [
    "rsi", "macd", "macd_signal", "macd_diff",
    "sma_20", "sma_50", "bb_upper", "bb_lower", "bb_middle",
    "news_count", "mean_sentiment"
]

X = eth_news_merged[features].values
y = eth_news_merged["target_next_close"].values.reshape(-1,1)

# Fit both scalers on the earliest 80% of rows only, then apply them to the
# whole series. Fitting on every row leaks the mean/variance of the held-out
# test period into training. The 0.8 fraction mirrors the chronological
# train/test split applied to the sequences later in this cell.
n_fit_rows = max(1, int(len(X) * 0.8))

# Scale features
scaler_X = StandardScaler()
scaler_X.fit(X[:n_fit_rows])
X_scaled = scaler_X.transform(X)

# Separate scaler for the target so predictions can be inverse-transformed.
scaler_y = StandardScaler()
scaler_y.fit(y[:n_fit_rows])
y_scaled = scaler_y.transform(y)


# 9. CREATE LSTM SEQUENCE DATA
def create_sequences(X, y, time_steps=10):
    """Build sliding feature windows paired with their next-step labels.

    ``y`` is expected to be aligned row-for-row with ``X`` and already shifted,
    i.e. ``y[t]`` is the value to predict from the features up to and including
    row ``t`` (in this notebook: ``eth_close.shift(-1)``). Each window
    ``X[i:i+time_steps]`` is therefore paired with ``y[i+time_steps-1]``, the
    label of the window's final row. Pairing it with ``y[i+time_steps]`` would
    shift the target a second time and train a two-day-ahead model.

    Parameters
    ----------
    X : sequence of feature rows (indexable and sliceable along axis 0).
    y : sequence of labels, same length as ``X``.
    time_steps : int, number of consecutive rows per window (>= 1).

    Returns
    -------
    (Xs, ys) : tuple of ``np.ndarray``
        ``Xs`` stacks ``len(X) - time_steps + 1`` windows; ``ys`` holds the
        matching labels.

    Raises
    ------
    ValueError
        If ``time_steps`` is smaller than 1 or ``X`` and ``y`` differ in length.
    """
    if time_steps < 1:
        raise ValueError("time_steps must be >= 1")
    if len(X) != len(y):
        raise ValueError("X and y must have the same number of rows")

    n_windows = len(X) - time_steps + 1
    Xs = [X[i:i + time_steps] for i in range(n_windows)]
    ys = [y[i + time_steps - 1] for i in range(n_windows)]
    return np.array(Xs), np.array(ys)

# Seed Python, NumPy and TensorFlow so weight initialisation and dropout masks
# start from the same state on every run; without this the reported metrics
# change from one execution to the next.
from tensorflow.keras.utils import set_random_seed

SEED = 42
set_random_seed(SEED)

time_steps = 10
X_seq, y_seq = create_sequences(X_scaled, y_scaled, time_steps)

# Train/test split (80/20), chronological: the most recent 20% is held out.
train_size = int(len(X_seq)*0.8)
X_train, X_test = X_seq[:train_size], X_seq[train_size:]
y_train, y_test = y_seq[:train_size], y_seq[train_size:]


# 10. BUILD LSTM MODEL
# Single LSTM layer -> dropout -> one linear output (the scaled next close).
model = Sequential()
model.add(LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(1))

model.compile(optimizer="adam", loss="mse")
model.summary()

# 11. TRAIN MODEL
# shuffle=False keeps batches in time order; validation_split takes the last
# 10% of the training samples as the validation set.
history = model.fit(
    X_train, y_train,
    epochs=50,
    batch_size=16,
    validation_split=0.1,
    shuffle=False
)

# 12. PREDICTION
# Predictions and targets are mapped back to price units before scoring.
y_pred_scaled = model.predict(X_test)
y_pred = scaler_y.inverse_transform(y_pred_scaled)
y_test_actual = scaler_y.inverse_transform(y_test)

# 13. EVALUATE
mse = mean_squared_error(y_test_actual, y_pred)
rmse = mse**0.5
r2 = r2_score(y_test_actual, y_pred)

print(f"LSTM Results: RMSE={rmse:.2f}, R2={r2:.4f}")

# 14. PLOT ACTUAL VS PREDICTED
plt.figure(figsize=(14,6))
plt.plot(y_test_actual, label="Actual Price", color="blue")
plt.plot(y_pred, label="Predicted Price", color="red", linestyle="--")
plt.title(f"LSTM ETH Price Prediction (RMSE={rmse:.2f})")
plt.xlabel("Time Index")
plt.ylabel("ETH Closing Price")
plt.legend()
plt.grid(True)
plt.show()

# 15. PREDICT NEXT N DAYS
n_days = 14
last_sequence = X_scaled[-time_steps:].copy()  # last sequence for prediction
pred_future_scaled = []

current_seq = last_sequence.copy()

# NOTE(review): future feature values are unknown, so each step slides the
# window forward by repeating the last observed feature row. The prediction is
# not fed back into the features, so once the window holds only copies of that
# row (after about time_steps steps) the forecast stops changing.
for _ in range(n_days):
    input_seq = current_seq.reshape(1, time_steps, X_scaled.shape[1])
    # verbose=0: avoid printing one progress bar per forecast step.
    next_pred_scaled = model.predict(input_seq, verbose=0)
    pred_future_scaled.append(next_pred_scaled[0,0])

    next_row = current_seq[-1].copy()
    current_seq = np.vstack((current_seq[1:], next_row.reshape(1,-1)))

# Back to price units.
pred_future = scaler_y.inverse_transform(np.array(pred_future_scaled).reshape(-1,1))

# Create future dates
last_date = eth_news_merged["date"].iloc[-1]
future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=n_days)

# Future rows carry only a date and the forecast; observed columns stay NaN.
future_df = pd.DataFrame({
    "date": future_dates,
    **{col: np.nan for col in ["eth_close"] + features},
    "target_next_close": pred_future.flatten()
})

combined_df = pd.concat([eth_news_merged, future_df], ignore_index=True)


# 16. PLOT HISTORICAL + PREDICTIONS
# The forecast line is drawn from future_df only. In combined_df the historical
# part of "target_next_close" holds actual (shifted) closes, so plotting that
# whole column as "Predicted" would present real prices as model output.
plt.figure(figsize=(14,6))
plt.plot(eth_news_merged["date"], eth_news_merged["eth_close"], label="Historical ETH Close", color="blue")
plt.plot(future_df["date"], future_df["target_next_close"], label="Predicted ETH Close", color="red", linestyle="--")
plt.title(f"ETH Historical + {n_days}-Day Forecast")
plt.xlabel("Date")
plt.ylabel("ETH Closing Price")
plt.legend()
plt.grid(True)
plt.show()


In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import ta
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau


# 1. BASE PATH + FILE NAMES
# NOTE(review): this cell reads from ...\data while the project-paths cell and
# the first LSTM cell read the same file names from ...\data\processed --
# confirm which directory actually holds the CSVs.
base_path = Path(r"C:\Users\moham\Downloads\scrape\text\crypto_project\data")
eth_file = base_path / "eth_cleaned.csv"
news_file = base_path / "daily_news_aggregated.csv"


# 2. LOAD DATASETS
eth_df = pd.read_csv(eth_file)
daily_news = pd.read_csv(news_file)


# 3. CLEAN + PARSE DATES
# errors="coerce" turns unparseable dates into NaT; NaT fails both comparisons
# in the range filter below, so such rows are excluded there.
eth_df["date"] = pd.to_datetime(eth_df["date"], errors="coerce")
daily_news["date"] = pd.to_datetime(daily_news["date"], errors="coerce")


# 4. FILTER DATE RANGE
start_date = pd.to_datetime("2023-01-12")
end_date = pd.to_datetime("2025-10-16")

eth_df = eth_df[(eth_df["date"] >= start_date) & (eth_df["date"] <= end_date)].copy()
daily_news = daily_news[(daily_news["date"] >= start_date) & (daily_news["date"] <= end_date)].copy()

eth_df = eth_df.sort_values("date").reset_index(drop=True)
daily_news = daily_news.sort_values("date").reset_index(drop=True)


# 5. MERGE ETH + NEWS
# Left merge keeps every price row; days without news get 0 for both news features.
eth_news_merged = pd.merge(eth_df, daily_news, on="date", how="left")
eth_news_merged["news_count"] = eth_news_merged["news_count"].fillna(0)
eth_news_merged["mean_sentiment"] = eth_news_merged["mean_sentiment"].fillna(0)


# 6. TECHNICAL INDICATORS
eth_news_merged["rsi"] = ta.momentum.RSIIndicator(close=eth_news_merged["eth_close"], window=14).rsi()
macd = ta.trend.MACD(close=eth_news_merged["eth_close"])
eth_news_merged["macd"] = macd.macd()
eth_news_merged["macd_signal"] = macd.macd_signal()
eth_news_merged["macd_diff"] = macd.macd_diff()
eth_news_merged["sma_20"] = eth_news_merged["eth_close"].rolling(window=20).mean()
eth_news_merged["sma_50"] = eth_news_merged["eth_close"].rolling(window=50).mean()
bb = ta.volatility.BollingerBands(close=eth_news_merged["eth_close"], window=20, window_dev=2)
eth_news_merged["bb_upper"] = bb.bollinger_hband()
eth_news_merged["bb_lower"] = bb.bollinger_lband()
eth_news_merged["bb_middle"] = bb.bollinger_mavg()


# 7. TARGET VARIABLE
# Row t's target is the close of row t+1; the final row therefore has no target.
eth_news_merged["target_next_close"] = eth_news_merged["eth_close"].shift(-1)

# Drop only rows that miss a value the model actually needs (indicator warm-up
# rows and the final target-less row). A bare dropna() would also delete any
# day on which some unrelated CSV column is NaN -- e.g. extra news columns left
# empty by the left merge -- and punch gaps into the time series.
required_cols = [
    "eth_close",
    "rsi", "macd", "macd_signal", "macd_diff",
    "sma_20", "sma_50", "bb_upper", "bb_lower", "bb_middle",
    "news_count", "mean_sentiment",
    "target_next_close",
]
eth_news_merged = eth_news_merged.dropna(subset=required_cols).reset_index(drop=True)


# 8. FEATURES + SCALING
features = [
    "rsi", "macd", "macd_signal", "macd_diff",
    "sma_20", "sma_50", "bb_upper", "bb_lower", "bb_middle",
    "news_count", "mean_sentiment"
]

X = eth_news_merged[features].values
y = eth_news_merged["target_next_close"].values.reshape(-1,1)

# Fit both scalers on the earliest 80% of rows only, then apply them to the
# whole series. Fitting on every row leaks the mean/variance of the held-out
# test period into training. The 0.8 fraction mirrors the chronological
# train/test split applied to the sequences later in this cell.
n_fit_rows = max(1, int(len(X) * 0.8))

# Separate scalers for X and y
scaler_X = StandardScaler()
scaler_X.fit(X[:n_fit_rows])
X_scaled = scaler_X.transform(X)
scaler_y = StandardScaler()
scaler_y.fit(y[:n_fit_rows])
y_scaled = scaler_y.transform(y)


# 9. CREATE SEQUENCES
def create_sequences(X, y, time_steps=30):
    """Build sliding feature windows paired with their next-step labels.

    ``y`` is expected to be aligned row-for-row with ``X`` and already shifted,
    i.e. ``y[t]`` is the value to predict from the features up to and including
    row ``t`` (in this notebook: ``eth_close.shift(-1)``). Each window
    ``X[i:i+time_steps]`` is therefore paired with ``y[i+time_steps-1]``, the
    label of the window's final row. Pairing it with ``y[i+time_steps]`` would
    shift the target a second time and train a two-day-ahead model.

    Parameters
    ----------
    X : sequence of feature rows (indexable and sliceable along axis 0).
    y : sequence of labels, same length as ``X``.
    time_steps : int, number of consecutive rows per window (>= 1).

    Returns
    -------
    (Xs, ys) : tuple of ``np.ndarray``
        ``Xs`` stacks ``len(X) - time_steps + 1`` windows; ``ys`` holds the
        matching labels.

    Raises
    ------
    ValueError
        If ``time_steps`` is smaller than 1 or ``X`` and ``y`` differ in length.
    """
    if time_steps < 1:
        raise ValueError("time_steps must be >= 1")
    if len(X) != len(y):
        raise ValueError("X and y must have the same number of rows")

    n_windows = len(X) - time_steps + 1
    Xs = [X[i:i + time_steps] for i in range(n_windows)]
    ys = [y[i + time_steps - 1] for i in range(n_windows)]
    return np.array(Xs), np.array(ys)

# Seed Python, NumPy and TensorFlow so weight initialisation and dropout masks
# start from the same state on every run; without this the reported metrics
# change from one execution to the next.
from tensorflow.keras.utils import set_random_seed

SEED = 42
set_random_seed(SEED)

time_steps = 30
X_seq, y_seq = create_sequences(X_scaled, y_scaled, time_steps)

# Train/test split, chronological: the most recent 20% is held out.
train_size = int(len(X_seq) * 0.8)
X_train, X_test = X_seq[:train_size], X_seq[train_size:]
y_train, y_test = y_seq[:train_size], y_seq[train_size:]


# 10. BUILD STACKED BIDIRECTIONAL LSTM
# The bidirectional layer returns the full sequence so the second LSTM can
# consume it; the final Dense(1) emits the scaled next close.
model = Sequential()
model.add(Bidirectional(LSTM(128, return_sequences=True), input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(64, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(1))
model.compile(optimizer="adam", loss="mse")
model.summary()


# 11. TRAIN WITH EARLY STOPPING
# Stop after 10 epochs without val_loss improvement (restoring the best
# weights) and halve the learning rate after 5 such epochs.
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5)

history = model.fit(
    X_train, y_train,
    validation_split=0.1,
    epochs=100,
    batch_size=16,
    shuffle=False,
    callbacks=[early_stop, reduce_lr]
)

# 12. PREDICTION
# Predictions and targets are mapped back to price units before scoring.
y_pred_scaled = model.predict(X_test)
y_pred = scaler_y.inverse_transform(y_pred_scaled)
y_test_actual = scaler_y.inverse_transform(y_test)


# 13. EVALUATE
mse = mean_squared_error(y_test_actual, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test_actual, y_pred)
print(f"Stacked BiLSTM Results: RMSE={rmse:.2f}, R2={r2:.4f}")


# 14. PLOT ACTUAL VS PREDICTED
plt.figure(figsize=(14,6))
plt.plot(y_test_actual, label="Actual Price", color="blue")
plt.plot(y_pred, label="Predicted Price", color="red", linestyle="--")
plt.title(f"Stacked BiLSTM ETH Prediction (RMSE={rmse:.2f})")
plt.xlabel("Time Index")
plt.ylabel("ETH Closing Price")
plt.legend()
plt.grid(True)
plt.show()


# 15. PREDICT NEXT N DAYS
n_days = 14
last_sequence = X_scaled[-time_steps:].copy()
pred_future_scaled = []
current_seq = last_sequence.copy()

# NOTE(review): future feature values are unknown, so each step slides the
# window forward by repeating the last observed feature row. The prediction is
# not fed back into the features, so once the window holds only copies of that
# row (after about time_steps steps) the forecast stops changing.
for _ in range(n_days):
    input_seq = current_seq.reshape(1, time_steps, X_scaled.shape[1])
    # verbose=0: avoid printing one progress bar per forecast step.
    next_pred_scaled = model.predict(input_seq, verbose=0)
    pred_future_scaled.append(next_pred_scaled[0,0])

    # Shift sequence and repeat last features
    next_row = current_seq[-1].copy()
    current_seq = np.vstack((current_seq[1:], next_row.reshape(1,-1)))

# Back to price units.
pred_future = scaler_y.inverse_transform(np.array(pred_future_scaled).reshape(-1,1))


# 16. FUTURE DATAFRAME
last_date = eth_news_merged["date"].iloc[-1]
future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=n_days)

# Future rows carry only a date and the forecast; observed columns stay NaN.
future_df = pd.DataFrame({
    "date": future_dates,
    **{col: np.nan for col in ["eth_close"] + features},
    "target_next_close": pred_future.flatten()
})

combined_df = pd.concat([eth_news_merged, future_df], ignore_index=True)

# 17. PLOT HISTORICAL + FUTURE
# The forecast line is drawn from future_df only. In combined_df the historical
# part of "target_next_close" holds actual (shifted) closes, so plotting that
# whole column as "Predicted" would present real prices as model output.
plt.figure(figsize=(14,6))
plt.plot(eth_news_merged["date"], eth_news_merged["eth_close"], label="Historical ETH Close", color="blue")
plt.plot(future_df["date"], future_df["target_next_close"], label="Predicted ETH Close", color="red", linestyle="--")
plt.title(f"ETH Historical + {n_days}-Day Forecast (Stacked BiLSTM)")
plt.xlabel("Date")
plt.ylabel("ETH Closing Price")
plt.legend()
plt.grid(True)
plt.show()


In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import ta


# 1. LOAD DATA
# NOTE(review): this cell reads from ...\data while the project-paths cell and
# the first LSTM cell read the same file names from ...\data\processed --
# confirm which directory actually holds the CSVs.
base_path = Path(r"C:\Users\moham\Downloads\scrape\text\crypto_project\data")
eth_file = base_path / "eth_cleaned.csv"
news_file = base_path / "daily_news_aggregated.csv"

eth_df = pd.read_csv(eth_file)
daily_news = pd.read_csv(news_file)

# errors="coerce" turns unparseable dates into NaT; NaT fails both comparisons
# in the range filter below, so such rows are excluded there.
eth_df["date"] = pd.to_datetime(eth_df["date"], errors="coerce")
daily_news["date"] = pd.to_datetime(daily_news["date"], errors="coerce")

start_date = pd.to_datetime("2023-01-12")
end_date = pd.to_datetime("2025-10-16")

eth_df = eth_df[(eth_df["date"] >= start_date) & (eth_df["date"] <= end_date)].copy()
daily_news = daily_news[(daily_news["date"] >= start_date) & (daily_news["date"] <= end_date)].copy()

eth_df = eth_df.sort_values("date").reset_index(drop=True)
daily_news = daily_news.sort_values("date").reset_index(drop=True)

# Merge
# Left merge keeps every price row; days without news get 0 for both news features.
df = pd.merge(eth_df, daily_news, on="date", how="left")
df["news_count"] = df["news_count"].fillna(0)
df["mean_sentiment"] = df["mean_sentiment"].fillna(0)


# 2. TECHNICAL INDICATORS
df["rsi"] = ta.momentum.RSIIndicator(close=df["eth_close"], window=14).rsi()
macd = ta.trend.MACD(close=df["eth_close"])
df["macd"] = macd.macd()
df["macd_signal"] = macd.macd_signal()
df["macd_diff"] = macd.macd_diff()
df["sma_20"] = df["eth_close"].rolling(20).mean()
df["sma_50"] = df["eth_close"].rolling(50).mean()
bb = ta.volatility.BollingerBands(close=df["eth_close"], window=20, window_dev=2)
df["bb_upper"] = bb.bollinger_hband()
df["bb_lower"] = bb.bollinger_lband()
df["bb_middle"] = bb.bollinger_mavg()

# Row t's target is the close of row t+1; the final row therefore has no target.
df["target_next_close"] = df["eth_close"].shift(-1)

# Drop only rows that miss a value the model actually needs (indicator warm-up
# rows and the final target-less row). A bare dropna() would also delete any
# day on which some unrelated CSV column is NaN -- e.g. extra news columns left
# empty by the left merge -- and punch gaps into the time series.
required_cols = [
    "eth_close",
    "rsi", "macd", "macd_signal", "macd_diff",
    "sma_20", "sma_50", "bb_upper", "bb_lower", "bb_middle",
    "news_count", "mean_sentiment",
    "target_next_close",
]
df = df.dropna(subset=required_cols).reset_index(drop=True)


# 3. FEATURES + SCALING
features = ["rsi","macd","macd_signal","macd_diff","sma_20","sma_50",
            "bb_upper","bb_lower","bb_middle","news_count","mean_sentiment"]

X = df[features].values
y = df["target_next_close"].values.reshape(-1,1)

# Fit both scalers on the earliest 80% of rows only, then apply them to the
# whole series. Fitting on every row leaks the mean/variance of the held-out
# test period into training. The 0.8 fraction mirrors the chronological
# train/test split applied to the sequences in the search loop.
n_fit_rows = max(1, int(len(X) * 0.8))

scaler_X = StandardScaler()
scaler_X.fit(X[:n_fit_rows])
X_scaled = scaler_X.transform(X)

# Separate scaler for the target so predictions can be inverse-transformed.
scaler_y = StandardScaler()
scaler_y.fit(y[:n_fit_rows])
y_scaled = scaler_y.transform(y)


# 4. FUNCTION TO CREATE SEQUENCES
def create_sequences(X, y, time_steps):
    """Build sliding feature windows paired with their next-step labels.

    ``y`` is expected to be aligned row-for-row with ``X`` and already shifted,
    i.e. ``y[t]`` is the value to predict from the features up to and including
    row ``t`` (in this notebook: ``eth_close.shift(-1)``). Each window
    ``X[i:i+time_steps]`` is therefore paired with ``y[i+time_steps-1]``, the
    label of the window's final row. Pairing it with ``y[i+time_steps]`` would
    shift the target a second time and train a two-day-ahead model.

    Parameters
    ----------
    X : sequence of feature rows (indexable and sliceable along axis 0).
    y : sequence of labels, same length as ``X``.
    time_steps : int, number of consecutive rows per window (>= 1).

    Returns
    -------
    (Xs, ys) : tuple of ``np.ndarray``
        ``Xs`` stacks ``len(X) - time_steps + 1`` windows; ``ys`` holds the
        matching labels.

    Raises
    ------
    ValueError
        If ``time_steps`` is smaller than 1 or ``X`` and ``y`` differ in length.
    """
    if time_steps < 1:
        raise ValueError("time_steps must be >= 1")
    if len(X) != len(y):
        raise ValueError("X and y must have the same number of rows")

    n_windows = len(X) - time_steps + 1
    Xs = [X[i:i + time_steps] for i in range(n_windows)]
    ys = [y[i + time_steps - 1] for i in range(n_windows)]
    return np.array(Xs), np.array(ys)


# 5. HYPERPARAMETER OPTIONS
# Full search grid (disabled -- swap with the quick-test values below to run it):
# time_steps_list = [10, 20, 30]
# lstm_units_list = [32, 64, 128]
# dropout_list = [0.1, 0.2, 0.3]
# batch_size_list = [16, 32]
# epochs_list = [50, 100]
# learning_rate_list = [0.001, 0.0005]

#test parameters
time_steps_list = [10]
lstm_units_list = [32]
dropout_list = [0.1]
batch_size_list = [8]
epochs_list = [50]
learning_rate_list = [0.01]


# 6. LOOP THROUGH HYPERPARAMETERS
from itertools import product

SEED = 42  # re-applied before every run so configurations are compared fairly

results = []
# Best-so-far trackers must exist before the loop; the comparison
# `rmse < best_rmse` would otherwise raise NameError on a fresh kernel.
best_rmse = np.inf
best_model = None
best_X_test, best_y_test = None, None
best_y_pred = None

for time_steps in time_steps_list:
    # Sequences and the chronological 80/20 split depend only on time_steps.
    X_seq, y_seq = create_sequences(X_scaled, y_scaled, time_steps)
    train_size = int(len(X_seq)*0.8)
    X_train, X_test = X_seq[:train_size], X_seq[train_size:]
    y_train, y_test = y_seq[:train_size], y_seq[train_size:]
    # Loop-invariant for this time_steps: test targets in price units.
    y_test_actual = scaler_y.inverse_transform(y_test)

    # product() iterates in the same order as the equivalent nested loops.
    for lstm_units, dropout_rate, batch_size, epochs, lr in product(
        lstm_units_list, dropout_list, batch_size_list, epochs_list, learning_rate_list
    ):
        # Same seed for every configuration (Python, NumPy, TensorFlow).
        tf.keras.utils.set_random_seed(SEED)

        # Build model
        model = Sequential()
        model.add(LSTM(lstm_units, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=False))
        model.add(Dropout(dropout_rate))
        model.add(Dense(1))
        model.compile(optimizer=Adam(learning_rate=lr), loss="mse")

        # Train model
        history = model.fit(
            X_train, y_train,
            validation_split=0.1,
            epochs=epochs,
            batch_size=batch_size,
            verbose=0,
            shuffle=False
        )

        # Predict
        y_pred_scaled = model.predict(X_test, verbose=0)
        y_pred = scaler_y.inverse_transform(y_pred_scaled)

        rmse = np.sqrt(mean_squared_error(y_test_actual, y_pred))
        r2 = r2_score(y_test_actual, y_pred)

        # Store results
        results.append({
            "time_steps": time_steps,
            "lstm_units": lstm_units,
            "dropout": dropout_rate,
            "batch_size": batch_size,
            "epochs": epochs,
            "learning_rate": lr,
            "RMSE": rmse,
            "R2": r2
        })

        print(f"Done: time_steps={time_steps}, units={lstm_units}, dropout={dropout_rate}, batch={batch_size}, epochs={epochs}, lr={lr} | RMSE={rmse:.2f}, R2={r2:.4f}")

        # Update best model
        # NOTE(review): the winner is chosen by test-set RMSE, so the reported
        # best RMSE is optimistically biased; consider selecting on val_loss.
        if rmse < best_rmse:
            best_rmse = rmse
            best_model = model
            best_X_test = X_test
            best_y_test = y_test_actual
            best_y_pred = y_pred


# 7. RESULTS DATAFRAME
# One row per trained configuration; show the ten lowest-RMSE runs.
results_df = pd.DataFrame(results)
top_runs = results_df.sort_values("RMSE").head(10)
print(top_runs)



# 8. SAVE BEST MODEL
# Written to the current working directory in HDF5 format.
best_model.save("best_lstm_model.h5")
print("Best model saved as best_lstm_model.h5")


# 9. PLOT BEST MODEL PREDICTION
# Actual vs. predicted test-set prices of the lowest-RMSE configuration.
fig, ax = plt.subplots(figsize=(14,6))
ax.plot(best_y_test, label="Actual Price", color="blue")
ax.plot(best_y_pred, label="Predicted Price", color="red", linestyle="--")
ax.set_title(f"Best LSTM Model Prediction (RMSE={best_rmse:.2f})")
ax.set_xlabel("Time Index")
ax.set_ylabel("ETH Closing Price")
ax.legend()
ax.grid(True)
plt.show()


In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import ta


# 1. LOAD DATA
# NOTE(review): this cell reads from ...\data while the project-paths cell and
# the first LSTM cell read the same file names from ...\data\processed --
# confirm which directory actually holds the CSVs.
base_path = Path(r"C:\Users\moham\Downloads\scrape\text\crypto_project\data")
eth_file = base_path / "eth_cleaned.csv"
news_file = base_path / "daily_news_aggregated.csv"

eth_df = pd.read_csv(eth_file)
daily_news = pd.read_csv(news_file)

# errors="coerce" turns unparseable dates into NaT; NaT fails both comparisons
# in the range filter below, so such rows are excluded there.
eth_df["date"] = pd.to_datetime(eth_df["date"], errors="coerce")
daily_news["date"] = pd.to_datetime(daily_news["date"], errors="coerce")

start_date = pd.to_datetime("2023-01-12")
end_date = pd.to_datetime("2025-10-16")

eth_df = eth_df[(eth_df["date"] >= start_date) & (eth_df["date"] <= end_date)].copy()
daily_news = daily_news[(daily_news["date"] >= start_date) & (daily_news["date"] <= end_date)].copy()

eth_df = eth_df.sort_values("date").reset_index(drop=True)
daily_news = daily_news.sort_values("date").reset_index(drop=True)

# Merge
# Left merge keeps every price row; days without news get 0 for both news features.
df = pd.merge(eth_df, daily_news, on="date", how="left")
df["news_count"] = df["news_count"].fillna(0)
df["mean_sentiment"] = df["mean_sentiment"].fillna(0)


# 2. TECHNICAL INDICATORS
df["rsi"] = ta.momentum.RSIIndicator(close=df["eth_close"], window=14).rsi()
macd = ta.trend.MACD(close=df["eth_close"])
df["macd"] = macd.macd()
df["macd_signal"] = macd.macd_signal()
df["macd_diff"] = macd.macd_diff()
df["sma_20"] = df["eth_close"].rolling(20).mean()
df["sma_50"] = df["eth_close"].rolling(50).mean()
bb = ta.volatility.BollingerBands(close=df["eth_close"], window=20, window_dev=2)
df["bb_upper"] = bb.bollinger_hband()
df["bb_lower"] = bb.bollinger_lband()
df["bb_middle"] = bb.bollinger_mavg()

# Row t's target is the close of row t+1; the final row therefore has no target.
df["target_next_close"] = df["eth_close"].shift(-1)

# Drop only rows that miss a value the model actually needs (indicator warm-up
# rows and the final target-less row). A bare dropna() would also delete any
# day on which some unrelated CSV column is NaN -- e.g. extra news columns left
# empty by the left merge -- and punch gaps into the time series.
required_cols = [
    "eth_close",
    "rsi", "macd", "macd_signal", "macd_diff",
    "sma_20", "sma_50", "bb_upper", "bb_lower", "bb_middle",
    "news_count", "mean_sentiment",
    "target_next_close",
]
df = df.dropna(subset=required_cols).reset_index(drop=True)


# 3. FEATURES + SCALING
features = ["rsi","macd","macd_signal","macd_diff","sma_20","sma_50",
            "bb_upper","bb_lower","bb_middle","news_count","mean_sentiment"]

X = df[features].values
y = df["target_next_close"].values.reshape(-1,1)

# Fit both scalers on the earliest 80% of rows only, then apply them to the
# whole series. Fitting on every row leaks the mean/variance of the held-out
# test period into training. The 0.8 fraction mirrors the chronological
# train/test split applied to the sequences in the search loop.
n_fit_rows = max(1, int(len(X) * 0.8))

scaler_X = StandardScaler()
scaler_X.fit(X[:n_fit_rows])
X_scaled = scaler_X.transform(X)

# Separate scaler for the target so predictions can be inverse-transformed.
scaler_y = StandardScaler()
scaler_y.fit(y[:n_fit_rows])
y_scaled = scaler_y.transform(y)

# 4. FUNCTION TO CREATE SEQUENCES
def create_sequences(X, y, time_steps):
    """Build sliding feature windows paired with their next-step labels.

    ``y`` is expected to be aligned row-for-row with ``X`` and already shifted,
    i.e. ``y[t]`` is the value to predict from the features up to and including
    row ``t`` (in this notebook: ``eth_close.shift(-1)``). Each window
    ``X[i:i+time_steps]`` is therefore paired with ``y[i+time_steps-1]``, the
    label of the window's final row. Pairing it with ``y[i+time_steps]`` would
    shift the target a second time and train a two-day-ahead model.

    Parameters
    ----------
    X : sequence of feature rows (indexable and sliceable along axis 0).
    y : sequence of labels, same length as ``X``.
    time_steps : int, number of consecutive rows per window (>= 1).

    Returns
    -------
    (Xs, ys) : tuple of ``np.ndarray``
        ``Xs`` stacks ``len(X) - time_steps + 1`` windows; ``ys`` holds the
        matching labels.

    Raises
    ------
    ValueError
        If ``time_steps`` is smaller than 1 or ``X`` and ``y`` differ in length.
    """
    if time_steps < 1:
        raise ValueError("time_steps must be >= 1")
    if len(X) != len(y):
        raise ValueError("X and y must have the same number of rows")

    n_windows = len(X) - time_steps + 1
    Xs = [X[i:i + time_steps] for i in range(n_windows)]
    ys = [y[i + time_steps - 1] for i in range(n_windows)]
    return np.array(Xs), np.array(ys)

# 5. HYPERPARAMETER OPTIONS
# Full search grid (disabled -- swap with the quick-test values below to run it):
# time_steps_list = [10, 20, 30]
# lstm_units_list = [32, 64, 128]
# dropout_list = [0.1, 0.2, 0.3]
# batch_size_list = [16, 32]
# epochs_list = [50, 100]
# learning_rate_list = [0.001, 0.0005]

#test parameters
time_steps_list = [10]
lstm_units_list = [32]
dropout_list = [0.1]
batch_size_list = [8]
epochs_list = [50]
learning_rate_list = [0.01]


# 6. LOOP THROUGH HYPERPARAMETERS
from itertools import product

SEED = 42  # re-applied before every run so configurations are compared fairly

results = []
# Best-so-far trackers, updated whenever a run beats the current best RMSE.
best_rmse = np.inf
best_model = None
best_X_test, best_y_test = None, None
best_y_pred = None

for time_steps in time_steps_list:
    # Sequences and the chronological 80/20 split depend only on time_steps.
    X_seq, y_seq = create_sequences(X_scaled, y_scaled, time_steps)
    train_size = int(len(X_seq)*0.8)
    X_train, X_test = X_seq[:train_size], X_seq[train_size:]
    y_train, y_test = y_seq[:train_size], y_seq[train_size:]
    # Loop-invariant for this time_steps: test targets in price units.
    y_test_actual = scaler_y.inverse_transform(y_test)

    # product() iterates in the same order as the equivalent nested loops.
    for lstm_units, dropout_rate, batch_size, epochs, lr in product(
        lstm_units_list, dropout_list, batch_size_list, epochs_list, learning_rate_list
    ):
        # Same seed for every configuration (Python, NumPy, TensorFlow), so
        # differences in RMSE come from the hyperparameters rather than from
        # different random initialisations.
        tf.keras.utils.set_random_seed(SEED)

        # Build model
        model = Sequential()
        model.add(LSTM(lstm_units, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=False))
        model.add(Dropout(dropout_rate))
        model.add(Dense(1))
        model.compile(optimizer=Adam(learning_rate=lr), loss="mse")

        # Train model
        history = model.fit(
            X_train, y_train,
            validation_split=0.1,
            epochs=epochs,
            batch_size=batch_size,
            verbose=0,
            shuffle=False
        )

        # Predict
        y_pred_scaled = model.predict(X_test, verbose=0)
        y_pred = scaler_y.inverse_transform(y_pred_scaled)

        rmse = np.sqrt(mean_squared_error(y_test_actual, y_pred))
        r2 = r2_score(y_test_actual, y_pred)

        # Store results
        results.append({
            "time_steps": time_steps,
            "lstm_units": lstm_units,
            "dropout": dropout_rate,
            "batch_size": batch_size,
            "epochs": epochs,
            "learning_rate": lr,
            "RMSE": rmse,
            "R2": r2
        })

        print(f"Done: time_steps={time_steps}, units={lstm_units}, dropout={dropout_rate}, batch={batch_size}, epochs={epochs}, lr={lr} | RMSE={rmse:.2f}, R2={r2:.4f}")

        # Update best model
        # NOTE(review): the winner is chosen by test-set RMSE, so the reported
        # best RMSE is optimistically biased; consider selecting on val_loss.
        if rmse < best_rmse:
            best_rmse = rmse
            best_model = model
            best_X_test = X_test
            best_y_test = y_test_actual
            best_y_pred = y_pred

# 7. RESULTS DATAFRAME
# One row per trained configuration; show the ten lowest-RMSE runs.
results_df = pd.DataFrame(results)
top_runs = results_df.sort_values("RMSE").head(10)
print(top_runs)


# 8. SAVE BEST MODEL
# Written to the current working directory in HDF5 format.
best_model.save("best_lstm_model.h5")
print("Best model saved as best_lstm_model.h5")


# 9. PLOT BEST MODEL PREDICTION
# Actual vs. predicted test-set prices of the lowest-RMSE configuration.
fig, ax = plt.subplots(figsize=(14,6))
ax.plot(best_y_test, label="Actual Price", color="blue")
ax.plot(best_y_pred, label="Predicted Price", color="red", linestyle="--")
ax.set_title(f"Best LSTM Model Prediction (RMSE={best_rmse:.2f})")
ax.set_xlabel("Time Index")
ax.set_ylabel("ETH Closing Price")
ax.legend()
ax.grid(True)
plt.show()
