# In [None]:
import os # https://docs.python.org/3/library/os.html
import pandas as pd # https://pandas.pydata.org/docs/user_guide/10min.html
import numpy as np # https://numpy.org/
import matplotlib.pyplot as plt # https://matplotlib.org/
import datetime # https://docs.python.org/3/library/datetime.html
import gc # https://docs.python.org/3/library/gc.html

from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error # https://scikit-learn.org/stable/api/sklearn.metrics.html
from sklearn.model_selection import KFold # https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html
from sklearn.preprocessing import MinMaxScaler # https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html

import autokeras as ak # https://autokeras.com
from tensorflow.keras import backend as K # https://blog.keras.io/keras-as-a-simplified-interface-to-tensorflow-tutorial.html
import matplotlib.dates as mdates # https://matplotlib.org/stable/api/dates_api.html

# Run configuration
WINDOW_SIZE: int = 30      # rows per input window handed to the model
FORECAST_LENGTH: int = 7   # number of forecast steps evaluated per symbol
N_FOLDS: int = 5           # n_splits for the K-fold cross-validation
EPOCHS: int = 10           # epochs passed to every AutoKeras fit call
BATCH_SIZE: int = 16       # batch size passed to every AutoKeras fit call

# Load Data
# https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
# https://pandas.pydata.org/docs/reference/api/pandas.concat.html
folder_path = '' # Your file path

# List all CSV files in the folder.
# os.listdir returns names in arbitrary order, so sort them to make the
# concatenation order (and therefore the run) reproducible.
csv_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.csv'))
if not csv_files:
    # pd.concat on an empty list raises a ValueError with an unhelpful message;
    # fail with the same exception type but say what is actually wrong.
    raise ValueError(f"No .csv files found in folder {folder_path!r}")

dfs = [pd.read_csv(os.path.join(folder_path, f)) for f in csv_files]
full_df = pd.concat(dfs, ignore_index=True)

# Clean datetime: unparseable dates become NaT and those rows are dropped
full_df['Date'] = pd.to_datetime(full_df['Date'], errors='coerce') # https://pandas.pydata.org/docs/reference/api/pandas.to_datetime.html
full_df.dropna(subset=['Date'], inplace=True) # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.dropna.html

# Save original for real Close and Date access
# (copied before 'Symbol_Encoded' is added, so it does not contain that column)
original_full_df = full_df.copy()

# Encode each symbol as an integer category code
full_df['Symbol_Encoded'] = full_df['Symbol'].astype('category').cat.codes # https://pandas.pydata.org/docs/reference/api/pandas.Series.cat.codes.html

# One row per (Symbol, code) pair; the code is stored as float because the
# per-symbol loop looks ids up as floats. Casting through astype() builds a
# new frame instead of assigning into a derived one.
symbol_map = (
    full_df[['Symbol', 'Symbol_Encoded']]
    .drop_duplicates()
    .astype({'Symbol_Encoded': float}) # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.astype.html
)

# Accumulators filled by the per-symbol loop
summary_list = []
actual_predicted_data = []

# Sequence Creation
# Defined once, outside the per-symbol loop, instead of being re-created on every iteration.
def create_sequences(df, window, target='Close', features=None):
    """Build sliding-window samples from a time-ordered DataFrame.

    Each sample pairs `window` consecutive rows of the feature columns with
    the `target` value of the row immediately following that window.

    Args:
        df: DataFrame containing the feature columns, `target` and 'Date'.
        window: Number of consecutive rows per input sample.
        target: Name of the column to predict.
        features: Feature column names. When None, the module-level
            `feature_cols` of the symbol currently being processed is used.

    Returns:
        Tuple (X, y, dates): X holds the stacked windows, y the matching
        target values, and dates the 'Date' value of each target row.
    """
    cols = feature_cols if features is None else features
    feature_values = df[cols].values
    target_values = df[target].values
    n_samples = len(df) - window  # may be <= 0, which yields empty outputs
    X = np.array([feature_values[i:i + window] for i in range(n_samples)])
    y = np.array(target_values[window:])
    dates = df['Date'].iloc[window:].tolist()
    return X, y, dates


# Encoded id -> symbol lookup, built once because it does not change inside the loop.
# Keys are floats, matching the float(symbol_id) used for the lookup below.
symbol_lookup = dict(zip(symbol_map['Symbol_Encoded'].astype(float), symbol_map['Symbol']))

# Loop over each crypto asset (by encoded symbol)
for symbol_id in full_df['Symbol_Encoded'].unique(): # https://pandas.pydata.org/docs/reference/api/pandas.unique.html
    symbol_id = float(symbol_id)
    if symbol_id not in symbol_lookup:
        print(f"Symbol ID {symbol_id} not found in symbol_map.")
        continue
    symbol_str = symbol_lookup[symbol_id]

    # Work on the un-encoded copy of the data, ordered by date
    crypto_df = original_full_df[original_full_df['Symbol'] == symbol_str].copy()
    crypto_df = crypto_df.sort_values(by='Date').reset_index(drop=True) # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.sort_values.html

    # Train/Test Split (chronological 80/20)
    split_index = int(len(crypto_df) * 0.8)
    train_df = crypto_df.iloc[:split_index].copy() # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.iloc.html
    test_df = crypto_df.iloc[split_index:].copy().reset_index(drop=True)

    # The train split must yield at least N_FOLDS windows (KFold needs
    # n_samples >= n_splits) and the test split must cover every forecast
    # step; otherwise the code below fails partway through.
    n_train_windows = len(train_df) - WINDOW_SIZE
    if n_train_windows < N_FOLDS or len(test_df) < FORECAST_LENGTH:
        print(f"Skipping {symbol_str}, not enough data.")
        continue

    # Every column except identifiers, the date and the target is a feature
    excluded_cols = ('Date', 'Close', 'Name', 'Symbol')
    feature_cols = [col for col in crypto_df.columns if col not in excluded_cols]

    # Scaling
    # Separate scaler for the 'Close' column (target variable) so predictions
    # can be inverse-transformed on their own; both scalers are fit on train only.
    close_scaler = MinMaxScaler()
    train_df['Close'] = close_scaler.fit_transform(train_df[['Close']])
    test_df['Close'] = close_scaler.transform(test_df[['Close']])

    # Apply the MinMaxScaler to the other features
    scaler = MinMaxScaler()
    train_df[feature_cols] = scaler.fit_transform(train_df[feature_cols])
    test_df[feature_cols] = scaler.transform(test_df[feature_cols])

    X_all, y_all, _ = create_sequences(train_df, WINDOW_SIZE, features=feature_cols)

    # K-Fold Cross-Validation
    # NOTE(review): unshuffled KFold still trains some folds on windows that
    # come after the validation windows — consider TimeSeriesSplit; confirm intent.
    kf = KFold(n_splits=N_FOLDS)
    print(f"\nRunning {N_FOLDS}-fold CV for {symbol_str}...")

    for fold, (train_index, val_index) in enumerate(kf.split(X_all), start=1):
        X_train, X_val = X_all[train_index], X_all[val_index]
        y_train, y_val = y_all[train_index], y_all[val_index]

        # AutoKeras model https://autokeras.com/auto_model/
        # The timestamp keeps project directories from different runs apart.
        model = ak.AutoModel(
            inputs=ak.Input(shape=(WINDOW_SIZE, len(feature_cols))),
            outputs=ak.RegressionHead(),
            max_trials=5,
            tuner='greedy',
            project_name=f"ak_{symbol_str}_fold{fold}_{datetime.datetime.now().strftime('%Y%m%d%H%M%S')}" # https://docs.python.org/3.6/library/datetime.html
        )

        model.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=0)

        best_model = model.export_model()
        val_pred = best_model.predict(X_val).flatten()
        val_rmse = np.sqrt(mean_squared_error(y_val, val_pred))
        print(f"Fold {fold} RMSE: {val_rmse:.3f}")

        # Drop the models and clear the Keras session before the next fold
        del model, best_model
        K.clear_session() # https://www.tensorflow.org/api_docs/python/tf/keras/backend/clear_session
        gc.collect() # https://docs.python.org/3/library/gc.html

    # Final Model Training
    print(f"Training final model on full train set for {symbol_str}...")
    model_final = ak.AutoModel(
        inputs=ak.Input(shape=(WINDOW_SIZE, len(feature_cols))),
        outputs=ak.RegressionHead(),
        max_trials=1,
        tuner='greedy',
        project_name=f"ak_{symbol_str}_full_{datetime.datetime.now().strftime('%Y%m%d%H%M%S')}"
    )
    model_final.fit(X_all, y_all, epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=0)
    final_model = model_final.export_model()

    # 7-Day Forecast
    # Start from the last WINDOW_SIZE training rows, shaped (1, WINDOW_SIZE, n_features)
    # https://numpy.org/doc/stable/reference/generated/numpy.expand_dims.html
    forecast_input = train_df.iloc[-WINDOW_SIZE:][feature_cols].values
    forecast_input = np.expand_dims(forecast_input, axis=0).astype(np.float32)

    y_pred_forecast = []

    for step in range(FORECAST_LENGTH):
        pred = final_model.predict(forecast_input).flatten()[0]
        y_pred_forecast.append(pred)

        # Move window: drop oldest, add new feature row from test
        # (the window is advanced with the observed test features, not with predictions)
        new_features = test_df.iloc[step][feature_cols].values
        new_features = new_features.reshape(1, 1, -1).astype(np.float32)

        forecast_input = np.concatenate([forecast_input[:, 1:, :], new_features], axis=1)

    # Actual Close prices for comparison
    y_actual_forecast = test_df.iloc[:FORECAST_LENGTH]['Close'].values
    forecast_dates = test_df.iloc[:FORECAST_LENGTH]['Date'].values

    # Inverse scale the predicted 'Close' values (target)
    y_pred_forecast = close_scaler.inverse_transform(np.array(y_pred_forecast).reshape(-1, 1)).flatten()

    # Inverse scale the actual 'Close' values
    y_actual_forecast = close_scaler.inverse_transform(y_actual_forecast.reshape(-1, 1)).flatten()

    # Metrics
    mae = mean_absolute_error(y_actual_forecast, y_pred_forecast)
    mse = mean_squared_error(y_actual_forecast, y_pred_forecast) # https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_error.html
    rmse = np.sqrt(mse) # https://numpy.org/doc/2.1/reference/generated/numpy.sqrt.html
    mape = mean_absolute_percentage_error(y_actual_forecast, y_pred_forecast) * 100 # https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_absolute_percentage_error.html

    # Build final forecast table for this crypto
    summary_list.append({ # https://docs.python.org/3/tutorial/datastructures.html
        "Symbol": symbol_str,
        "Test MAE": mae,
        "Test MSE": mse,
        "Test RMSE": rmse,
        "Test MAPE (%) 7d AutoKeras": mape
    })

    for date, actual, predicted in zip(forecast_dates, y_actual_forecast, y_pred_forecast):
        actual_predicted_data.append({
            "Symbol": symbol_str,
            "Date": date,
            "Actual": actual,
            "Predicted": predicted
        })

    # Plot predictions
    # https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.plot.html
    # https://matplotlib.org/stable/tutorials/index.html
    plt.figure(figsize=(12, 6))
    plt.plot(forecast_dates, y_actual_forecast, label="Actual", color='green')
    plt.plot(forecast_dates, y_pred_forecast, linestyle='dashed', color='red', label="Predicted")
    plt.title(f"{symbol_str} - 7-Day Forecast")
    plt.xlabel("Date")
    plt.ylabel("Close Price")
    plt.legend()
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
    plt.gca().xaxis.set_major_locator(mdates.AutoDateLocator())
    plt.xticks(rotation=45)
    plt.grid()
    plt.tight_layout()
    plt.show()

    # Drop the final models and clear the Keras session before the next symbol
    del model_final, final_model
    K.clear_session()
    gc.collect()

# Collect the per-symbol results into tables
actual_predicted_df = pd.DataFrame(actual_predicted_data)
summary_df = pd.DataFrame(summary_list)

# Write both tables as CSV files, without the index column
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_csv.html
actual_predicted_df.to_csv("AutoKeras_7d_predictions.csv", index=False)
summary_df.to_csv("AutoKeras_7d_metrics.csv", index=False)

# Echo the metrics table and the first prediction rows
print("\nSummary Metrics:", summary_df, sep="\n")
print("\nSample Predictions:", actual_predicted_df.head(), sep="\n")

