In [None]:
import os # https://docs.python.org/3/library/os.html
import pandas as pd # https://pandas.pydata.org/docs/
import numpy as np # https://numpy.org/doc/
import matplotlib.pyplot as plt # https://matplotlib.org/stable/index.html
from autots import AutoTS # https://pypi.org/project/autots/
from sklearn.preprocessing import LabelEncoder # https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelEncoder.html
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score # https://scikit-learn.org/stable/api/sklearn.metrics.html

folder_path = "" # Path to a folder with cryptocurrencies 

# Load every CSV in `folder_path` into one long DataFrame, tagging each row
# with the asset it came from.
# https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
# https://pandas.pydata.org/docs/reference/api/pandas.concat.html
# List all CSV files. os.listdir() returns entries in arbitrary order, so sort
# them to make the load order (and therefore the row order) reproducible.
csv_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.csv'))
if not csv_files:
    # pd.concat([]) would fail with an opaque "No objects to concatenate";
    # raise the same exception type with a message that names the real cause.
    raise ValueError(f"No .csv files found in folder: {folder_path!r}")

dfs = []
for csv_file in csv_files:
    # Read each CSV; they are concatenated into a single DataFrame below
    file_path = os.path.join(folder_path, csv_file) # Load CSV
    df = pd.read_csv(file_path)
    df["Symbol"] = os.path.splitext(csv_file)[0] # Use filename (without extension) as symbol
    dfs.append(df)

# Combine all CSVs into one DataFrame
full_df = pd.concat(dfs, ignore_index=True) # https://pandas.pydata.org/docs/reference/api/pandas.concat.html
# Unparseable dates become NaT (errors='coerce') and those rows are dropped
full_df['Date'] = pd.to_datetime(full_df['Date'], errors='coerce')
full_df.dropna(subset=['Date'], inplace=True)

# Forecasting Parameters
# Horizon, in days, that is forecast and evaluated (7 here; 15 and 30 are the
# other horizons this script is run with).
forecast_length = 7

# Accumulators filled by the per-asset loop: metric rows, full forecast
# tables, and actual-vs-predicted tables respectively.
summary_list, all_predictions, all_actual_vs_predicted = [], [], []

# Add an integer label for every 'Symbol' value.
# https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelEncoder.html
encoder = LabelEncoder().fit(full_df['Symbol'])
full_df['Symbol_Encoded'] = encoder.transform(full_df['Symbol'])

# Loop over each crypto asset (grouped by its "Symbol" name).
# For every asset: split chronologically, fit AutoTS on the training part,
# forecast `forecast_length` steps, compare them with the first
# `forecast_length` rows of the test part, plot, and record metrics.
for symbol, symbol_df in full_df.groupby("Symbol"):
    symbol_df = symbol_df.sort_values(by="Date").reset_index(drop=True)

    # 80/20 chronological split (oldest 80% train, newest 20% test)
    split_index = int(len(symbol_df) * 0.8)
    train_df = symbol_df.iloc[:split_index]
    test_df = symbol_df.iloc[split_index:]

    # Ensure test set is at least `forecast_length` days long
    if len(test_df) < forecast_length:
        print(f"Skipping {symbol} - not enough test data.")
        continue

    # Initialize AutoTS model
    # https://pypi.org/project/autots/
    # https://winedarksea.github.io/AutoTS/build/html/source/tutorial.html
    model = AutoTS(
        forecast_length=forecast_length,
        frequency="infer",
        prediction_interval=0.9,
        ensemble="None",
        model_list='scalable',
        transformer_list="all",
        max_generations=5,
        num_validations=5,
        validation_method="backwards"  # Acts like cross-validation on train set
    )
    model = model.fit(train_df, date_col="Date", value_col="Close")

    # Forecast the `forecast_length` steps that follow the training data
    # (this is only the start of the test set, not all of it).
    prediction = model.predict()
    forecast = prediction.forecast.iloc[:, 0]

    # Evaluate first `forecast_length` days of test set.
    # NOTE(review): rows are paired by position, which assumes the test rows
    # are contiguous at the inferred frequency - confirm for assets with gaps.
    eval_actual = test_df["Close"].iloc[:forecast_length].reset_index(drop=True)
    eval_dates = test_df["Date"].iloc[:forecast_length].reset_index(drop=True)
    y_true = eval_actual.values
    y_pred = forecast.values

    # Save evaluation results
    df_eval = pd.DataFrame({
        "Date": eval_dates,
        "Symbol": symbol,
        "Actual": eval_actual,
        "Predicted": y_pred
    })
    all_actual_vs_predicted.append(df_eval)

    # Plot test predictions (labels follow `forecast_length` instead of a
    # hard-coded 7 so they stay correct for 15/30-day runs)
    # https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.plot.html
    # https://matplotlib.org/stable/tutorials/index.html
    plt.figure(figsize=(12, 6))
    plt.plot(symbol_df["Date"], symbol_df["Close"], label="Full Price History", color="blue", linewidth=1.5)
    plt.plot(eval_dates, eval_actual, label=f"Actual (First {forecast_length})", color="green", linewidth=2)
    plt.plot(eval_dates, y_pred, label=f"Predicted (First {forecast_length})", color="red", linestyle="--", linewidth=2)
    plt.title(f"{symbol} - Actual vs Predicted (First {forecast_length} Days of Test Set)")
    plt.xlabel("Date")
    plt.ylabel("Close Price")
    plt.legend()
    plt.grid(True)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

    # Calculate test metrics
    mae = mean_absolute_error(y_true, y_pred) # https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_absolute_error.html
    rmse = np.sqrt(mean_squared_error(y_true, y_pred)) # https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_error.html
    r2 = r2_score(y_true, y_pred)
    # MAPE is undefined where the actual value is 0 (division by zero gives
    # inf/NaN), so average only over non-zero actuals; NaN if there are none.
    nonzero = y_true != 0
    if nonzero.any():
        mape = np.mean(np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])) * 100
    else:
        mape = np.nan

    # Record this crypto's row for the metrics summary table
    summary_list.append({
        "Cryptocurrency": symbol,
        "MAE": mae,
        "RMSE": rmse,
        "R² Score": r2,
        "MAPE (%)": mape
    })

    # Full `forecast_length`-day forecast saved, tagged with its symbol
    forecast_df = prediction.forecast.reset_index()
    forecast_df["Symbol"] = symbol
    all_predictions.append(forecast_df)

# Report and persist the results collected by the per-asset loop.
if not summary_list:
    # Every asset was skipped (or none were loaded). Sorting an empty frame by
    # "MAPE (%)" raises KeyError and pd.concat([]) raises ValueError, so stop
    # here with a clear message instead.
    print("\nNo assets were forecast - nothing to summarise or save.")
else:
    # Summary Metrics Table, best (lowest MAPE) first
    summary_df = pd.DataFrame(summary_list).sort_values("MAPE (%)")
    print("\nModel Performance Summary:")
    print(summary_df)

    # Actual vs Predicted over the evaluated horizon
    results_df = pd.concat(all_actual_vs_predicted, ignore_index=True)
    print(f"\nActual vs Predicted ({forecast_length} Days):")
    print(results_df)

    # Full forecast tables for all assets
    future_forecasts_df = pd.concat(all_predictions, ignore_index=True)
    print(f"\n{forecast_length}-Day Future Forecasts:")
    print(future_forecasts_df.head())

    # Save CSVs; the horizon is part of the filename so 7/15/30-day runs do
    # not overwrite each other.
    summary_df.to_csv(f"AutoTS_{forecast_length}d_metrics.csv", index=False)
    results_df.to_csv(f"AutoTS_{forecast_length}d_predictions.csv", index=False)
    future_forecasts_df.to_csv(f"AutoTS_{forecast_length}d_forecast.csv", index=False)
