# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import joblib

# Data Exploration
# Load the data
def load_data(symbol, data_dir="data/historical_data"):
    """Load the historical USDT-pair CSV for ``symbol`` into a DataFrame.

    Args:
        symbol: Ticker prefix used in the file name, e.g. ``"BTC"`` reads
            ``BTC_USDT_historical_data.csv``.
        data_dir: Directory that contains the CSV files. The default keeps
            the location this script has always read from.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv`` with its
        default options.
    """
    return pd.read_csv(f"{data_dir}/{symbol}_USDT_historical_data.csv")

# Read the three coin histories in a fixed order: BTC, ETH, then SOL.
btc_data, eth_data, sol_data = map(load_data, ("BTC", "ETH", "SOL"))

def plot_statistics(data, symbol):
    """Show a bar chart of ``data.describe()`` titled with ``symbol``.

    Args:
        data: Object exposing ``describe()`` (the script passes the loaded
            DataFrames).
        symbol: Label interpolated into the chart title.
    """
    summary = data.describe()
    chart_title = f"{symbol} Data Statistics"
    summary.plot(kind='bar', title=chart_title)
    plt.show()

# Chart every coin first, then print the same summary tables as text.
_coin_frames = (("BTC", btc_data), ("ETH", eth_data), ("SOL", sol_data))
for _symbol, _frame in _coin_frames:
    plot_statistics(_frame, _symbol)

# Display basic statistics
for _symbol, _frame in _coin_frames:
    print(f"{_symbol} Data Statistics:")
    print(_frame.describe())

# Feature Engineering
# TODO: add code here to create any new feature columns (and the target
# column) that the model-training calls below expect.

# Model Tuning
# Hyper-parameter candidates explored by the grid search (3 x 3 x 3 combinations).
param_grid = dict(
    n_estimators=[50, 100, 200],
    learning_rate=[0.01, 0.1, 0.2],
    max_depth=[3, 4, 5],
)

# Function to train and evaluate a model
def train_and_evaluate(data, features, target):
    """Tune, fit and score a gradient-boosting classifier on ``data``.

    The rows are split 80/20 into train and test sets (seeded with
    ``random_state=42``), a 3-fold grid search over the module-level
    ``param_grid`` selects the hyper-parameters on the training part, and
    accuracy, precision, recall and F1 on the held-out part are printed.

    Args:
        data: Table indexable as ``data[features]`` / ``data[target]``
            (the script passes pandas DataFrames).
        features: Column labels used as model inputs.
        target: Column label of the class to predict.

    Returns:
        The fitted ``GradientBoostingClassifier`` using the best parameters
        found by the grid search.
    """
    X = data[features]
    y = data[target]

    # NOTE(review): train_test_split shuffles rows by default. If the rows
    # are time-ordered, later rows end up in the training set -- confirm
    # this is intended.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Grid search for best parameters
    grid_search = GridSearchCV(
        estimator=GradientBoostingClassifier(), param_grid=param_grid, cv=3
    )
    grid_search.fit(X_train, y_train)

    # With the default refit=True, GridSearchCV has already retrained the
    # winning configuration on the whole training split, so reuse that
    # estimator instead of building and fitting an identical model again.
    gbc = grid_search.best_estimator_

    # Make predictions
    y_pred = gbc.predict(X_test)

    # Evaluate the model
    # NOTE(review): precision/recall/F1 are called with scikit-learn's
    # default averaging, which presumably requires a binary target --
    # confirm against the real data.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1 Score: {f1}")

    return gbc

# Fit one tuned classifier per coin. The feature list is still the
# template placeholder and must be replaced with real column names.
btc_model = train_and_evaluate(
    data=btc_data, features=['feature1', 'feature2', ...], target='target'
)
eth_model = train_and_evaluate(
    data=eth_data, features=['feature1', 'feature2', ...], target='target'
)
sol_model = train_and_evaluate(
    data=sol_data, features=['feature1', 'feature2', ...], target='target'
)

# Persist each fitted model under models/ in the same BTC, ETH, SOL order.
for _model, _filename in (
    (btc_model, "models/btc_gradient_boost_model.pkl"),
    (eth_model, "models/eth_gradient_boost_model.pkl"),
    (sol_model, "models/sol_gradient_boost_model.pkl"),
):
    joblib.dump(_model, _filename)