Bibliotecas

In [11]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge, Lasso 
import xgboost as xgb
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score,mean_absolute_error

import os
import sys
import requests
import pandas as pd
import numpy as np
from tqdm import tqdm
import datetime


Preparação dos Dados

In [12]:
def get_bitcoin_data():
    url = "https://api.coingecko.com/api/v3/coins/bitcoin/market_chart"
    params = {
        'vs_currency': 'usd',
        'days': '365',
        'interval': 'daily'
    }
    response = requests.get(url, params=params)
    data = response.json()
    return data

def prepare_data():
    data = get_bitcoin_data()
    prices = data['prices']


    df = pd.DataFrame(prices, columns=['timestamp', 'price'])


    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
    df.set_index('timestamp', inplace=True)

    return df

df = prepare_data()

def create_features(df):
    # Criar lags de 1 e 7 dias
    df['lag_1'] = df['price'].shift(1)
    df['lag_7'] = df['price'].shift(7)


    df['ma_7'] = df['price'].rolling(window=7).mean()

    df = df.dropna()

    return df

df_features = create_features(df)

X = df_features[['lag_1', 'lag_7', 'ma_7']]
y = df_features['price']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


# scaler = StandardScaler()

# X_train = scaler.fit_transform(X_train)

# X_test = scaler.transform(X_test)


df_features.to_parquet(f".\\historico_bitcoin\\bitcoin_data{datetime.datetime.now().strftime("%Y%m%d")}.parquet", index=True)

Parâmetros para RIDGE e LASSO

In [None]:
alphas = [0.01, 0.1, 1.0, 10.0]
solvers = ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg']  
max_iters = [100, 500, 1000]  

ridge_metrics = {}
lasso_metrics = {}

for alpha in alphas:
    for solver in solvers:
        ridge_model = Ridge(alpha=alpha, solver=solver)
        ridge_model.fit(X_train, y_train)
        y_pred = ridge_model.predict(X_test)
        ridge_metrics[(alpha, solver)] = {
            'MSE': mean_squared_error(y_test, y_pred),
            'R²': r2_score(y_test, y_pred)
        }

for alpha in alphas:
    for max_iter in max_iters:
        lasso_model = Lasso(alpha=alpha, max_iter=max_iter)
        lasso_model.fit(X_train, y_train)
        y_pred = lasso_model.predict(X_test)
        lasso_metrics[(alpha, max_iter)] = {
            'MSE': mean_squared_error(y_test, y_pred),
            'R²': r2_score(y_test, y_pred)
        }

ridge_metrics_df = pd.DataFrame(ridge_metrics).T
lasso_metrics_df = pd.DataFrame(lasso_metrics).T

print("Métricas da Regressão Ridge:")
print(ridge_metrics_df)

print("\nMétricas da Regressão Lasso:")
print(lasso_metrics_df)

best_params_ridge = ridge_metrics_df['MSE'].idxmin()
best_params_lasso = lasso_metrics_df['MSE'].idxmin()

print("\nMelhores Parâmetros da Regressão Ridge:")
print(best_params_ridge)

print("\nMelhores Parâmetros da Regressão Lasso:")
print(best_params_lasso)


Parâmetros GradientBoosting

In [None]:


n_estimators = [50, 100, 200]
max_depth = [3, 5, 7]
learning_rates = [0.01, 0.1, 0.2]
min_samples_split = [2, 5, 10]

gb_metrics = {}

for n in n_estimators:
    for depth in max_depth:
        for lr in learning_rates:
            for min_split in min_samples_split:
                gb_model = GradientBoostingRegressor(
                    n_estimators=n,
                    max_depth=depth,
                    learning_rate=lr,
                    min_samples_split=min_split
                )
                gb_model.fit(X_train, y_train)
                y_pred = gb_model.predict(X_test)
                gb_metrics[(n, depth, lr, min_split)] = {
                    'MSE': mean_squared_error(y_test, y_pred),
                    'R²': r2_score(y_test, y_pred)
                }

gb_metrics_df = pd.DataFrame(gb_metrics).T
print("Métricas do Gradient Boosting:")
print(gb_metrics_df)

best_params_gb = gb_metrics_df['MSE'].idxmin()
print("\nMelhores Parâmetros:")
print(best_params_gb)


Parâmetros xgboost

In [None]:


n_estimators = [50, 100, 200]
learning_rates = [0.01, 0.1, 0.2]
max_depth = [3, 5, 7]
subsample = [0.8, 0.9, 1.0]

xgb_metrics = {}

for n in n_estimators:
    for lr in learning_rates:
        for depth in max_depth:
            for subs in subsample:
                xgb_model = xgb.XGBRegressor(
                    objective='reg:squarederror',
                    n_estimators=n,
                    learning_rate=lr,
                    max_depth=depth,
                    subsample=subs
                )
                xgb_model.fit(X_train, y_train)
                y_pred = xgb_model.predict(X_test)
                xgb_metrics[(n, lr, depth, subs)] = {
                    'MSE': mean_squared_error(y_test, y_pred),
                    'R²': r2_score(y_test, y_pred)
                }

xgb_metrics_df = pd.DataFrame(xgb_metrics).T
print("Métricas do XGBoost:")
print(xgb_metrics_df)

best_params_xgb = xgb_metrics_df['MSE'].idxmin()
print("\nMelhores Parâmetros:")
print(best_params_xgb)


Parâmetros MLPRegressor

In [None]:
hidden_layer_sizes = [(50,), (100,), (100, 50), (100, 100), (50, 50, 50)]
activation = ['relu', 'tanh', 'logistic']
alpha = [0.0001, 0.001, 0.01]
solver = ['lbfgs', 'sgd', 'adam']

nn_metrics = {}

for layers in hidden_layer_sizes:
    for act in activation:
        for alpha_val in alpha:
            for solver_method in solver:
                try:
                    nn_model = MLPRegressor(
                        hidden_layer_sizes=layers,
                        activation=act,
                        alpha=alpha_val,
                        solver=solver_method,
                        max_iter=1000,
                        verbose=True,
                        learning_rate_init=0.01
                    )
                    nn_model.fit(X_train, y_train)
                    y_pred = nn_model.predict(X_test)
                    nn_metrics[(layers, act, alpha_val, solver_method)] = {
                        'MSE': mean_squared_error(y_test, y_pred),
                        'R²': r2_score(y_test, y_pred)
                    }
                except Exception as e:
                    print(f"Error with parameters {layers}, {act}, {alpha_val}, {solver_method}: {e}")

nn_metrics_df = pd.DataFrame(nn_metrics).T
print("Métricas do MLP Regressor:")
print(nn_metrics_df)

best_params_nn = nn_metrics_df['MSE'].idxmin()
print("\nMelhores Parâmetros:")
print(best_params_nn)

Treinamento dos Modelos

In [None]:

#regressão linear
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)

best_alpha_ridge, best_solver_ridge = ridge_metrics_df['MSE'].idxmin()
best_alpha_lasso, best_iters_lasso = lasso_metrics_df['MSE'].idxmin()

# Regressão Ridge
ridge_model = Ridge(alpha=best_alpha_ridge, solver=best_solver_ridge)
ridge_model.fit(X_train, y_train)

# Regressão Lasso
lasso_model = Lasso(alpha=best_alpha_lasso,  max_iter=best_iters_lasso)
lasso_model.fit(X_train, y_train)

best_params_gb = gb_metrics_df['MSE'].idxmin()

best_n_estimators, best_max_depth, best_learning_gb, best_sample_gb = best_params_gb

# Gradient Boosting
gb_model = GradientBoostingRegressor(n_estimators=n,max_depth=depth,learning_rate=lr,min_samples_split=min_split)
gb_model.fit(X_train, y_train)

best_params_xgb = xgb_metrics_df['MSE'].idxmin()

best_n_estimators, best_learning_rate, best_detph_xgb, best_subsample  = best_params_xgb

# XGBoost
xgb_model = xgb.XGBRegressor(objective='reg:squarederror',n_estimators=best_n_estimators,learning_rate=best_learning_rate,max_depth=best_detph_xgb,subsample=best_subsample)
xgb_model.fit(X_train, y_train)

best_params_nn = nn_metrics_df['MSE'].idxmin()

best_hidden_layer_sizes, best_activation, alpha_val,solver_method = best_params_nn

# MLP Regressor
nn_model = MLPRegressor(hidden_layer_sizes=best_hidden_layer_sizes, activation=best_activation, max_iter=1000, alpha=alpha_val, solver=solver_method)
nn_model.fit(X_train, y_train)

Resultados com os Modelos

In [21]:
last_row = df_features.iloc[-1]

new_data = [[last_row['lag_1'], last_row['lag_7'], last_row['ma_7']]]

tomorrow_price = linear_model.predict(new_data)
print(f'Previsão do preço do Bitcoin para amanhã (linear_model): {tomorrow_price[0]:.2f}')
tomorrow_price = ridge_model.predict(new_data)
print(f'Previsão do preço do Bitcoin para amanhã (ridge_model): {tomorrow_price[0]:.2f}')
tomorrow_price = lasso_model.predict(new_data)
print(f'Previsão do preço do Bitcoin para amanhã (lasso_model): {tomorrow_price[0]:.2f}')
tomorrow_price = gb_model.predict(new_data)
print(f'Previsão do preço do Bitcoin para amanhã (gb_model): {tomorrow_price[0]:.2f}')
tomorrow_price = xgb_model.predict(new_data)
print(f'Previsão do preço do Bitcoin para amanhã (xgb_model): {tomorrow_price[0]:.2f}')
tomorrow_price = nn_model.predict(new_data)
print(f'Previsão do preço do Bitcoin para amanhã (nn_model): {tomorrow_price[0]:.2f}')


Previsão do preço do Bitcoin para amanhã (linear_model): 823842800.10
Previsão do preço do Bitcoin para amanhã (ridge_model): 823746711.91
Previsão do preço do Bitcoin para amanhã (lasso_model): 823888976.19
Previsão do preço do Bitcoin para amanhã (gb_model): 68877.68
Previsão do preço do Bitcoin para amanhã (xgb_model): 69156.23
Previsão do preço do Bitcoin para amanhã (nn_model): 1425658725.83


Comparativo dos Modelos

In [None]:
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100  # Percentual
    return mse, rmse, mae, r2, mape

mse_lr, rmse_lr, mae_lr, r2_lr, mape_lr = evaluate_model(linear_model, X_test, y_test)

mse_ridge, rmse_ridge, mae_ridge, r2_ridge, mape_ridge = evaluate_model(ridge_model, X_test, y_test)

mse_lasso, rmse_lasso, mae_lasso, r2_lasso, mape_lasso = evaluate_model(lasso_model, X_test, y_test)

mse_gb, rmse_gb, mae_gb, r2_gb, mape_gb = evaluate_model(gb_model, X_test, y_test)

mse_xgb, rmse_xgb, mae_xgb, r2_xgb, mape_xgb = evaluate_model(xgb_model, X_test, y_test)

mse_nn, rmse_nn, mae_nn, r2_nn, mape_nn = evaluate_model(nn_model, X_test, y_test)

print("Desempenho dos Modelos:")
print(f"Linear Regression: MSE = {mse_lr:.4f}, RMSE = {rmse_lr:.4f}, MAE = {mae_lr:.4f}, R² = {r2_lr:.4f}, MAPE = {mape_lr:.2f}%")
print(f"Ridge Regression: MSE = {mse_ridge:.4f}, RMSE = {rmse_ridge:.4f}, MAE = {mae_ridge:.4f}, R² = {r2_ridge:.4f}, MAPE = {mape_ridge:.2f}%")
print(f"Lasso Regression: MSE = {mse_lasso:.4f}, RMSE = {rmse_lasso:.4f}, MAE = {mae_lasso:.4f}, R² = {r2_lasso:.4f}, MAPE = {mape_lasso:.2f}%")
print(f"Gradient Boosting: MSE = {mse_gb:.4f}, RMSE = {rmse_gb:.4f}, MAE = {mae_gb:.4f}, R² = {r2_gb:.4f}, MAPE = {mape_gb:.2f}%")
print(f"XGBoost: MSE = {mse_xgb:.4f}, RMSE = {rmse_xgb:.4f}, MAE = {mae_xgb:.4f}, R² = {r2_xgb:.4f}, MAPE = {mape_xgb:.2f}%")
print(f"MLP Regressor: MSE = {mse_nn:.4f}, RMSE = {rmse_nn:.4f}, MAE = {mae_nn:.4f}, R² = {r2_nn:.4f}, MAPE = {mape_nn:.2f}%")



In [None]:
import json

model_metrics = {
    'Linear Regression': {'MSE': mse_lr, 'RMSE': rmse_lr, 'MAE': mae_lr, 'R2': r2_lr, 'MAPE': mape_lr},
    'Ridge Regression': {'MSE': mse_ridge, 'RMSE': rmse_ridge, 'MAE': mae_ridge, 'R2': r2_ridge, 'MAPE': mape_ridge},
    'Lasso Regression': {'MSE': mse_lasso, 'RMSE': rmse_lasso, 'MAE': mae_lasso, 'R2': r2_lasso, 'MAPE': mape_lasso},
    'Gradient Boosting': {'MSE': mse_gb, 'RMSE': rmse_gb, 'MAE': mae_gb, 'R2': r2_gb, 'MAPE': mape_gb},
    'XGBoost': {'MSE': mse_xgb, 'RMSE': rmse_xgb, 'MAE': mae_xgb, 'R2': r2_xgb, 'MAPE': mape_xgb},
    'MLP Regressor': {'MSE': mse_nn, 'RMSE': rmse_nn, 'MAE': mae_nn, 'R2': r2_nn, 'MAPE': mape_nn}
}

model_metrics_json = json.dumps(model_metrics, indent=4)

print(model_metrics_json)

with open('model_metrics.json', 'w') as json_file:
    json.dump(model_metrics, json_file, indent=4)