In [None]:
## Crypto price forecasting: fetch OHLCV candles, train and compare regression models


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set visual styling.
# A single seaborn call applies the theme and the default figure size. This avoids
# the 'seaborn' matplotlib style name (removed in newer matplotlib releases), keeps
# plt.rcParams as the real RcParams object instead of replacing it with a plain
# dict, and does not open an empty figure just to set a size.
sns.set_theme(style='darkgrid', rc={'figure.figsize': (10.5, 8)})
import logging
import os

import ccxt
import numpy as np
import pandas as pd
import pandas_ta as ta
import yaml
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, train_test_split

# Binance exchange client from ccxt, created without credentials; get_data uses it
# to download OHLCV candles.
client = ccxt.binance()
# Configure logging so INFO-level records and above are emitted.
logging.basicConfig(level=logging.INFO)
# getLogger() with no name returns the root logger.
logger = logging.getLogger()


ModuleNotFoundError: No module named 'sklearn'

In [None]:
def get_data(symbol: str, interval: str, limit: int = 5000,
             slow_length: int = 80, fast_length: int = 20, rsi_length: int = 14,
             tz: str = 'Africa/Cairo') -> pd.DataFrame:
    """Download OHLCV candles and attach moving-average and RSI columns.

    Parameters
    ----------
    symbol : str
        Market symbol passed straight to ``client.fetch_ohlcv``.
    interval : str
        Candle timeframe passed straight to ``client.fetch_ohlcv`` (e.g. ``'1m'``).
    limit : int, default 5000
        Number of candles requested. The exchange may return fewer rows than
        requested -- TODO confirm the effective cap for this exchange.
    slow_length, fast_length : int, default 80 and 20
        Window lengths of the simple moving averages stored in ``S_SMA`` and
        ``f_SMA``.
    rsi_length : int, default 14
        Window length of the RSI stored in ``RSI``.
    tz : str, default 'Africa/Cairo'
        Time zone the UTC timestamps are converted to for the index.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by tz-aware timestamp with columns ``Open``, ``High``,
        ``Low``, ``Close``, ``Volume``, ``S_SMA``, ``f_SMA`` and ``RSI``. Rows
        containing any NaN (the indicator warm-up rows) are dropped.
    """
    candles = client.fetch_ohlcv(symbol, interval, limit=limit)

    ohlcv = pd.DataFrame(
        candles,
        columns=['timestamp', 'Open', 'High', 'Low', 'Close', 'Volume'],
    ).set_index('timestamp')

    # Timestamps are epoch milliseconds; parse them as UTC, then convert for display.
    ohlcv.index = pd.to_datetime(ohlcv.index, unit='ms', utc=True).tz_convert(tz)

    ohlcv['S_SMA'] = ta.sma(ohlcv['Close'], length=slow_length)
    ohlcv['f_SMA'] = ta.sma(ohlcv['Close'], length=fast_length)
    ohlcv['RSI'] = ta.rsi(ohlcv['Close'], length=rsi_length)

    # Return a new frame instead of mutating in place.
    return ohlcv.dropna(axis=0)

# Fetch 1-minute SOLUSDT candles with the indicator columns added by get_data.
df = get_data('SOLUSDT', '1m')

# Bare expression so the notebook renders the frame as this cell's output.
df


In [None]:
# Forecast horizon, counted in rows of `df`. The name says "days", but the shift
# below moves by rows, so the real horizon depends on the candle interval.
forcast_days = 30

# Target column: the Close value `forcast_days` rows ahead of each row.
df['Prediction'] = df['Close'].shift(periods=-forcast_days)

# The final `forcast_days` rows have no target (NaN after the shift), so they are
# excluded from both the feature matrix and the target vector.
X = df.drop(columns=['Prediction']).iloc[:-forcast_days]
y = np.array(df['Prediction'].iloc[:-forcast_days])


In [None]:
# Chronological hold-out: the last 20% of rows form the test set.
# The rows are time-indexed candles and each target looks `forcast_days` rows
# ahead, so a shuffled split would put rows from after the test period into the
# training set and inflate the scores. shuffle=False keeps the original row order
# (assumes the exchange returns candles oldest-first -- TODO confirm).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)


In [None]:
# Candidate models.
# The tree ensembles are stochastic; a fixed random_state makes repeated runs of
# the notebook produce the same fitted models and scores.
models = {
    'LinearRegression': LinearRegression(),
    'RandomForest': RandomForestRegressor(random_state=42),
    'GradientBoosting': GradientBoostingRegressor(random_state=42),
}

# Hyperparameter grids, keyed by the same names as `models`.
# An empty grid means the model is fitted once with its default parameters.
model_grids = {
    'LinearRegression': {},
    'RandomForest': {
        'n_estimators': [100, 150],
        'max_depth': [None, 10, 20]
    },
    'GradientBoosting': {
        'n_estimators': [100, 250],
        'learning_rate': [0.1, 0.05],
        'max_depth': [3, 10]
    }
}


In [None]:
def evaluate_model_with_gridsearch(name, model, grid, X_train, y_train, X_test, y_test):
    """Fit a model (grid-searched when a grid is given) and score it on the test set.

    Parameters
    ----------
    name : label of the model; accepted for the caller's convenience, not used here.
    model : estimator exposing ``fit``, ``predict`` and ``get_params``.
    grid : dict of hyperparameter lists; a falsy value skips the search.
    X_train, y_train : data used for fitting (and for the 3-fold search).
    X_test, y_test : data used for the reported metrics.

    Returns
    -------
    dict with keys ``mae``, ``mse``, ``rmse``, ``r2``, ``model`` (the fitted
    estimator) and ``params`` (best grid parameters, or the model's full
    parameter dict when no search was run).
    """
    if not grid:
        # Nothing to tune: fit the estimator as given.
        model.fit(X_train, y_train)
        fitted, chosen_params = model, model.get_params()
    else:
        # 3-fold search over the grid, selecting by R2, using all CPU cores.
        search = GridSearchCV(model, grid, cv=3, scoring='r2', n_jobs=-1)
        search.fit(X_train, y_train)
        fitted, chosen_params = search.best_estimator_, search.best_params_

    predictions = fitted.predict(X_test)
    squared_error = mean_squared_error(y_test, predictions)

    return {
        'mae': mean_absolute_error(y_test, predictions),
        'mse': squared_error,
        'rmse': np.sqrt(squared_error),
        'r2': r2_score(y_test, predictions),
        'model': fitted,
        'params': chosen_params,
    }



In [None]:
# Evaluation results per model name, filled by the loop below.
results = {}

for name, model in models.items():
    logger.info(f'Training {name}....')

    evaluation = evaluate_model_with_gridsearch(name, model, model_grids[name], X_train, y_train, X_test, y_test)
    results[name] = evaluation

    # Double-quoted f-string so the single-quoted dict keys inside it are valid on
    # Python versions before 3.12 (reusing the outer quote type is a SyntaxError there).
    print(f"{name} R2: {evaluation['r2']:.4f}, RMSE: {evaluation['rmse']:.2f}")
    

In [None]:
# Display information about the best model (highest test-set R2).
best_model_name = max(results, key=lambda x: results[x]['r2'])
best_model = results[best_model_name]['model']
best_params = best_model.get_params()
# Cast to plain floats so YAML stores ordinary numbers rather than numpy scalars.
best_r2 = float(results[best_model_name]['r2'])
best_mae = float(results[best_model_name]['mae'])
best_rmse = float(results[best_model_name]['rmse'])

print(f'Best Model: {best_model_name}')
print(f' R2 Score: {best_r2:.4f}')
print(f' MAE: {best_mae:.2f}')
print(f' RMSE: {best_rmse:.2f}')

model_config = {
    'model': {
        'name': 'crypto_forcasting',
        'best_model': best_model_name,
        'parameters': best_params,
        'r2_score': best_r2,
        'mae': best_mae,
        'rmse': best_rmse,
        # Must match the target column created on `df` ('Prediction').
        'target_variables': 'Prediction',
        # Plain list of column names; dumping the pandas Index object itself would
        # write python-object tags instead of a readable YAML sequence.
        'feature_sets': list(X.columns)
    }
}

config_path = 'configs/model_config.yaml'
os.makedirs(os.path.dirname(config_path), exist_ok=True)
with open(config_path, 'w') as f:
    yaml.dump(model_config, f)

print(f'Saved model config to {config_path}')


In [None]:
import pickle as pk


In [None]:
# Persist the selected model. The file is called rf_v1 even though `best_model` is
# whichever candidate scored the highest R2, not necessarily the random forest.
model_path = '../model/models/rf_v1'
os.makedirs(os.path.dirname(model_path), exist_ok=True)
# Context manager so the file is flushed and closed even if pickling fails.
with open(model_path, 'wb') as f:
    pk.dump(best_model, f)


In [None]:
# NOTE: unpickling can execute arbitrary code; only load model files you created
# or otherwise trust.
# Context manager so the file handle is closed after loading.
with open('../model/models/rf_v1', 'rb') as f:
    rf_v1 = pk.load(f)


In [None]:
# Features of the last `forcast_days` rows: the rows whose targets are still unknown.
# Kept as a DataFrame (not a bare ndarray) so the column names match the ones the
# model was fitted with; scikit-learn warns when they are missing.
X_future = df.drop(columns=['Prediction']).iloc[-forcast_days:]
future_predictions = rf_v1.predict(X_future)

print(future_predictions)
print(df['Close'].tail())
