In [3]:
from datadealing.dataloader import DataLoader
from datasets.datasets import data_to_load, data_timepoints
from models import *

# Create dataloader (same as you used for GNN)
dataloader = DataLoader(
    raw_data=data_to_load,
    data_timepoints=data_timepoints,
    val_ratio=0.2,
    test_ratio=0.2,
    standardization_method='zscore',
    window_size=4,
    include_weeks=True,
    graph_type='identity',
    threshold=0
)

# Initialize models
lr_model = LinearRegressionModel(dataloader, updates=True)
rf_model = RandomForestModel(dataloader, n_estimators=100, updates=True)
xgb_model = XGBoostModel(dataloader, n_estimators=1000, 
                         learning_rate=0.05, 
                         max_depth = 8,
                         subsample = 0.8,
                         colsample_bytree = 0.8,
                         updates=True)

# Train models
print("Training Linear Regression...")
lr_model.train()

print("Training Random Forest...")
rf_model.train()

print("Training XGBoost...")
xgb_model.train()

# Evaluate models
print("Evaluating models...")
lr_model.evaluate()
rf_model.evaluate()
xgb_model.evaluate()

# Compare results
print(f"Linear Regression RMSE: {lr_model.testing_history['rmse']:.4f}")
print(f"Random Forest RMSE: {rf_model.testing_history['rmse']:.4f}")
print(f"XGBoost RMSE: {xgb_model.testing_history['rmse']:.4f}")

# Plot predictions for Budapest (county_idx=4)
lr_model.plot_predictions(county_idx=4)
rf_model.plot_predictions(county_idx=4)
xgb_model.plot_predictions(county_idx=4)


✅ Created 5 temporal features
Time 4: case_features shape: (20, 4), temporal shape: (20, 5), combined: (20, 9)
Time 5: case_features shape: (20, 4), temporal shape: (20, 5), combined: (20, 9)
Time 6: case_features shape: (20, 4), temporal shape: (20, 5), combined: (20, 9)
Time 7: case_features shape: (20, 4), temporal shape: (20, 5), combined: (20, 9)
Time 8: case_features shape: (20, 4), temporal shape: (20, 5), combined: (20, 9)
Time 9: case_features shape: (20, 4), temporal shape: (20, 5), combined: (20, 9)
Time 10: case_features shape: (20, 4), temporal shape: (20, 5), combined: (20, 9)
Time 11: case_features shape: (20, 4), temporal shape: (20, 5), combined: (20, 9)
Time 12: case_features shape: (20, 4), temporal shape: (20, 5), combined: (20, 9)
Time 13: case_features shape: (20, 4), temporal shape: (20, 5), combined: (20, 9)
Time 14: case_features shape: (20, 4), temporal shape: (20, 5), combined: (20, 9)
Time 15: case_features shape: (20, 4), temporal shape: (20, 5), combined: 

KeyboardInterrupt: 

In [2]:
dataloader.train_dataset

[Data(x=[20, 4], edge_index=[2, 20], y=[20], edge_weight=[20]),
 Data(x=[20, 4], edge_index=[2, 20], y=[20], edge_weight=[20]),
 Data(x=[20, 4], edge_index=[2, 20], y=[20], edge_weight=[20]),
 Data(x=[20, 4], edge_index=[2, 20], y=[20], edge_weight=[20]),
 Data(x=[20, 4], edge_index=[2, 20], y=[20], edge_weight=[20]),
 Data(x=[20, 4], edge_index=[2, 20], y=[20], edge_weight=[20]),
 Data(x=[20, 4], edge_index=[2, 20], y=[20], edge_weight=[20]),
 Data(x=[20, 4], edge_index=[2, 20], y=[20], edge_weight=[20]),
 Data(x=[20, 4], edge_index=[2, 20], y=[20], edge_weight=[20]),
 Data(x=[20, 4], edge_index=[2, 20], y=[20], edge_weight=[20]),
 Data(x=[20, 4], edge_index=[2, 20], y=[20], edge_weight=[20]),
 Data(x=[20, 4], edge_index=[2, 20], y=[20], edge_weight=[20]),
 Data(x=[20, 4], edge_index=[2, 20], y=[20], edge_weight=[20]),
 Data(x=[20, 4], edge_index=[2, 20], y=[20], edge_weight=[20]),
 Data(x=[20, 4], edge_index=[2, 20], y=[20], edge_weight=[20]),
 Data(x=[20, 4], edge_index=[2, 20], y=[

In [2]:
# Create dataloader
dataloader = DataLoader(
    raw_data=data_to_load,
    val_ratio=0.2,
    test_ratio=0.2,
    standardization_method='zscore',
    window_size=1
)

# Define parameter grid
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [4, 6, 8],
    'learning_rate': [0.05, 0.1, 0.15],
    'subsample': [0.8, 1.0],
    'colsample_bytree': [0.8, 1.0]
}

# Run grid search
grid_search = XGBoostGridSearch(
    dataloader=dataloader,
    param_grid=param_grid,
    verbose=True
)

grid_search.fit()

# Get results
results_df = grid_search.get_results_dataframe()
print("Top 5 results:")
print(results_df.head())

# Get best model
best_model = grid_search.get_best_model()
best_model.evaluate()

# Save results
grid_search.save_results("xgboost_optimization_results.json")


Starting grid search with 108 combinations...
Using 3-fold cross-validation
------------------------------------------------------------
[1/108] Testing: {'colsample_bytree': 0.8, 'learning_rate': 0.05, 'max_depth': 4, 'n_estimators': 100, 'subsample': 0.8}
  CV Score: 0.7665
  Val Score: 0.6564
  Best So Far: 0.6564
----------------------------------------
[2/108] Testing: {'colsample_bytree': 0.8, 'learning_rate': 0.05, 'max_depth': 4, 'n_estimators': 100, 'subsample': 1.0}
  CV Score: 0.7741
  Val Score: 0.6593
  Best So Far: 0.6564
----------------------------------------
[3/108] Testing: {'colsample_bytree': 0.8, 'learning_rate': 0.05, 'max_depth': 4, 'n_estimators': 200, 'subsample': 0.8}
  CV Score: 0.7736
  Val Score: 0.6628
  Best So Far: 0.6564
----------------------------------------
[4/108] Testing: {'colsample_bytree': 0.8, 'learning_rate': 0.05, 'max_depth': 4, 'n_estimators': 200, 'subsample': 1.0}
  CV Score: 0.7784
  Val Score: 0.6690
  Best So Far: 0.6564
------------