In [1]:
import pandas as pd

# Read all preprocessed CSVs in one ordered pass. The files are read lazily,
# top to bottom, and bound positionally to the names on the left-hand side.
df, X, y_kharif, y_rabi, y_price = (
    pd.read_csv(csv_name)
    for csv_name in (
        "final_cleaned_data.csv",      # -> df
        "final_input_features.csv",    # -> X (model inputs for the split below)
        "target_kharif_yield.csv",     # -> y_kharif
        "target_rabi_yield.csv",       # -> y_rabi
        "target_market_price.csv",     # -> y_price
    )
)


In [2]:
from sklearn.model_selection import train_test_split

# Per-target train/test partitions, keyed by target name. Each entry holds
# the four pieces returned by train_test_split under fixed string keys.
datasets_cb = {}

# Each target frame is paired with its name explicitly, so the two cannot
# drift out of step. Every split uses the same inputs X, a 20% test share
# and a fixed seed.
for target_name, y in (
    ('Kharif_Yield', y_kharif),
    ('Rabi_Yield', y_rabi),
    ('Market_Price', y_price),
):
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.2,
        random_state=42,
    )
    datasets_cb[target_name] = dict(
        X_train=X_train,
        X_test=X_test,
        y_train=y_train,
        y_test=y_test,
    )

print("✅ Data ready for CatBoost training.")


✅ Data ready for CatBoost training.


In [3]:
from catboost import CatBoostRegressor
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Hold-out metrics and fitted regressors, both keyed by target name
catboost_results = {}
catboost_models = {}

# Train one independent regressor per target with identical hyperparameters,
# then score it on that target's test partition.
for target, data in datasets_cb.items():
    print(f"🔹 Training CatBoost model for: {target}")

    model = CatBoostRegressor(
        iterations=1000, learning_rate=0.05, depth=6,
        verbose=0, random_state=42,
    )
    model.fit(data['X_train'], data['y_train'])

    # Predict on the held-out rows and compare against their true values
    y_true = data['y_test']
    preds = model.predict(data['X_test'])

    # R² plus RMSE (square root of the mean squared error)
    r2 = r2_score(y_true, preds)
    mse = mean_squared_error(y_true, preds)
    rmse = np.sqrt(mse)

    # Record the model and its metrics only after fitting and scoring succeed
    catboost_models[target] = model
    catboost_results[target] = dict(R2=r2, RMSE=rmse)

    print(f"✅ {target} — R² Score: {r2:.3f}, RMSE: {rmse:.3f}")


ModuleNotFoundError: No module named 'catboost'