# In [None]:
import joblib
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from xgboost import XGBRegressor

# Column groups consumed by the preprocessing step.
# NOTE(review): if the target is a yield derived from production / area, the
# "Area (1000 ha)" and "Production (1000 tons)" inputs would leak it —
# confirm against how y_train is built.
cat_features = [
    "Dist Name",
    "Crop",
]
num_features = [
    "Year",
    "Area (1000 ha)",
    "Production (1000 tons)",
    "temp_avg",
    "temp_max",
    "temp_min",
    "humidity",
    "rainfall",
]

# One transformer per column group. handle_unknown="ignore" lets the encoder
# accept categories at predict time that were not present during fit.
numeric_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(handle_unknown="ignore")

# Numeric columns come first, then the one-hot encoded categorical columns.
preprocessor = ColumnTransformer(
    [
        ("num", numeric_transformer, num_features),
        ("cat", categorical_transformer, cat_features),
    ]
)

# End-to-end estimator: column preprocessing followed by the XGBoost
# regressor, so a single fit/predict call covers both stages.
pipeline = Pipeline(
    [
        ("preprocessor", preprocessor),
        (
            "model",
            XGBRegressor(
                n_estimators=500,
                learning_rate=0.05,
                max_depth=6,
                random_state=42,  # fixed seed for repeatable training runs
            ),
        ),
    ]
)

# Fit the preprocessing + model pipeline. X_train / y_train are expected to
# have been defined earlier in the session.
pipeline.fit(X_train, y_train)

# Persist the fitted pipeline. The path lives in one constant so the dump
# here and the load below can never point at different files.
MODEL_PATH = "yield_prediction_model.pkl"
joblib.dump(pipeline, MODEL_PATH)

# Later: reload the fitted pipeline and predict on the held-out features.
# NOTE: joblib.load is pickle-based and can execute arbitrary code while
# loading, so only load model files that come from a trusted source.
pipeline = joblib.load(MODEL_PATH)
y_pred = pipeline.predict(X_test)
