In [None]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

# --- Load ---------------------------------------------------------------
df = pd.read_csv('/content/crop_yield.csv')

# --- Preprocess ---------------------------------------------------------
# Cast the two flag columns to integers so they enter the models as numbers.
bool_cols = ['Fertilizer_Used', 'Irrigation_Used']
df[bool_cols] = df[bool_cols].astype(int)

# One-hot encode the remaining non-numeric columns; drop_first=True removes
# the first level of each so that level becomes the implicit baseline.
df_processed = pd.get_dummies(df, drop_first=True)

# --- Features / target --------------------------------------------------
y = df_processed['Yield_tons_per_hectare']
X = df_processed.drop(columns=['Yield_tons_per_hectare'])

# Hold out 20% of the rows for evaluation (fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Random Forest ------------------------------------------------------
print("Training Random Forest...")
rf_model = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
rf_model.fit(X_train, y_train)
rf_r2 = r2_score(y_test, rf_model.predict(X_test))
print(f"Random Forest R2 Score: {rf_r2:.4f}")

# --- XGBoost ------------------------------------------------------------
# NOTE(review): the model is configured for CUDA while the feature frames are
# ordinary pandas objects; the warning captured in this cell's output appears
# to stem from that device mismatch at predict time -- confirm before deploying.
print("Training XGBoost...")
xgb_params = {
    'objective': 'reg:squarederror',
    'tree_method': 'hist',
    'device': 'cuda',
    'random_state': 42,
}
xgb_model = xgb.XGBRegressor(**xgb_params)
xgb_model.fit(X_train, y_train)
xgb_r2 = r2_score(y_test, xgb_model.predict(X_test))
print(f"XGBoost R2 Score: {xgb_r2:.4f}")

def predict_yield(input_data_dict, model=xgb_model):
    """Predict the yield (tons per hectare) for a single observation.

    Parameters
    ----------
    input_data_dict : dict
        Raw (pre-encoding) feature values for one record, keyed by column
        name. Must contain every column listed in ``bool_cols``.
    model : object with a ``predict`` method, optional
        Fitted regressor used for the prediction. Defaults to the
        ``xgb_model`` that exists when this function is defined.

    Returns
    -------
    The first element of ``model.predict`` for the encoded one-row frame.

    Raises
    ------
    KeyError
        If any column named in ``bool_cols`` is missing from the input.
    """
    input_df = pd.DataFrame([input_data_dict])
    input_df[bool_cols] = input_df[bool_cols].astype(int)

    # Encode WITHOUT drop_first. A one-row frame has exactly one level per
    # categorical column, so drop_first=True would drop that level's dummy and
    # every categorical indicator would end up 0 after the reindex below --
    # i.e. the model would always be queried with the baseline categories.
    input_processed = pd.get_dummies(input_df)

    # Align to the training feature layout: dummies that were the dropped
    # baseline during training (or categories not present in the training
    # columns) are discarded, and indicators absent from this row are set to 0.
    input_processed = input_processed.reindex(columns=X.columns, fill_value=0)
    return model.predict(input_processed)[0]

# A single hand-written record, using the raw (pre-encoding) column names,
# to smoke-test predict_yield with its default model.
sample_input = dict(
    Region='North',
    Soil_Type='Loam',
    Crop='Barley',
    Rainfall_mm=148.0,
    Temperature_Celsius=29.8,
    Fertilizer_Used=False,
    Irrigation_Used=False,
    Weather_Condition='Sunny',
    Days_to_Harvest=106,
)

sample_prediction = predict_yield(sample_input)
print(f"\nPredicted Yield (tons/hectare): {sample_prediction:.4f}")

Training Random Forest...
Random Forest R2 Score: 0.9075
Training XGBoost...
XGBoost R2 Score: 0.9124

Predicted Yield (tons/hectare): 1.3467


Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.


  return func(**kwargs)


In [None]:
import joblib
# Persist the fitted XGBoost model together with the ordered list of training
# feature columns, which is what inference needs to rebuild the same layout.
artifacts = {
    'crop_yield_model.pkl': xgb_model,
    'model_columns.pkl': list(X.columns),
}
for artifact_path, artifact in artifacts.items():
    joblib.dump(artifact, artifact_path)

print("Model and columns saved successfully!")

Model and columns saved successfully!
