In [None]:
import pandas as pd
import numpy as np
import geopandas as gpd
import rasterio
from rasterio.plot import show
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import xgboost as xgb
import matplotlib.pyplot as plt

In [None]:
# Tabular inputs: the CSVs are read in the order listed and bound, position by
# position, to the names on the left-hand side.
climate, temperature, rainfall, crop, soil = (
    pd.read_csv(csv_path)
    for csv_path in (
        "datasets/climate.csv",
        "datasets/temperature.csv",
        "datasets/rainfall.csv",
        "datasets/crop.csv",
        "datasets/soil.csv",
    )
)

# Vector layer loaded from the shapefile (district boundaries, going by the
# file name).
nepal_shp = gpd.read_file("datasets/shapefile/nepal_districts.shp")

# Plain-text preview of the first rows as a quick sanity check on the load.
shp_preview = nepal_shp.head()
print(shp_preview)


In [None]:
# -------------------------------
# 3️⃣ Merge datasets
# -------------------------------
# NOTE(review): climate_dang, temperature_dang, soil_dang and crop_dang are not
# defined in the cells visible here — confirm an earlier cell creates them.

# Step 1: left-join temperature onto climate. The climate key is "District"
# and the temperature key is "Station", so both key columns are kept in the
# result.
climate_with_temperature = climate_dang.merge(
    temperature_dang, how="left", left_on="District", right_on="Station"
)

# Step 2: left-join soil on the shared "District" column.
data_model = climate_with_temperature.merge(soil_dang, how="left", on="District")

# The target is attached by position (raw values, no index alignment), so the
# row count and row order of crop_dang need to line up with the merged frame —
# verify against the cell that builds crop_dang.
data_model["Yield"] = crop_dang.values


In [None]:
# -------------------------------
# 4️⃣ Prepare features & target
# -------------------------------
# Predictors: every merged column except the target and the two join keys.
features_raw = data_model.drop(columns=["Yield", "District", "Station"])

# Impute missing values with each column's mean. numeric_only=True keeps a
# leftover text column from raising a TypeError on pandas >= 2.0, where
# DataFrame.mean no longer skips non-numeric columns by default; such columns
# are simply left un-imputed, and all-numeric frames behave exactly as before.
# NOTE(review): the means are computed over all rows, i.e. before the
# train/test split, so held-out rows influence the imputed values.
features = features_raw.fillna(features_raw.mean(numeric_only=True))

# Target: the "Yield" column of the merged frame.
target = data_model["Yield"]


In [None]:
# -------------------------------
# 5️⃣ Train/Test Split
# -------------------------------
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle, and therefore the split, repeatable between runs.
split_parts = train_test_split(features, target, random_state=42, test_size=0.2)
X_train, X_test, y_train, y_test = split_parts


In [None]:
# -------------------------------
# 6️⃣ Train XGBoost
# -------------------------------
# Booster configuration for the native xgboost training API.
params = {
    "objective": "reg:squarederror",  # regression with squared-error loss
    "eta": 0.1,                       # learning rate (step-size shrinkage)
    "max_depth": 6,                   # maximum depth of each tree
    "subsample": 0.8,                 # share of training rows sampled per round
    "colsample_bytree": 0.8,          # share of columns sampled per tree
}

# Wrap each split, together with its labels, in a DMatrix.
dtrain = xgb.DMatrix(data=X_train, label=y_train)
dtest = xgb.DMatrix(data=X_test, label=y_test)

# Fit 100 boosting rounds on the training matrix.
xgb_model = xgb.train(params=params, dtrain=dtrain, num_boost_round=100)


In [None]:
# -------------------------------
# 7️⃣ Predict & Evaluate
# -------------------------------
# Score the held-out matrix with the trained booster.
predictions = xgb_model.predict(dtest)

# RMSE is the square root of the mean squared error; R² comes straight from
# sklearn. Both compare the held-out targets with the predictions.
mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, predictions)

print(f"✅ RMSE: {rmse}")
print(f"✅ R²: {r2}")


In [None]:
# -------------------------------
# 8️⃣ Feature Importance
# -------------------------------
# Create the figure/axes explicitly and let xgboost draw the importance bars
# onto that axes, then render the figure.
fig, ax = plt.subplots()
xgb.plot_importance(xgb_model, ax=ax)
plt.show()
