In [1]:
import pickle
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBRegressor


In [2]:
# Load data
# Prefer a copy of the CSV in the current working directory so the notebook is
# not tied to one machine's drive layout; fall back to the original absolute
# location when no local copy is present.
LOCAL_DATA_FILE = Path("crop_prices.csv")
FALLBACK_DATA_FILE = "F:/Fasal/backend/Predictive Systems/notebooks/crop_prices.csv"

# file_path stays a plain string, as it was before.
file_path = str(LOCAL_DATA_FILE) if LOCAL_DATA_FILE.exists() else FALLBACK_DATA_FILE
df = pd.read_csv(file_path)
display(df.head())

Unnamed: 0,Commodity,Variety,2013-14,2014-15,2015-16,2016-17,2017-18,2018-19,2019-20,2020-21,2021-22,2022-23,2023-24,2024-25,State,Annual Rainfall (mm)
0,Wheat,Common,1350,1400,1450,1525,1625,1735,1840,1925,2015,2125,2275,2425,Punjab,650
1,Rice,Basmati,2500,2600,2750,2900,3100,3300,3500,3700,3900,4100,4350,4600,Punjab,650
2,Maize,Hybrid,1310,1360,1400,1450,1520,1600,1700,1800,1900,2000,2225,2500,Punjab,650
3,Paddy,Common,1310,1360,1410,1470,1550,1750,1815,1868,1940,2040,2183,2325,Punjab,650
4,Wheat,Common,1350,1400,1450,1525,1625,1735,1840,1925,2015,2125,2275,2425,Haryana,600


In [3]:
# Remove the 'Variety' column from the frame in place; it is not used downstream.
# (Re-running this cell without reloading the data raises KeyError, because the
# column is already gone.)
del df['Variety']

# One LabelEncoder per categorical column. The fitted encoders stay available
# under their own names so the string <-> integer-code mapping can be reused.
encoder_commodity, encoder_state = LabelEncoder(), LabelEncoder()

# Replace each text column with the integer codes produced by its encoder.
for column, encoder in (('Commodity', encoder_commodity), ('State', encoder_state)):
    df[column] = encoder.fit_transform(df[column])

In [4]:
# Target: the crop price column for the 2024-25 season.
TARGET_COLUMN = '2024-25'
y = df[TARGET_COLUMN]

# Features: every column except the target and 'Unnamed: 0'.
# 'Unnamed: 0' is the CSV's header-less leading column (0, 1, 2, ... in the
# preview printed after loading) - a row counter, not a property of the crop.
# Training on it lets the model key on row order, and there is no meaningful
# value to supply for it at prediction time. errors='ignore' keeps this cell
# working if that column has already been removed from df.
X = df.drop(columns=[TARGET_COLUMN, 'Unnamed: 0'], errors='ignore')

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Gradient-boosted tree regressor. subsample / colsample_bytree below 1.0 make
# training stochastic, so random_state is pinned for reproducible results.
xgb_model = XGBRegressor(
    n_estimators=300,
    learning_rate=0.05,
    max_depth=8,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42
)

xgb_model.fit(X_train, y_train)

# Score the held-out rows.
y_pred = xgb_model.predict(X_test)

r2 = r2_score(y_test, y_pred)
# RMSE is reported in the same units as the price columns.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f"R² Score: {r2:.4f}")
print(f"RMSE: {rmse:.2f}")

R² Score: 0.9108
RMSE: 582.01


In [5]:
# Persist the trained regressor. The file name and contents are unchanged, so
# anything that already loads model.pkl keeps working.
# NOTE: pickle files execute code when loaded - only load them from trusted sources.
with open("model.pkl", "wb") as f:
    pickle.dump(xgb_model, f)

print("Model saved as model.pkl")

# The model was trained on the integer codes produced by the fitted
# LabelEncoders, not on the raw 'Commodity' / 'State' strings. Save the
# encoders in a separate file so the same string -> code mapping can be
# applied when the model is used outside this notebook.
with open("encoders.pkl", "wb") as f:
    pickle.dump({"Commodity": encoder_commodity, "State": encoder_state}, f)

print("Encoders saved as encoders.pkl")

Model saved as model.pkl
