In [6]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Train an XGBoost regressor that predicts the '2024-25' crop price column from
# the remaining columns of the crop-price CSV, report hold-out metrics, and
# pickle the fitted model together with the encoders needed to reuse it.

# Load data
# NOTE(review): this is an absolute, machine-specific path; point it at your
# own copy of the CSV before running the notebook on another machine.
file_path = "F:/Fasal/backend/Predictive Systems/notebooks/updated_crop_prices.csv"
df_raw = pd.read_csv(file_path)
display(df_raw.head())

# Drop 'Variety' plus any saved row-index column. pandas names a header-less
# index column "Unnamed: <n>"; leaving it in would feed the row number to the
# model as if it were a real predictor.
index_like_columns = [col for col in df_raw.columns if str(col).startswith("Unnamed:")]
df = df_raw.drop(columns=['Variety'] + index_like_columns)

# Encode categorical features ('Commodity' and 'State')
# NOTE(review): label encoding gives the categories an arbitrary integer order;
# confirm that is acceptable for this model before relying on it.
encoder_commodity = LabelEncoder()
encoder_state = LabelEncoder()

df['Commodity'] = encoder_commodity.fit_transform(df['Commodity'])
df['State'] = encoder_state.fit_transform(df['State'])

# Define features (X) and target variable (y)
X = df.drop(columns=['2024-25'])  # Features: all columns except the target
y = df['2024-25']  # Target: crop price for 2024-25

# Split data into train and test sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train XGBoost Regressor
xgb_model = XGBRegressor(
    n_estimators=300,
    learning_rate=0.05,
    max_depth=8,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42
)

xgb_model.fit(X_train, y_train)

# Predictions on the held-out 20%
y_pred = xgb_model.predict(X_test)

# Model evaluation
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f"R² Score: {r2:.4f}")
print(f"RMSE: {rmse:.2f}")

# Save model as a .pkl file
with open("model.pkl", "wb") as f:
    pickle.dump(xgb_model, f)

print("Model saved as model.pkl")

# Save the fitted encoders and the training column order next to the model.
# Without them, raw 'Commodity'/'State' strings cannot be mapped to the same
# integer codes at prediction time. model.pkl itself is left unchanged.
with open("encoders.pkl", "wb") as f:
    pickle.dump(
        {
            "Commodity": encoder_commodity,
            "State": encoder_state,
            "feature_columns": list(X.columns),
        },
        f,
    )

print("Encoders saved as encoders.pkl")


Unnamed: 0,Commodity,Variety,2013-14,2014-15,2015-16,2016-17,2017-18,2018-19,2019-20,2020-21,2021-22,2022-23,2023-24,2024-25,State,Annual Rainfall (mm)
0,Paddy,Common,1310,1360,1410,1470,1550,1750,1815,1868,1940,2040,2183,2300,West Bengal,1800
1,Paddy,Grade A,1345,1400,1450,1510,1590,1770,1835,1888,1960,2060,2203,2320,West Bengal,1800
2,Jowar,Hybrid,1500,1530,1570,1625,1700,2430,2550,2620,2738,2970,3180,3371,Maharashtra,1200
3,Jowar,Maldandi,1520,1550,1590,1650,1725,2450,2570,2640,2758,2990,3225,3421,Maharashtra,1200
4,Bajra,,1250,1250,1275,1330,1425,1950,2000,2150,2250,2350,2500,2625,Rajasthan,400


R² Score: 0.1450
RMSE: 2941.37
Model saved as model.pkl
