In [1]:
# Step 1: Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from catboost import CatBoostRegressor
import numpy as np

In [2]:
# Step 2: Load datasets
# Both CSV files are read from a single folder; edit DATA_DIR to point at your copy.
DATA_DIR = r"C:\Users\souri\Downloads\New folder"
df1 = pd.read_csv(rf"{DATA_DIR}\agriculture_dataset.csv")
df2 = pd.read_csv(rf"{DATA_DIR}\Crop_recommendation.csv")

# 🔧 Clean crop names: lowercase and strip spaces so both frames share the same join keys
df1['Crop_Type'] = df1['Crop_Type'].str.strip().str.lower()
# df2 calls the crop column 'label'; rename it (without inplace) to match df1.
df2 = df2.rename(columns={'label': 'Crop_Type'})
df2['Crop_Type'] = df2['Crop_Type'].str.strip().str.lower()

# Step 3: Merge datasets on 'Crop_Type'
# NOTE(review): this join repeats each farm row once per df2 row of the same crop
# (farm F001 shows up five times in the preview). Confirm that duplication is
# intended before using row counts or a random hold-out split for evaluation.
df = pd.merge(df1, df2, on='Crop_Type', how='inner')
# dropna() keeps the surviving rows' original labels, which leaves gaps whenever a
# row is removed. Reset to a contiguous 0..n-1 index so frames built later from
# NumPy arrays (default RangeIndex) line up with df row for row.
df = df.dropna().reset_index(drop=True)
df.head()

Unnamed: 0,Farm_ID,Crop_Type,Farm_Area(acres),Irrigation_Type,Fertilizer_Used(tons),Pesticide_Used(kg),Yield(tons),Soil_Type,Season,Water_Usage(cubic meters),N,P,K,temperature,humidity,ph,rainfall
0,F001,cotton,329.4,Sprinkler,8.14,2.21,14.44,Loamy,Kharif,76648.2,133,47,24,24.402289,79.19732,7.231325,90.802236
1,F001,cotton,329.4,Sprinkler,8.14,2.21,14.44,Loamy,Kharif,76648.2,136,36,20,23.095956,84.862757,6.925412,71.295811
2,F001,cotton,329.4,Sprinkler,8.14,2.21,14.44,Loamy,Kharif,76648.2,104,47,18,23.965635,76.976967,7.633437,90.756167
3,F001,cotton,329.4,Sprinkler,8.14,2.21,14.44,Loamy,Kharif,76648.2,133,47,23,24.887381,75.621372,6.827355,89.760504
4,F001,cotton,329.4,Sprinkler,8.14,2.21,14.44,Loamy,Kharif,76648.2,126,38,23,25.362438,83.632761,6.176716,88.436189


In [3]:
# Step 4: Preprocess data
# Cast both categorical columns to plain strings so the encoder sees one uniform dtype.
df['Season'] = df['Season'].astype(str)
df['Crop_Type'] = df['Crop_Type'].astype(str)

categorical_cols = ['Crop_Type', 'Season']
encoder = OneHotEncoder(sparse_output=False)
encoded = encoder.fit_transform(df[categorical_cols])
# fit_transform returns a bare array. Reuse df's own index for the wrapper frame:
# pd.concat(axis=1) aligns on index labels, so a default RangeIndex here would
# produce NaN-padded, mismatched rows whenever df's index is not exactly 0..n-1
# (for example after dropna() removed rows).
encoded_df = pd.DataFrame(
    encoded,
    columns=encoder.get_feature_names_out(categorical_cols),
    index=df.index,
)

# Final feature set: farm area plus the one-hot crop/season indicators
X = pd.concat([df[['Farm_Area(acres)']], encoded_df], axis=1)
y = df[['Fertilizer_Used(tons)', 'Pesticide_Used(kg)', 'Yield(tons)']]

# Fail loudly if features and targets ever drift out of step.
assert len(X) == len(y), "feature/target row counts differ"
assert not X.isna().any().any(), "feature matrix contains NaN after concat"

# NOTE(review): df holds repeated farm rows (see the merge preview), so identical
# (X, y) pairs can land on both sides of this random split; treat any score
# computed on X_test as optimistic until the duplicates are handled.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

In [4]:
# Step 5: One independent CatBoost regressor per target column.
# fit() hands back the fitted estimator, so each model is created and trained
# in a single expression.
model_fertilizer = CatBoostRegressor(verbose=0).fit(X_train, y_train['Fertilizer_Used(tons)'])
model_pesticide = CatBoostRegressor(verbose=0).fit(X_train, y_train['Pesticide_Used(kg)'])
model_yield = CatBoostRegressor(verbose=0).fit(X_train, y_train['Yield(tons)'])

# Leave the last fitted model as the cell's value so its repr is still displayed.
model_yield

<catboost.core.CatBoostRegressor at 0x26eb9041a90>

In [5]:
# Step 6: Prediction function (updated for separate models)
def _resolve_category(value, known, field):
    """Return the fitted category that `value` refers to.

    An exact match wins; otherwise the comparison ignores surrounding
    whitespace and letter case. Raises ValueError when nothing (or more than
    one category) matches, listing the accepted labels.
    """
    known = [str(k) for k in known]
    if value in known:
        return value
    wanted = str(value).strip().casefold()
    matches = [k for k in known if k.strip().casefold() == wanted]
    if len(matches) == 1:
        return matches[0]
    raise ValueError(
        f"Unknown or ambiguous {field} {value!r}; expected one of: "
        f"{', '.join(sorted(known))}"
    )


def predict_requirements(area, crop_type, season):
    """Predict fertilizer, pesticide and yield for a single farm.

    Parameters
    ----------
    area : number
        Farm area, fed to the models as the 'Farm_Area(acres)' feature.
    crop_type : str
        Crop name; stripped and lower-cased before lookup.
    season : str
        Season label; matched against the fitted seasons ignoring case and
        surrounding whitespace.

    Returns
    -------
    dict
        Keys 'Fertilizer_Tons', 'Pesticide_Kgs' and 'Yield_Tons', each a
        built-in float rounded to two decimals.

    Raises
    ------
    ValueError
        If `crop_type` or `season` is not a category the encoder was fitted on.
    """
    # The encoder was fitted on ['Crop_Type', 'Season'], in that order.
    crop_categories, season_categories = encoder.categories_
    crop = _resolve_category(crop_type.strip().lower(), crop_categories, 'crop_type')
    season = _resolve_category(season, season_categories, 'season')

    categorical = pd.DataFrame([[crop, season]], columns=['Crop_Type', 'Season'])
    encoded_input = encoder.transform(categorical)
    # Same column layout as the training matrix: area first, then the one-hot block.
    input_ready = pd.concat([
        pd.DataFrame([[area]], columns=['Farm_Area(acres)']),
        pd.DataFrame(encoded_input, columns=encoder.get_feature_names_out(['Crop_Type', 'Season']))
    ], axis=1)

    fert = model_fertilizer.predict(input_ready)[0]
    pest = model_pesticide.predict(input_ready)[0]
    yld = model_yield.predict(input_ready)[0]

    # Convert NumPy scalars to built-in floats so the dict prints and serialises
    # as plain numbers regardless of the installed NumPy version.
    return {
        'Fertilizer_Tons': round(float(fert), 2),
        'Pesticide_Kgs': round(float(pest), 2),
        'Yield_Tons': round(float(yld), 2)
    }

In [7]:
# Step 7: Smoke-test the predictor on a sample farm (20 acres of maize, Rabi season)
example_output = predict_requirements(area=20, crop_type='maize', season='Rabi')
print("Predicted Outputs:", example_output, sep="\n")

Predicted Outputs:
{'Fertilizer_Tons': 4.88, 'Pesticide_Kgs': 1.54, 'Yield_Tons': 18.78}
