In [22]:
#Import Required libraries
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

In [23]:
#Load the dataset
df=pd.read_csv("/content/Crop_recommendationV2_with_soil_fertilizer_irrigation (1).csv")
print(df.head())

    N   P   K  temperature   humidity        ph    rainfall crop_type  \
0  90  42  43    20.879744  82.002744  6.502985  202.935536      rice   
1  85  58  41    21.770462  80.319644  7.038096  226.655537      rice   
2  60  55  44    23.004459  82.320763  7.840207  263.964248      rice   
3  74  35  40    26.491096  80.158363  6.980401  242.864034      rice   
4  78  58  44    26.800796  80.886848  5.108682  284.436457      rice   

   soil_moisture  sunlight_exposure  ...  co2_concentration  organic_matter  \
0      29.446064           8.677355  ...         435.611226        3.121395   
1      12.851183           5.754288  ...         401.451860        2.142021   
2      29.363913           9.875230  ...         357.417963        1.474974   
3      26.207732           8.023685  ...         363.694305        8.393907   
4      15.696491           5.962473  ...         359.042795        6.142806   

   crop_density  pest_pressure  urban_area_proximity  frost_risk  \
0     11.743910   

In [24]:
#Separate Feature and Target
x = df.drop(columns=['crop_type'])
y = df['crop_type']

In [25]:
# Encoding Catogorical Columns
le=LabelEncoder()
for col in x.select_dtypes(include=['object']).columns:
    x[col] = le.fit_transform(x[col])

In [26]:
# Encode Target Labels
le_crop = LabelEncoder()
y = le_crop.fit_transform(y)

In [27]:
# Feature Selection using Random Forest
rf_fs = RandomForestClassifier(n_estimators=100, random_state=42)
rf_fs.fit(x, y)

In [28]:
# Feature Selection (selecting important features)
feature_importances = pd.Series(rf_fs.feature_importances_, index=x.columns)
top_features = feature_importances.sort_values(ascending=False).head(8).index.tolist()

In [29]:
print("Top features selected for crop prediction:")
print(top_features)

Top features selected for crop prediction:
['humidity', 'rainfall', 'K', 'P', 'N', 'temperature', 'ph', 'pest_pressure']


In [30]:
x_sf = df[top_features].copy()

In [31]:
#Scaling Feature
scaler=StandardScaler()
x_scaled=scaler.fit_transform(x_sf)

In [33]:
# spliting the data for training and testing
x_train,x_test,y_train,y_test=train_test_split(x_scaled,y,test_size=0.4)

In [36]:
# Random Forest Model
rf=RandomForestClassifier(n_estimators=100)
rf.fit(x_train,y_train)

In [37]:
y_pred=rf.predict(x_test)

In [38]:
print("\n RandomForest Results:")
print("Crop Prediction Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


 RandomForest Results:
Crop Prediction Accuracy: 0.99375
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        30
           1       1.00      1.00      1.00        46
           2       0.97      1.00      0.98        32
           3       1.00      1.00      1.00        37
           4       1.00      1.00      1.00        34
           5       1.00      1.00      1.00        32
           6       1.00      1.00      1.00        30
           7       1.00      1.00      1.00        35
           8       0.92      1.00      0.96        33
           9       1.00      1.00      1.00        36
          10       0.97      1.00      0.99        36
          11       1.00      1.00      1.00        42
          12       1.00      1.00      1.00        44
          13       1.00      0.95      0.98        43
          14       1.00      1.00      1.00        37
          15       1.00      1.00      1.00        41

In [39]:
# Gradient Boosting Model
gb = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, random_state=42)
gb.fit(x_train, y_train)
y_pred_gb = gb.predict(x_test)

In [40]:
print("\n Gradient Boosting Results:")
print("Accuracy:", accuracy_score(y_test, y_pred_gb))
print("Classification Report:\n", classification_report(y_test, y_pred_gb))


 Gradient Boosting Results:
Accuracy: 0.9775
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.97      0.98        30
           1       1.00      1.00      1.00        46
           2       1.00      0.94      0.97        32
           3       1.00      1.00      1.00        37
           4       1.00      0.97      0.99        34
           5       1.00      1.00      1.00        32
           6       0.97      1.00      0.98        30
           7       1.00      1.00      1.00        35
           8       0.84      0.97      0.90        33
           9       1.00      1.00      1.00        36
          10       0.92      1.00      0.96        36
          11       1.00      0.98      0.99        42
          12       1.00      0.95      0.98        44
          13       0.95      0.93      0.94        43
          14       1.00      0.95      0.97        37
          15       1.00      1.00      1.00        41
          1

In [41]:
rf_accuracy = accuracy_score(y_test, y_pred)
gb_accuracy = accuracy_score(y_test, y_pred_gb)

In [42]:
# 10. Model Comparison Table

comparison_table = pd.DataFrame({
    'Model': ['Random Forest', 'Gradient Boosting'],
    'Accuracy': [rf_accuracy, gb_accuracy]
})

print("\nModel Comparison Summary")
print(comparison_table)


Model Comparison Summary
               Model  Accuracy
0      Random Forest   0.99375
1  Gradient Boosting   0.97750
