In [76]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import joblib

In [77]:
data = pd.read_csv('Crop_Dataset.csv')
print(data.head())

    N   P   K  temperature   humidity        ph    rainfall  Total_Nutrients  \
0  90  42  43    20.879744  82.002744  6.502985  202.935536              175   
1  85  58  41    21.770462  80.319644  7.038096  226.655537              184   
2  60  55  44    23.004459  82.320763  7.840207  263.964248              159   
3  74  35  40    26.491096  80.158363  6.980401  242.864034              149   
4  78  42  42    20.130175  81.604873  7.628473  262.717340              162   

   Temperature_Humidity  Log_Rainfall  Label  Label_Encoded  
0           1712.196283      5.317804  wheat              0  
1           1748.595734      5.427834  wheat              0  
2           1893.744627      5.579595  wheat              0  
3           2123.482908      5.496611  wheat              0  
4           1642.720357      5.574878  wheat              0  


In [78]:
print(data.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2200 entries, 0 to 2199
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   N                     2200 non-null   int64  
 1   P                     2200 non-null   int64  
 2   K                     2200 non-null   int64  
 3   temperature           2200 non-null   float64
 4   humidity              2200 non-null   float64
 5   ph                    2200 non-null   float64
 6   rainfall              2200 non-null   float64
 7   Total_Nutrients       2200 non-null   int64  
 8   Temperature_Humidity  2200 non-null   float64
 9   Log_Rainfall          2200 non-null   float64
 10  Label                 2200 non-null   object 
 11  Label_Encoded         2200 non-null   int64  
dtypes: float64(6), int64(5), object(1)
memory usage: 206.4+ KB
None


In [79]:
print(data.isnull().sum())

N                       0
P                       0
K                       0
temperature             0
humidity                0
ph                      0
rainfall                0
Total_Nutrients         0
Temperature_Humidity    0
Log_Rainfall            0
Label                   0
Label_Encoded           0
dtype: int64


In [80]:
scaler = StandardScaler()
numeric_columns = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 'Total_Nutrients', 'Temperature_Humidity', 'Log_Rainfall']
data[numeric_columns] = scaler.fit_transform(data[numeric_columns])

print(data.head())

          N         P         K  temperature  humidity        ph  rainfall  \
0  1.068797 -0.344551 -0.101688    -0.935587  0.472666  0.043302  1.810361   
1  0.933329  0.140616 -0.141185    -0.759646  0.397051  0.734873  2.242058   
2  0.255986  0.049647 -0.081939    -0.515898  0.486954  1.771510  2.921066   
3  0.635298 -0.556811 -0.160933     0.172807  0.389805  0.660308  2.537048   
4  0.743673 -0.344551 -0.121436    -1.083647  0.454792  1.497868  2.898373   

   Total_Nutrients  Temperature_Humidity  Log_Rainfall  Label  Label_Encoded  
0         0.287062             -0.203138      1.483789  wheat              0  
1         0.399702             -0.151079      1.685576  wheat              0  
2         0.086813              0.056511      1.963897  wheat              0  
3        -0.038343              0.385081      1.811709  wheat              0  
4         0.124359             -0.302501      1.955246  wheat              0  


In [81]:
X = data.drop(columns=['Label', 'Label_Encoded'])
y = data['Label_Encoded']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [82]:
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(X_train, y_train)

In [83]:
joblib.dump(rf_classifier, 'crop_recommendation_model.joblib')

['crop_recommendation_model.joblib']

In [84]:
y_pred = rf_classifier.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 99.09%


In [85]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.94      0.89      0.92        19
           1       1.00      1.00      1.00        21
           2       1.00      1.00      1.00        26
           3       1.00      1.00      1.00        20
           4       1.00      1.00      1.00        23
           5       1.00      0.96      0.98        24
           6       1.00      1.00      1.00        19
           7       1.00      1.00      1.00        20
           8       0.92      1.00      0.96        11
           9       1.00      1.00      1.00        23
          10       1.00      1.00      1.00        21
          11       1.00      1.00      1.00        19
          12       1.00      1.00      1.00        14
          13       1.00      1.00      1.00        19
          14       1.00      1.00      1.00        17
          15       1.00      1.00      1.00        23
          16       1.00      1.00      1.00        14
          17       1.00    

In [86]:
model = joblib.load('crop_recommendation_model.joblib')

new_conditions = pd.DataFrame({
    'N': [50],
    'P': [40],
    'K': [30],
    'temperature': [14],
    'humidity': [70],
    'ph': [6.5],
    'rainfall': [200],
    'Total_Nutrients': [120],
    'Temperature_Humidity': [1600],
    'Log_Rainfall': [5.3]
})

new_conditions_scaled = scaler.transform(new_conditions)
new_prediction = model.predict(new_conditions_scaled)



In [87]:
distinct_labels = data['Label'].unique()
label_mapping = {i: label for i, label in enumerate(distinct_labels)}

predicted_crop = label_mapping.get(new_prediction[0])
print('Recommended crop:', predicted_crop)

Recommended crop: cauliflower
