In [1]:
# Mount Google Drive into the Colab runtime; the CSV paths used by the
# data-loading cells live under this mount point.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)


Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np


In [3]:
# Load the two raw datasets from the mounted Drive folder.
fao_csv_path = "/content/drive/MyDrive/AgriYield_Data/FAOSTAT_data_en_11-19-2025.csv"
crop_csv_path = "/content/drive/MyDrive/AgriYield_Data/Crop_recommendation.csv"

fao = pd.read_csv(fao_csv_path)
crop = pd.read_csv(crop_csv_path)

# Report (rows, columns) for each frame first, then the column names.
fao_shape, crop_shape = fao.shape, crop.shape
print("FAOSTAT shape:", fao_shape)
print("Crop Recommendation shape:", crop_shape)

fao_columns = fao.columns.tolist()
crop_columns = crop.columns.tolist()
print("\nFAOSTAT columns:\n", fao_columns)
print("\nCrop Recommendation columns:\n", crop_columns)


FAOSTAT shape: (5940, 15)
Crop Recommendation shape: (2200, 8)

FAOSTAT columns:
 ['Domain Code', 'Domain', 'Area Code (M49)', 'Area', 'Element Code', 'Element', 'Item Code (CPC)', 'Item', 'Year Code', 'Year', 'Unit', 'Value', 'Flag', 'Flag Description', 'Note']

Crop Recommendation columns:
 ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 'label']


In [4]:
# Collect the distinct crop names of each dataset as sorted Python lists.
fao_items = list(fao['Item'].unique())
fao_items.sort()

crop_labels = list(crop['label'].unique())
crop_labels.sort()

# Compare how many distinct crops each source covers.
n_fao_items = len(fao_items)
n_crop_labels = len(crop_labels)
print("FAOSTAT unique crops:", n_fao_items)
print("Crop Recommendation unique crops:", n_crop_labels)


FAOSTAT unique crops: 85
Crop Recommendation unique crops: 22


In [5]:
# Lookup table from a FAOSTAT crop name (key, presumably a value of the FAOSTAT
# `Item` column — verify against the data) to a lowercase crop label (value).
# Entry order is the same as before; the group comments are only a reading aid.
# NOTE(review): 'barley' and 'sorghum' are not among the 22 labels this notebook
# prints for Crop_recommendation.csv — confirm these two entries are intended.
faostat_to_label = dict([
    # cereals
    ('Rice, paddy', 'rice'),
    ('Maize', 'maize'),
    # pulses
    ('Chick peas', 'chickpea'),
    ('Beans, dry', 'kidneybeans'),
    ('Pigeon peas', 'pigeonpeas'),
    ('Lentils', 'lentil'),
    # fruits
    ('Bananas', 'banana'),
    ('Apples', 'apple'),
    ('Oranges', 'orange'),
    ('Mangoes, mangosteens, guavas', 'mango'),
    ('Grapes', 'grapes'),
    ('Pomegranates', 'pomegranate'),
    # fibre crops
    ('Seed cotton', 'cotton'),
    ('Jute', 'jute'),
    # melons
    ('Watermelons', 'watermelon'),
    ('Melons, other (inc.cantaloupes)', 'muskmelon'),
    # further cereals
    ('Barley', 'barley'),
    ('Sorghum', 'sorghum'),
])


In [17]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


In [18]:
# Read the Crop Recommendation CSV into the frame used by the modelling cells.
crop_recommendation_csv = "/content/drive/MyDrive/AgriYield_Data/Crop_recommendation.csv"
crop_df = pd.read_csv(crop_recommendation_csv)

print("Dataset shape:", crop_df.shape)
# Last expression of the cell: rich preview of the first five rows.
crop_df.head(n=5)


Dataset shape: (2200, 8)


Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice


In [19]:
# Sanity-check the loaded frame: column names, null counts, and the label set.
column_names = crop_df.columns.tolist()
print("Columns:\n", column_names)

null_counts = crop_df.isnull().sum()
print("\nMissing values per column:\n")
print(null_counts)

label_column = crop_df['label']
print("\nUnique crops (labels):", label_column.nunique())
print("\nCrop names:\n", sorted(label_column.unique()))


Columns:
 ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 'label']

Missing values per column:

N              0
P              0
K              0
temperature    0
humidity       0
ph             0
rainfall       0
label          0
dtype: int64

Unique crops (labels): 22

Crop names:
 ['apple', 'banana', 'blackgram', 'chickpea', 'coconut', 'coffee', 'cotton', 'grapes', 'jute', 'kidneybeans', 'lentil', 'maize', 'mango', 'mothbeans', 'mungbean', 'muskmelon', 'orange', 'papaya', 'pigeonpeas', 'pomegranate', 'rice', 'watermelon']


In [20]:
# Separate the target column from the predictors: y is the crop label,
# X is every remaining column of the frame.
target_column = 'label'
y = crop_df[target_column]
X = crop_df.drop(columns=[target_column])

print("Feature matrix shape:", X.shape)
print("Target shape:", y.shape)


Feature matrix shape: (2200, 7)
Target shape: (2200,)


In [21]:
# Encode the string crop labels as integer class ids for the classifier.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

print("Encoded labels (first 10):", y_encoded[:10])
print("\nLabel mapping:")
# The loop variable is deliberately not named `crop`: that name is bound to the
# Crop Recommendation DataFrame loaded near the top of the notebook, and
# rebinding it to a string here breaks any re-run of the cells that index it.
for i, crop_name in enumerate(label_encoder.classes_):
    print(f"{i} → {crop_name}")


Encoded labels (first 10): [20 20 20 20 20 20 20 20 20 20]

Label mapping:
0 → apple
1 → banana
2 → blackgram
3 → chickpea
4 → coconut
5 → coffee
6 → cotton
7 → grapes
8 → jute
9 → kidneybeans
10 → lentil
11 → maize
12 → mango
13 → mothbeans
14 → mungbean
15 → muskmelon
16 → orange
17 → papaya
18 → pigeonpeas
19 → pomegranate
20 → rice
21 → watermelon


In [22]:
# Hold out 20% of the rows for evaluation. Stratifying on the encoded label
# keeps the class proportions equal in both splits; the seed fixes the shuffle.
split_options = dict(test_size=0.2, random_state=42, stratify=y_encoded)
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, **split_options)

n_train = X_train.shape[0]
n_test = X_test.shape[0]
print("Training samples:", n_train)
print("Testing samples:", n_test)


Training samples: 1760
Testing samples: 440


In [23]:
# Fit a 200-tree random forest on the training split.
# random_state pins the result across runs; n_jobs=-1 lets scikit-learn use
# every available CPU core.
rf_model = RandomForestClassifier(
    n_estimators=200,
    random_state=42,
    n_jobs=-1
)

rf_model.fit(X_train, y_train)

# Status line; the check-mark emoji was previously mis-encoded (mojibake).
print("✅ Random Forest training completed")


✅ Random Forest training completed


In [24]:
# Score the held-out split: overall accuracy followed by per-crop metrics.
y_pred = rf_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
# Accuracy is shown as a percentage rounded to two decimals; the emoji was
# previously mis-encoded (mojibake).
print("🎯 Model Accuracy:", round(accuracy * 100, 2), "%")

print("\nClassification Report:\n")
# target_names labels the report rows with crop names instead of class ids.
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))


🎯 Model Accuracy: 99.55 %

Classification Report:

              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        20
      banana       1.00      1.00      1.00        20
   blackgram       1.00      0.95      0.97        20
    chickpea       1.00      1.00      1.00        20
     coconut       1.00      1.00      1.00        20
      coffee       1.00      1.00      1.00        20
      cotton       1.00      1.00      1.00        20
      grapes       1.00      1.00      1.00        20
        jute       0.95      1.00      0.98        20
 kidneybeans       1.00      1.00      1.00        20
      lentil       1.00      1.00      1.00        20
       maize       0.95      1.00      0.98        20
       mango       1.00      1.00      1.00        20
   mothbeans       1.00      1.00      1.00        20
    mungbean       1.00      1.00      1.00        20
   muskmelon       1.00      1.00      1.00        20
      orange       1.00    

In [25]:
# Example soil & climate values
sample_input = pd.DataFrame([{
    'N': 90,
    'P': 42,
    'K': 43,
    'temperature': 20.8,
    'humidity': 82.0,
    'ph': 6.5,
    'rainfall': 202.9
}])
# Put the columns in the order the model was fitted with, so reordering the
# keys above cannot trip scikit-learn's feature-name check at predict time.
# A missing key raises a KeyError here instead.
sample_input = sample_input[X_train.columns]

prediction_encoded = rf_model.predict(sample_input)
# Map the predicted integer class id back to its crop name.
prediction_crop = label_encoder.inverse_transform(prediction_encoded)

# The seedling emoji was previously mis-encoded (mojibake).
print("🌱 Recommended Crop:", prediction_crop[0])


🌱 Recommended Crop: rice
