In [3]:
# Step 1: load / quick-clean / EDA
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's "whitegrid" style globally.
sns.set(style="whitegrid")

# File names, resolved relative to the notebook's working directory.
PATH = "final_dataset_cleaned.csv"            # input CSV read below
CLEAN_OUT = "final_dataset_cleaned_geo.csv"   # output file name; not written in this cell

# 1) Load the CSV, then report its dimensions and column names.
df = pd.read_csv(filepath_or_buffer=PATH)
print("➤ raw shape:", df.shape)
print("➤ columns:", list(df.columns))


➤ raw shape: (2200, 10)
➤ columns: ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 'label', 'latitude', 'longitude']


In [5]:
# Step 2: Encode target + Train/Test split
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import joblib

# Column names used to build the feature matrix and the target.
lat_col, lon_col, crop_col = 'latitude', 'longitude', 'label'

# Features (X): the two coordinate columns. Target (y): the crop label column.
X = df.loc[:, [lat_col, lon_col]].copy()
y = df.loc[:, crop_col].copy()

# Map every distinct crop label to an integer class id.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

print("➤ classes found:", list(label_encoder.classes_))
print("➤ sample encoded targets:", y_encoded[:10])

# Persist the fitted encoder so the id -> crop-name mapping can be restored later.
# NOTE(review): a joblib file can only be loaded from Python; a React Native
# client would need a Python backend or an exported class list -- confirm.
joblib.dump(label_encoder, "crop_label_encoder.joblib")
print("✔ label encoder saved as crop_label_encoder.joblib")

# Hold out 20% of the rows, stratified on the encoded label so each crop keeps
# the same share in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    stratify=y_encoded,
    random_state=42,
    test_size=0.2,
)

print("Train size:", X_train.shape, " Test size:", X_test.shape)


➤ classes found: ['apple', 'banana', 'blackgram', 'chickpea', 'coconut', 'coffee', 'cotton', 'grapes', 'jute', 'kidneybeans', 'lentil', 'maize', 'mango', 'mothbeans', 'mungbean', 'muskmelon', 'orange', 'papaya', 'pigeonpeas', 'pomegranate', 'rice', 'watermelon']
➤ sample encoded targets: [ 8  4  6  9 14 11  8  4 10  2]
✔ label encoder saved as crop_label_encoder.joblib
Train size: (1760, 2)  Test size: (440, 2)


In [7]:
# Step 3: Train RandomForest & build top-3 crop predictor
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, top_k_accuracy_score
import numpy as np
import joblib

# Train model: 300 trees with no depth limit, fixed seed, all CPU cores.
rf = RandomForestClassifier(
    n_estimators=300,
    max_depth=None,
    random_state=42,
    n_jobs=-1
)
rf.fit(X_train, y_train)

# Evaluate accuracy.
# Keep the probability matrix in a variable so it is computed once and can be
# inspected or reused after this cell.
y_pred = rf.predict(X_test)
y_proba = rf.predict_proba(X_test)
acc = accuracy_score(y_test, y_pred)

# The columns of predict_proba follow rf.classes_. Passing them explicitly keeps
# the metric aligned with the model instead of relying on every class happening
# to appear in y_test (otherwise the inferred label set and the number of
# probability columns can disagree and the call raises).
top3_acc = top_k_accuracy_score(y_test, y_proba, k=3, labels=rf.classes_)

# Encoded ids 0..n_classes-1 in the same order as label_encoder.classes_, so the
# report rows always line up with target_names even if a class is absent from
# the test split.
encoded_labels = np.arange(len(label_encoder.classes_))

print("✔ RandomForest Trained")
print(f"Top-1 Accuracy: {acc:.4f}")
print(f"Top-3 Accuracy: {top3_acc:.4f}")
print(
    "\nClassification Report:\n",
    classification_report(
        y_test,
        y_pred,
        labels=encoded_labels,
        target_names=label_encoder.classes_,
    ),
)

# Save model
joblib.dump(rf, "crop_rf_model.joblib")
print("✔ model saved as crop_rf_model.joblib")

# Function: predict the top-k (default 3) crops from lat/lon
def predict_top3(lat, lon, model, encoder, k=3):
    """Return the most probable crops for a single coordinate pair.

    Parameters
    ----------
    lat, lon : float
        Latitude and longitude of the query point, in that order.
    model : fitted classifier
        Must expose ``predict_proba``. If it exposes ``classes_`` these are
        taken to be the integer-encoded labels for each probability column;
        if it exposes ``feature_names_in_`` the input is rebuilt with those
        column names.
    encoder : object with ``classes_``
        ``encoder.classes_[code]`` must give the crop name for an encoded label.
    k : int, default 3
        Maximum number of crops to return.

    Returns
    -------
    list of (crop_name, probability) tuples, most probable first, with at most
    ``k`` entries.
    """
    # A model fitted on a DataFrame records its column names; feeding it a bare
    # ndarray makes scikit-learn warn about missing feature names, so rebuild a
    # one-row frame with the same names when they are available.
    feature_names = getattr(model, "feature_names_in_", None)
    if feature_names is not None:
        X_new = pd.DataFrame([[lat, lon]], columns=list(feature_names))
    else:
        X_new = np.array([[lat, lon]])

    probs = model.predict_proba(X_new)[0]

    # Descending order of probability; slicing after the reversal also handles
    # k == 0 and k larger than the number of classes.
    top_idx = np.argsort(probs)[::-1][:max(int(k), 0)]

    # Probability columns are ordered like model.classes_, which is not
    # guaranteed to be 0..n-1 (e.g. a class missing from the training data),
    # so translate column index -> encoded label before looking up the name.
    class_ids = getattr(model, "classes_", None)
    if class_ids is None:
        class_ids = range(len(probs))

    return [(encoder.classes_[int(class_ids[i])], probs[i]) for i in top_idx]

# Example usage
# X_test holds only the [latitude, longitude] columns, so its first held-out
# row unpacks into one coordinate pair.
example_lat, example_lon = X_test.iloc[0]
print("\nExample prediction for:", (example_lat, example_lon))
# Prints the list of (crop, probability) pairs returned by predict_top3.
print(predict_top3(example_lat, example_lon, rf, label_encoder))


✔ RandomForest Trained
Top-1 Accuracy: 0.6682
Top-3 Accuracy: 0.9500

Classification Report:
               precision    recall  f1-score   support

       apple       1.00      1.00      1.00        20
      banana       0.73      0.40      0.52        20
   blackgram       1.00      0.60      0.75        20
    chickpea       0.68      0.75      0.71        20
     coconut       0.80      1.00      0.89        20
      coffee       0.81      0.85      0.83        20
      cotton       1.00      0.35      0.52        20
      grapes       0.18      0.20      0.19        20
        jute       0.87      1.00      0.93        20
 kidneybeans       1.00      0.70      0.82        20
      lentil       0.61      0.85      0.71        20
       maize       1.00      0.85      0.92        20
       mango       0.68      0.85      0.76        20
   mothbeans       0.52      0.80      0.63        20
    mungbean       0.44      0.75      0.56        20
   muskmelon       1.00      0.05      0.

