In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib
import os

# Load dataset
dataset_path = '../data/crop_dataset.csv'
try:
    df = pd.read_csv(dataset_path)
except FileNotFoundError:
    print("Error: CSV file not found. Check the path and file name!")
    raise

print("Dataset Preview:")
print(df.head())
print("\nColumns:", df.columns)

# Features and Target
X = df[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]
y = df['label']

# Split Dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"Training samples: {X_train.shape[0]}")
print(f"Testing samples: {X_test.shape[0]}")

# Train Random Forest Model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate Model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy*100:.2f}%")

# Save Model
model_folder = '../models'
os.makedirs(model_folder, exist_ok=True)
model_path = os.path.join(model_folder, 'crop_model.pkl')
joblib.dump(model, model_path)
print(f"Trained model saved at: {model_path}")

# Test Prediction (Fixed)
sample_input = pd.DataFrame(
    [[90, 40, 40, 25, 80, 6.5, 200]],
    columns=['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']
)
predicted_crop = model.predict(sample_input)
print("Predicted Crop for Sample Input:", predicted_crop[0])


Dataset Preview:
    N   P   K  temperature   humidity        ph    rainfall label
0  90  42  43    20.879744  82.002744  6.502985  202.935536  rice
1  85  58  41    21.770462  80.319644  7.038096  226.655537  rice
2  60  55  44    23.004459  82.320763  7.840207  263.964248  rice
3  74  35  40    26.491096  80.158363  6.980401  242.864034  rice
4  78  42  42    20.130175  81.604873  7.628473  262.717340  rice

Columns: Index(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 'label'], dtype='object')
Training samples: 1760
Testing samples: 440
Model Accuracy: 99.32%
Trained model saved at: ../models\crop_model.pkl
Predicted Crop for Sample Input: rice
