In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
import pickle

# LOAD DATASET
df = pd.read_csv("Crop_recommendation.csv")

# FEATURES AND TARGET
X = df.drop("label", axis=1)   # N, P, K, temperature, humidity, ph, rainfall
y = df["label"]

# ENCODE LABELS (rice → 0, maize → 1 …)
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# TRAIN/TEST SPLIT
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42
)

# CREATE A PIPELINE (Scaler + RandomForest)
pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("model", RandomForestClassifier(
        n_estimators=300, 
        max_depth=20, 
        random_state=42
    ))
])

# TRAIN MODEL
pipeline.fit(X_train, y_train)

# EVALUATE
y_pred = pipeline.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Model Accuracy: {accuracy * 100:.2f}%")

# SAVE MODEL + LABEL ENCODER
pickle.dump(pipeline, open("crop_pipeline.pkl", "wb"))
pickle.dump(le, open("label_encoder.pkl", "wb"))

print("Model and label encoder saved successfully.")


Model Accuracy: 99.32%
Model and label encoder saved successfully.


In [2]:
pipeline_test = pickle.load(open("crop_pipeline.pkl", "rb"))
le_test = pickle.load(open("label_encoder.pkl", "rb"))

# Example test input (taken from first row)
sample = np.array([[90, 42, 43, 20.879744, 82.002744, 6.502985, 202.935536]])

# Predict (raw + crop name)
raw_pred = pipeline_test.predict(sample)[0]
crop_name = le_test.inverse_transform([raw_pred])[0]

print("\nTest Prediction:")
print("Raw output:", raw_pred)
print("Predicted Crop:", crop_name)


NameError: name 'np' is not defined

In [None]:
df.sample(5)