In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, RobustScaler
from sklearn.metrics import classification_report, accuracy_score
from sklearn.pipeline import Pipeline
from joblib import dump

In [2]:
# Read the raw fertilizer recommendation data (CSV in the working directory)
df = pd.read_csv(filepath_or_buffer='fertilizer_recommendation_dataset.csv')

In [3]:
# Names of the numeric feature columns, one per line for easy diffing
numeric_cols = [
    'Temperature',
    'Moisture',
    'Rainfall',
    'PH',
    'Nitrogen',
    'Phosphorous',
    'Potassium',
    'Carbon',
]

In [4]:
# One-hot encode 'Soil' into Soil_<name> indicator columns.
# drop_first=True omits the indicator of the first soil category, so that
# category is represented by every remaining Soil_* column being 0.
df = pd.get_dummies(
    data=df,
    columns=['Soil'],
    drop_first=True,
)

In [5]:
# Fit an encoder on the crop names, then replace 'Crop' with its integer codes.
# The fitted encoder is kept in `le_crop` so the same mapping can be reused later.
le_crop = LabelEncoder().fit(df['Crop'])
df['Crop'] = le_crop.transform(df['Crop'])

In [6]:
# Persist the fitted crop encoder so crop names can be encoded outside this session
dump(value=le_crop, filename='crop_label_encoder.joblib')

['crop_label_encoder.joblib']

In [7]:
# Build a {fertilizer: remark} lookup from the first row seen for each fertilizer
fertilizer_to_remark = dict(
    df.drop_duplicates(subset=['Fertilizer'])[['Fertilizer', 'Remark']]
      .itertuples(index=False, name=None)
)

In [8]:
# 'Remark' is not a model input; remove it before building the feature matrix
df = df.drop('Remark', axis=1)

In [9]:
# Target is the fertilizer name; every other remaining column is a feature
y = df['Fertilizer']
X = df.drop(columns=['Fertilizer'])

In [10]:
# Hold out 20% of the rows for testing, stratified on the target, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [11]:
# Two-step pipeline: robust scaling of the inputs followed by a random forest
pipeline = Pipeline(
    steps=[
        ('scaler', RobustScaler()),
        (
            'classifier',
            RandomForestClassifier(n_estimators=100, random_state=42),
        ),
    ]
)

In [12]:
# Fit the scaler and the classifier on the training portion only
pipeline.fit(X=X_train, y=y_train)

In [13]:
# Predict fertilizer labels for the held-out rows
y_pred = pipeline.predict(X=X_test)


In [14]:
# Report overall accuracy and per-class precision/recall/F1 on the test split
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print(
    "\nClassification Report:\n",
    classification_report(y_true=y_test, y_pred=y_pred),
)

Accuracy: 0.9854838709677419

Classification Report:
                             precision    recall  f1-score   support

   Balanced NPK Fertilizer       0.91      1.00      0.95        31
                   Compost       0.99      0.99      0.99        75
                       DAP       1.00      1.00      1.00       211
General Purpose Fertilizer       1.00      1.00      1.00         6
                    Gypsum       1.00      0.45      0.62        11
                      Lime       1.00      1.00      1.00        36
         Muriate of Potash       1.00      1.00      1.00        65
        Organic Fertilizer       0.90      1.00      0.95        19
                      Urea       1.00      1.00      1.00        31
Water Retaining Fertilizer       0.98      0.99      0.99       135

                  accuracy                           0.99       620
                 macro avg       0.98      0.94      0.95       620
              weighted avg       0.99      0.99      0.98   

In [15]:
# Persist the fitted pipeline together with the fertilizer -> remark lookup
dump(
    value={
        'model': pipeline,
        'fertilizer_to_remark': fertilizer_to_remark,
    },
    filename='fertilizer_model_bundle.joblib',
)


['fertilizer_model_bundle.joblib']

In [16]:
# Example prediction function
def predict_fertilizer(input_dict, model=None, crop_encoder=None, remark_map=None):
    """Recommend a fertilizer, and its remark, for one observation.

    Parameters
    ----------
    input_dict : dict
        Raw feature values keyed by column name. Must contain 'Soil' (the soil
        name without the 'Soil_' prefix) and 'Crop' (a label the crop encoder
        knows), plus every other feature the model was fitted on. When the
        model exposes ``feature_names_in_``, key order does not matter and
        keys the model was not fitted on are ignored.
    model : object with ``predict``, optional
        Fitted model to use. Defaults to the notebook-level ``pipeline``.
    crop_encoder : object with ``transform``, optional
        Encoder turning a crop name into its code. Defaults to the
        notebook-level ``le_crop``.
    remark_map : dict, optional
        Fertilizer -> remark lookup. Defaults to the notebook-level
        ``fertilizer_to_remark``.

    Returns
    -------
    tuple
        ``(predicted_fertilizer, remark)``; the remark is
        "No remark available." when the fertilizer is not in the lookup.

    Raises
    ------
    KeyError
        If 'Soil' or 'Crop' is missing from ``input_dict``.
    ValueError
        If a feature the model was fitted on is missing from ``input_dict``.
        The crop encoder may raise its own error for an unknown crop.
    """
    # Fall back to the objects created earlier in the notebook when the caller
    # does not pass its own (e.g. ones loaded from the saved bundle).
    model = pipeline if model is None else model
    crop_encoder = le_crop if crop_encoder is None else crop_encoder
    remark_map = fertilizer_to_remark if remark_map is None else remark_map

    # Work on a copy so the caller's dict is never modified.
    features = dict(input_dict)
    soil_name = features.pop('Soil')
    features['Crop'] = crop_encoder.transform([features['Crop']])[0]

    # Prefer the column names recorded by the fitted model; fall back to the
    # known Soil dummy columns when the model does not expose them.
    expected = getattr(model, 'feature_names_in_', None)
    if expected is None:
        soil_columns = ['Soil_Alkaline Soil', 'Soil_Loamy Soil',
                        'Soil_Neutral Soil', 'Soil_Peaty Soil']
    else:
        expected = list(expected)
        soil_columns = [name for name in expected if name.startswith('Soil_')]

    # Manual one-hot encoding of 'Soil'. A soil without its own column (the
    # category dropped by drop_first=True during training) keeps all zeros.
    # NOTE(review): a misspelled soil name is indistinguishable from that
    # dropped category here and is silently treated the same way.
    for column in soil_columns:
        features[column] = 0
    soil_col = f"Soil_{soil_name}"
    if soil_col in soil_columns:
        features[soil_col] = 1

    if expected is not None:
        missing = [name for name in expected if name not in features]
        if missing:
            raise ValueError(f"Missing input features: {missing}")

    input_df = pd.DataFrame([features])
    if expected is not None:
        # Present the columns in exactly the order used when fitting, so the
        # result does not depend on the key order of input_dict.
        input_df = input_df[expected]

    pred_fertilizer = model.predict(input_df)[0]
    remark = remark_map.get(pred_fertilizer, "No remark available.")
    return pred_fertilizer, remark

In [18]:
# Example usage
# input_features = {
#     'Temperature': 25.0,
#     'Moisture': 0.7,
#     'Rainfall': 200.0,
#     'PH': 6.5,
#     'Nitrogen': 80,
#     'Phosphorous': 60,
#     'Potassium': 100,
#     'Carbon': 1.2,
#     'Soil': 'Loamy Soil',
#     'Crop': 'rice'
# }
# fert, remark = predict_fertilizer(input_features)
# print("Recommended Fertilizer:", fert)
# print("Remark:", remark)