In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer
import xgboost
import lightgbm
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [3]:
# Load the fertilizer dataset from the CSV file in the working directory.
DATA_PATH = "dataset.csv"
df = pd.read_csv(DATA_PATH)

In [4]:
# Preview the first five rows to sanity-check the loaded columns.
df.head(n=5)

Unnamed: 0,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,33,66,33,Sandy,Barley,17,19,13,17-17-17
1,31,67,43,Clayey,Paddy,10,0,21,28-28
2,29,54,36,Black,Oil seeds,17,1,11,20-20
3,29,57,44,Clayey,Pulses,11,16,35,14-35-14
4,28,62,59,Loamy,Sugarcane,20,6,16,DAP


In [5]:

# Separate the label column from the predictors; `df` itself is left untouched.
y = df['Fertilizer Name']
X = df.drop(columns=['Fertilizer Name'])

In [6]:

# Hold out 20% of the rows for validation; the fixed random_state keeps the
# split the same on every run.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    train_size=0.8,
    test_size=0.2,
    random_state=0,
)

In [7]:
from sklearn.preprocessing import OneHotEncoder

In [8]:

# Columns routed to each branch of the preprocessor.
numerical_features = ['Temparature', 'Humidity', 'Moisture', 'Nitrogen', 'Potassium', 'Phosphorous']
categorical_features = ['Soil Type', 'Crop Type']

# Numeric branch: fill missing values with the column mean.
numerical_transformer = SimpleImputer(strategy='mean')

# Categorical branch: fill missing values with the most frequent value, then
# one-hot encode. handle_unknown='ignore' means categories that were not seen
# during fit do not raise at transform time.
categorical_steps = [
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
]
categorical_transformer = Pipeline(steps=categorical_steps)

# Apply each branch to its own column subset.
preprocessor = ColumnTransformer(transformers=[
    ('num', numerical_transformer, numerical_features),
    ('cat', categorical_transformer, categorical_features),
])

In [None]:

from sklearn.preprocessing import LabelEncoder

# The classifier is trained on integer class ids, so encode the fertilizer
# names first. The encoder is fitted on the training labels only and then
# reused for the validation labels.
le = LabelEncoder()
le.fit(y_train)
y_train_encoded = le.transform(y_train)
y_valid_encoded = le.transform(y_valid)

# Chain preprocessing and the classifier so both are fitted and applied together.
model1 = XGBClassifier()
randpipe = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('model', model1),
])
randpipe.fit(X_train, y_train_encoded)

# Predict on the hold-out rows; keep both the encoded ids and the readable names.
preds1 = randpipe.predict(X_valid)
preds1_decoded = le.inverse_transform(preds1)

validation_accuracy = accuracy_score(y_valid_encoded, preds1)
print("accuracy score=", validation_accuracy)

accuracy score= 0.71


In [22]:

import warnings

# Hand-built single-row sample. Column names and category spellings must match
# the training data exactly.
new_data = pd.DataFrame({
    'Temparature': [30],
    'Humidity': [69],
    'Moisture': [31],
    'Soil Type': ['Black'],
    # Fixed typo: this was 'Sugercane', which is not a category in the dataset
    # (the dataset spells it 'Sugarcane').
    'Crop Type': ['Sugarcane'],
    'Nitrogen': [15],
    'Potassium': [16],
    'Phosphorous': [15]
})

# The one-hot encoder is configured with handle_unknown='ignore', so a
# misspelled or unseen category does not raise; the prediction would simply be
# made without that feature. Surface such values instead of failing silently.
for column in categorical_features:
    unseen_values = set(new_data[column]) - set(X_train[column].dropna())
    if unseen_values:
        warnings.warn(
            f"Column {column!r} contains values not seen during training: "
            f"{sorted(map(str, unseen_values))}; the encoder will ignore them."
        )

# The pipeline returns encoded class ids; map them back to fertilizer names.
prediction_encoded = randpipe.predict(new_data)
prediction = le.inverse_transform(prediction_encoded)

print("Prediction for the new data:", prediction)

Prediction for the new data: ['17-17-17']


In [23]:
import pickle

# Persist the fitted pipeline (preprocessing + model) to disk.
# Only unpickle this file from a trusted source: loading a pickle can execute code.
with open("randpipe.pkl", mode="wb") as model_file:
    pickle.dump(randpipe, model_file)


In [24]:
import pickle

# Save the encoder that was actually fitted on the training labels (`le`)
# rather than re-creating one from a hand-typed class list. The model's output
# ids are defined by `le`, so a separately built encoder could silently decode
# predictions to the wrong fertilizer if the training labels ever differ from
# the hard-coded list.
le_fertilizer = le

# Plain-str copy of the learned classes, in encoded-id order, for display.
fertilizer_classes = [str(name) for name in le_fertilizer.classes_]

# Save it
with open('fert_label_encoder.pkl', 'wb') as f:
    pickle.dump(le_fertilizer, f)

print("✅ LabelEncoder saved successfully as 'fert_label_encoder.pkl'")
print(f"Classes: {fertilizer_classes}")
print("Encoding mapping:")
for i, fert in enumerate(fertilizer_classes):
    print(f"  {i} -> {fert}")

✅ LabelEncoder saved successfully as 'fert_label_encoder.pkl'
Classes: [np.str_('10-26-26'), np.str_('14-35-14'), np.str_('17-17-17'), np.str_('20-20'), np.str_('28-28'), np.str_('DAP'), np.str_('Urea')]
Encoding mapping:
  0 -> 10-26-26
  1 -> 14-35-14
  2 -> 17-17-17
  3 -> 20-20
  4 -> 28-28
  5 -> DAP
  6 -> Urea
