In [23]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
import pickle

In [24]:
df =  pd.read_csv('fertilizer_recommendation_dataset.csv')
df.head()

Unnamed: 0,Temperature,Moisture,Rainfall,PH,Nitrogen,Phosphorous,Potassium,Carbon,Soil,Crop,Fertilizer,Remark
0,50.179845,0.725893,205.600816,6.227358,66.701872,76.96356,96.429065,0.4963,Loamy Soil,rice,Compost,Enhances organic matter and improves soil stru...
1,21.633318,0.721958,306.081601,7.173131,71.583316,163.057636,148.128347,1.234242,Loamy Soil,rice,Balanced NPK Fertilizer,"Provides a balanced mix of nitrogen, phosphoru..."
2,23.060964,0.685751,259.336414,7.380793,75.70983,62.091508,80.308971,1.79565,Peaty Soil,rice,Water Retaining Fertilizer,Improves water retention in dry soils. Prefer ...
3,26.241975,0.755095,212.703513,6.883367,78.033687,151.012521,153.005712,1.517556,Loamy Soil,rice,Balanced NPK Fertilizer,"Provides a balanced mix of nitrogen, phosphoru..."
4,21.490157,0.730672,268.786767,7.57876,71.765123,66.257371,97.000886,1.782985,Peaty Soil,rice,Organic Fertilizer,"Enhances fertility naturally, ideal for peaty ..."


In [25]:
df = df.drop(columns=['Remark'])

In [26]:
df.head()

Unnamed: 0,Temperature,Moisture,Rainfall,PH,Nitrogen,Phosphorous,Potassium,Carbon,Soil,Crop,Fertilizer
0,50.179845,0.725893,205.600816,6.227358,66.701872,76.96356,96.429065,0.4963,Loamy Soil,rice,Compost
1,21.633318,0.721958,306.081601,7.173131,71.583316,163.057636,148.128347,1.234242,Loamy Soil,rice,Balanced NPK Fertilizer
2,23.060964,0.685751,259.336414,7.380793,75.70983,62.091508,80.308971,1.79565,Peaty Soil,rice,Water Retaining Fertilizer
3,26.241975,0.755095,212.703513,6.883367,78.033687,151.012521,153.005712,1.517556,Loamy Soil,rice,Balanced NPK Fertilizer
4,21.490157,0.730672,268.786767,7.57876,71.765123,66.257371,97.000886,1.782985,Peaty Soil,rice,Organic Fertilizer


In [27]:
df['Soil'].unique()

array(['Loamy Soil', 'Peaty Soil', 'Acidic Soil', 'Neutral Soil',
       'Alkaline Soil'], dtype=object)

In [28]:
df['Crop'].unique()

array(['rice', 'wheat', 'Mung Bean', 'Tea', 'millet', 'maize', 'Lentil',
       'Jute', 'Coffee', 'Cotton', 'Ground Nut', 'Peas', 'Rubber',
       'Sugarcane', 'Tobacco', 'Kidney Beans', 'Moth Beans', 'Coconut',
       'Black gram', 'Adzuki Beans', 'Pigeon Peas', 'Chickpea', 'banana',
       'grapes', 'apple', 'mango', 'muskmelon', 'orange', 'papaya',
       'pomegranate', 'watermelon'], dtype=object)

In [29]:
df['Fertilizer'].unique()

array(['Compost', 'Balanced NPK Fertilizer', 'Water Retaining Fertilizer',
       'Organic Fertilizer', 'Gypsum', 'Lime', 'DAP', 'Urea',
       'Muriate of Potash', 'General Purpose Fertilizer'], dtype=object)

In [30]:
df.isnull().sum()

Temperature    0
Moisture       0
Rainfall       0
PH             0
Nitrogen       0
Phosphorous    0
Potassium      0
Carbon         0
Soil           0
Crop           0
Fertilizer     0
dtype: int64

In [31]:
df.shape

(3100, 11)

In [32]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3100 entries, 0 to 3099
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Temperature  3100 non-null   float64
 1   Moisture     3100 non-null   float64
 2   Rainfall     3100 non-null   float64
 3   PH           3100 non-null   float64
 4   Nitrogen     3100 non-null   float64
 5   Phosphorous  3100 non-null   float64
 6   Potassium    3100 non-null   float64
 7   Carbon       3100 non-null   float64
 8   Soil         3100 non-null   object 
 9   Crop         3100 non-null   object 
 10  Fertilizer   3100 non-null   object 
dtypes: float64(8), object(3)
memory usage: 266.5+ KB


In [33]:
X = df.drop(columns=['Crop'])
y = df['Crop']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state = 42)

In [34]:
preprocesser = ColumnTransformer(transformers=[
    ('numeric', StandardScaler(), [0, 1, 2, 3, 4, 5, 6, 7]),
    ('categorical', OneHotEncoder(drop='first', handle_unknown='ignore'), [8, 9]),
])

In [35]:
model_rf = RandomForestClassifier(max_depth=10)

In [36]:
pipe_rf = Pipeline([
    ('featureScaling', preprocesser),
    ('model', model_rf)
])

In [37]:
pipe_rf.fit(X_train, y_train)

In [38]:
y_pred_rf = pipe_rf.predict(X_test)

In [39]:
print(accuracy_score(y_test, y_pred_rf))

0.5661290322580645


In [40]:
df.head()

Unnamed: 0,Temperature,Moisture,Rainfall,PH,Nitrogen,Phosphorous,Potassium,Carbon,Soil,Crop,Fertilizer
0,50.179845,0.725893,205.600816,6.227358,66.701872,76.96356,96.429065,0.4963,Loamy Soil,rice,Compost
1,21.633318,0.721958,306.081601,7.173131,71.583316,163.057636,148.128347,1.234242,Loamy Soil,rice,Balanced NPK Fertilizer
2,23.060964,0.685751,259.336414,7.380793,75.70983,62.091508,80.308971,1.79565,Peaty Soil,rice,Water Retaining Fertilizer
3,26.241975,0.755095,212.703513,6.883367,78.033687,151.012521,153.005712,1.517556,Loamy Soil,rice,Balanced NPK Fertilizer
4,21.490157,0.730672,268.786767,7.57876,71.765123,66.257371,97.000886,1.782985,Peaty Soil,rice,Organic Fertilizer


In [41]:
features = [[34, 0.7, 200, 6.87, 57, 88, 102, 1.2, 'Neutral Soil', 'General Purpose Fertilizer']]

test_prediction = pipe_rf.predict(features)



In [42]:
print(test_prediction)

['Black gram']


In [43]:
with open('model_rf.pkl', 'wb') as f:
    pickle.dump(pipe_rf, f)

In [45]:
fert = []
for i in df['Fertilizer']:
    fert.append(i)

In [49]:
fert = pd.DataFrame(fert)
fert = fert.drop_duplicates()

In [51]:
with open('fert.pkl', 'wb') as f:
    pickle.dump(fert, f)