In [47]:
# Importing Libraries
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
import pickle

In [2]:
dataset = pd.read_csv('Fertilizer Prediction.csv')

In [52]:
dataset_copy = pd.read_csv('Fertilizer Prediction.csv')

In [3]:
dataset.shape

(99, 9)

In [4]:
dataset.head()

Unnamed: 0,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,26,52,38,Sandy,Maize,37,0,0,Urea
1,29,52,45,Loamy,Sugarcane,12,0,36,DAP
2,34,65,62,Black,Cotton,7,9,30,14-35-14
3,32,62,34,Red,Tobacco,22,0,20,28-28
4,28,54,46,Clayey,Paddy,35,0,0,Urea


In [5]:
dataset.isnull().sum()

Temparature        0
Humidity           0
Moisture           0
Soil Type          0
Crop Type          0
Nitrogen           0
Potassium          0
Phosphorous        0
Fertilizer Name    0
dtype: int64

In [6]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99 entries, 0 to 98
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Temparature      99 non-null     int64 
 1   Humidity         99 non-null     int64 
 2   Moisture         99 non-null     int64 
 3   Soil Type        99 non-null     object
 4   Crop Type        99 non-null     object
 5   Nitrogen         99 non-null     int64 
 6   Potassium        99 non-null     int64 
 7   Phosphorous      99 non-null     int64 
 8   Fertilizer Name  99 non-null     object
dtypes: int64(6), object(3)
memory usage: 7.1+ KB


In [7]:
# categorical column
categorical_column = [col for col in dataset.columns if dataset[col].dtype== 'O']

In [8]:
categorical_column

['Soil Type', 'Crop Type', 'Fertilizer Name']

In [9]:
dataset['Soil Type'].value_counts()

Loamy     21
Sandy     20
Clayey    20
Black     19
Red       19
Name: Soil Type, dtype: int64

In [10]:
for i in categorical_column:
    print(i)
    print(dataset[i].unique())
    

Soil Type
['Sandy' 'Loamy' 'Black' 'Red' 'Clayey']
Crop Type
['Maize' 'Sugarcane' 'Cotton' 'Tobacco' 'Paddy' 'Barley' 'Wheat' 'Millets'
 'Oil seeds' 'Pulses' 'Ground Nuts']
Fertilizer Name
['Urea' 'DAP' '14-35-14' '28-28' '17-17-17' '20-20' '10-26-26']


In [11]:
def encode1(column_name):
    dict_lable = dict()
    labels_list = [lable for lable in dataset[column_name].unique()]   
    for i, label in enumerate(labels_list):
        dict_lable[label] = i
    return dict_lable

In [12]:
soil_type_dict =encode1('Soil Type')

In [13]:
soil_type_dict

{'Sandy': 0, 'Loamy': 1, 'Black': 2, 'Red': 3, 'Clayey': 4}

In [15]:
dataset['Soil Type'] = dataset['Soil Type'].replace(soil_type_dict)

In [16]:
crop_type_dict = encode1('Crop Type')

In [17]:
crop_type_dict

{'Maize': 0,
 'Sugarcane': 1,
 'Cotton': 2,
 'Tobacco': 3,
 'Paddy': 4,
 'Barley': 5,
 'Wheat': 6,
 'Millets': 7,
 'Oil seeds': 8,
 'Pulses': 9,
 'Ground Nuts': 10}

In [18]:
dataset['Crop Type'] = dataset['Crop Type'].replace(crop_type_dict)

In [19]:
fertilizer_dict = encode1('Fertilizer Name')

In [20]:
fertilizer_dict

{'Urea': 0,
 'DAP': 1,
 '14-35-14': 2,
 '28-28': 3,
 '17-17-17': 4,
 '20-20': 5,
 '10-26-26': 6}

In [21]:
dataset['Fertilizer Name'] = dataset['Fertilizer Name'].replace(fertilizer_dict)

In [22]:
dataset.head()

Unnamed: 0,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,26,52,38,0,0,37,0,0,0
1,29,52,45,1,1,12,0,36,1
2,34,65,62,2,2,7,9,30,2
3,32,62,34,3,3,22,0,20,3
4,28,54,46,4,4,35,0,0,0


In [23]:
dataset.columns

Index(['Temparature', 'Humidity ', 'Moisture', 'Soil Type', 'Crop Type',
       'Nitrogen', 'Potassium', 'Phosphorous', 'Fertilizer Name'],
      dtype='object')

In [24]:
##seperating dependent independent features
X = dataset.drop('Fertilizer Name', axis = 1)
y = dataset['Fertilizer Name']

In [25]:
from sklearn.model_selection import train_test_split


In [26]:
X_train, X_test, y_train, y_test = train_test_split(
...     X, y, test_size=0.33, random_state=42)

In [27]:
from sklearn.preprocessing import StandardScaler

In [28]:
scaler = StandardScaler()

In [29]:
scaled_X_train = scaler.fit_transform(X_train)

In [30]:
scaled_X_test = scaler.transform(X_test)

In [31]:
# Random Forest Model

In [32]:
model = RandomForestClassifier(max_depth=4, random_state=12)

In [33]:
model.fit(scaled_X_train, y_train)

RandomForestClassifier(max_depth=4, random_state=12)

In [34]:
yhat = model.predict(scaled_X_test)

In [35]:
accuracy_score(yhat, y_test)

0.9696969696969697

In [38]:
## Gridsearch CV

In [39]:
param_grid = { 
    'n_estimators': [200, 500],
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth' : [4,5,6,7,8],
    'criterion' :['gini', 'entropy']
}

In [40]:
model = RandomForestClassifier()
CV_rfc = GridSearchCV(estimator=model,param_grid=param_grid, cv= 5)


In [41]:
CV_rfc.fit(scaled_X_train,y_train)



GridSearchCV(cv=5, estimator=RandomForestClassifier(),
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [4, 5, 6, 7, 8],
                         'max_features': ['auto', 'sqrt', 'log2'],
                         'n_estimators': [200, 500]})

In [42]:
CV_rfc.best_params_

{'criterion': 'gini',
 'max_depth': 5,
 'max_features': 'auto',
 'n_estimators': 200}

In [43]:
model = RandomForestClassifier( random_state=12,criterion='gini', max_depth= 5, max_features='log2', n_estimators = 200)

In [44]:
model.fit(scaled_X_train,y_train)

RandomForestClassifier(max_depth=5, max_features='log2', n_estimators=200,
                       random_state=12)

In [45]:
yhat = model.predict(scaled_X_test)

In [46]:
accuracy_score(yhat,y_test)

1.0

In [48]:
pickle.dump(model, open("fertilizer_recommendation.pkl", "wb"))

In [49]:
dataset.head()

Unnamed: 0,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,26,52,38,0,0,37,0,0,0
1,29,52,45,1,1,12,0,36,1
2,34,65,62,2,2,7,9,30,2
3,32,62,34,3,3,22,0,20,3
4,28,54,46,4,4,35,0,0,0


In [51]:
##pipeline
soil_type_dict = {'Sandy': 0, 'Loamy': 1, 'Black': 2, 'Red': 3, 'Clayey': 4}
crop_type_dict = {'Maize': 0,'Sugarcane': 1,'Cotton': 2,'Tobacco': 3,'Paddy': 4,'Barley': 5,'Wheat': 6,'Millets': 7,'Oil seeds': 8,'Pulses': 9,'Ground Nuts': 10}
fertilizer_dict = {'Urea': 0,'DAP': 1,'14-35-14': 2,'28-28': 3,'17-17-17': 4,'20-20': 5,'10-26-26': 6}

In [54]:
dataset_copy.head()

Unnamed: 0,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,26,52,38,Sandy,Maize,37,0,0,Urea
1,29,52,45,Loamy,Sugarcane,12,0,36,DAP
2,34,65,62,Black,Cotton,7,9,30,14-35-14
3,32,62,34,Red,Tobacco,22,0,20,28-28
4,28,54,46,Clayey,Paddy,35,0,0,Urea


In [66]:
for i in fertilizer_dict:
    if fertilizer_dict[i] == 0:
        print(i)

Urea


In [58]:
dataset_copy.columns

Index(['Temparature', 'Humidity ', 'Moisture', 'Soil Type', 'Crop Type',
       'Nitrogen', 'Potassium', 'Phosphorous', 'Fertilizer Name'],
      dtype='object')

In [57]:
temp = 26
humi = 52
moist = 38
soil_type = "Sandy"
soil_type_econded= soil_type_dict[soil_type]
crop_type = "Maize"
crop_type_encoded = crop_type_dict[crop_type]
nitrogen = 37
potassium = 0
phos = 0

In [61]:
input1 = pd.DataFrame([[temp,humi,moist,soil_type_econded,crop_type_encoded,nitrogen,potassium,phos]],columns=['Temparature', 'Humidity ', 'Moisture', 'Soil Type', 'Crop Type',
       'Nitrogen', 'Potassium', 'Phosphorous'])


In [62]:
input1

Unnamed: 0,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous
0,26,52,38,0,0,37,0,0


In [65]:
prediction = model.predict(input1)[0]



In [70]:
fertilizer_predicted = [i for i in fertilizer_dict if fertilizer_dict[i] == prediction]
print(fertilizer_predicted)

['Urea']
