In [152]:
# Importing Libraries
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

In [73]:
dataset = pd.read_csv('Fertilizer Prediction.csv')

In [74]:
dataset.shape

(99, 9)

In [75]:
dataset.head()

Unnamed: 0,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,26,52,38,Sandy,Maize,37,0,0,Urea
1,29,52,45,Loamy,Sugarcane,12,0,36,DAP
2,34,65,62,Black,Cotton,7,9,30,14-35-14
3,32,62,34,Red,Tobacco,22,0,20,28-28
4,28,54,46,Clayey,Paddy,35,0,0,Urea


In [76]:
dataset.isnull().sum()

Temparature        0
Humidity           0
Moisture           0
Soil Type          0
Crop Type          0
Nitrogen           0
Potassium          0
Phosphorous        0
Fertilizer Name    0
dtype: int64

In [77]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99 entries, 0 to 98
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Temparature      99 non-null     int64 
 1   Humidity         99 non-null     int64 
 2   Moisture         99 non-null     int64 
 3   Soil Type        99 non-null     object
 4   Crop Type        99 non-null     object
 5   Nitrogen         99 non-null     int64 
 6   Potassium        99 non-null     int64 
 7   Phosphorous      99 non-null     int64 
 8   Fertilizer Name  99 non-null     object
dtypes: int64(6), object(3)
memory usage: 7.1+ KB


In [78]:
# categorical column
categorical_column = [col for col in dataset.columns if dataset[col].dtype== 'O']

In [79]:
categorical_column

['Soil Type', 'Crop Type', 'Fertilizer Name']

In [80]:
dataset['Soil Type'].value_counts()

Loamy     21
Sandy     20
Clayey    20
Black     19
Red       19
Name: Soil Type, dtype: int64

In [81]:
for i in categorical_column:
    print(i)
    print(dataset[i].unique())
    

Soil Type
['Sandy' 'Loamy' 'Black' 'Red' 'Clayey']
Crop Type
['Maize' 'Sugarcane' 'Cotton' 'Tobacco' 'Paddy' 'Barley' 'Wheat' 'Millets'
 'Oil seeds' 'Pulses' 'Ground Nuts']
Fertilizer Name
['Urea' 'DAP' '14-35-14' '28-28' '17-17-17' '20-20' '10-26-26']


In [85]:
def encode1(column_name):
    dict_lable = dict()
    labels_list = [lable for lable in dataset[column_name].unique()]   
    for i, label in enumerate(labels_list):
        dict_lable[label] = i
    return dict_lable

In [88]:
soil_type_dict =encode1('Soil Type')

In [89]:
dataset['Soil Type'] = dataset['Soil Type'].replace(soil_type_dict)

In [90]:
crop_type_dict = encode1('Crop Type')

In [92]:
dataset['Crop Type'] = dataset['Crop Type'].replace(crop_type_dict)

In [93]:
fertilizer_dict = encode1('Fertilizer Name')

In [95]:
dataset['Fertilizer Name'] = dataset['Fertilizer Name'].replace(fertilizer_dict)

In [96]:
dataset.head()

Unnamed: 0,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,26,52,38,0,0,37,0,0,0
1,29,52,45,1,1,12,0,36,1
2,34,65,62,2,2,7,9,30,2
3,32,62,34,3,3,22,0,20,3
4,28,54,46,4,4,35,0,0,0


In [97]:
dataset.columns

Index(['Temparature', 'Humidity ', 'Moisture', 'Soil Type', 'Crop Type',
       'Nitrogen', 'Potassium', 'Phosphorous', 'Fertilizer Name'],
      dtype='object')

In [101]:
##seperating dependent independent features
X = dataset.drop('Fertilizer Name', axis = 1)
y = dataset['Fertilizer Name']

In [102]:
from sklearn.model_selection import train_test_split


In [103]:
X_train, X_test, y_train, y_test = train_test_split(
...     X, y, test_size=0.33, random_state=42)

In [106]:
from sklearn.preprocessing import StandardScaler

In [107]:
scaler = StandardScaler()

In [108]:
scaled_X_train = scaler.fit_transform(X_train)

In [109]:
scaled_X_test = scaler.transform(X_test)

In [None]:
# Random Forest Model

In [146]:
model = RandomForestClassifier(max_depth=4, random_state=12)

In [147]:
model.fit(scaled_X_train, y_train)

RandomForestClassifier(max_depth=4, random_state=12)

In [148]:
yhat = model.predict(scaled_X_test)

In [149]:
accuracy_score(yhat, y_test)

0.9696969696969697

In [117]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

In [129]:
model2 = RandomForestClassifier()
model2.fit(scaled_X_train,y_train)
yhat = model2.predict(scaled_X_test)
print(accuracy_score(yhat,y_test))

1.0


In [153]:
## Gridsearch CV

In [154]:
param_grid = { 
    'n_estimators': [200, 500],
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth' : [4,5,6,7,8],
    'criterion' :['gini', 'entropy']
}

In [165]:
model = RandomForestClassifier()
CV_rfc = GridSearchCV(estimator=model,param_grid=param_grid, cv= 5)


In [166]:
CV_rfc.fit(scaled_X_train,y_train)



GridSearchCV(cv=5, estimator=RandomForestClassifier(),
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [4, 5, 6, 7, 8],
                         'max_features': ['auto', 'sqrt', 'log2'],
                         'n_estimators': [200, 500]})

In [167]:
CV_rfc.best_params_

{'criterion': 'gini',
 'max_depth': 4,
 'max_features': 'log2',
 'n_estimators': 200}

In [172]:
model = RandomForestClassifier( random_state=12,criterion='gini', max_depth= 4, max_features='log2', n_estimators = 200)

In [174]:
model.fit(scaled_X_train,y_train)

RandomForestClassifier(max_depth=4, max_features='log2', n_estimators=200,
                       random_state=12)

In [175]:
yhat = model.predict(scaled_X_test)

In [176]:
accuracy_score(yhat,y_test)

1.0

In [None]:
i