In [44]:
# Importing Libraries
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
import pickle
import sklearn

In [45]:
dataset = pd.read_csv('Fertilizer Prediction.csv')

In [46]:
dataset.head()

Unnamed: 0,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,26,52,38,Sandy,Maize,37,0,0,Urea
1,29,52,45,Loamy,Sugarcane,12,0,36,DAP
2,34,65,62,Black,Cotton,7,9,30,14-35-14
3,32,62,34,Red,Tobacco,22,0,20,28-28
4,28,54,46,Clayey,Paddy,35,0,0,Urea


In [47]:
dataset_copy = pd.read_csv('Fertilizer Prediction.csv')

In [48]:
dataset.shape

(99, 9)

In [49]:
dataset['Soil Type'].unique()

array(['Sandy', 'Loamy', 'Black', 'Red', 'Clayey'], dtype=object)

In [50]:
dataset.isnull().sum()

Temparature        0
Humidity           0
Moisture           0
Soil Type          0
Crop Type          0
Nitrogen           0
Potassium          0
Phosphorous        0
Fertilizer Name    0
dtype: int64

In [51]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99 entries, 0 to 98
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Temparature      99 non-null     int64 
 1   Humidity         99 non-null     int64 
 2   Moisture         99 non-null     int64 
 3   Soil Type        99 non-null     object
 4   Crop Type        99 non-null     object
 5   Nitrogen         99 non-null     int64 
 6   Potassium        99 non-null     int64 
 7   Phosphorous      99 non-null     int64 
 8   Fertilizer Name  99 non-null     object
dtypes: int64(6), object(3)
memory usage: 7.1+ KB


In [52]:
# categorical column
categorical_column = [col for col in dataset.columns if dataset[col].dtype== 'O']

In [53]:
categorical_column

['Soil Type', 'Crop Type', 'Fertilizer Name']

In [54]:
dataset['Soil Type'].value_counts()

Loamy     21
Sandy     20
Clayey    20
Black     19
Red       19
Name: Soil Type, dtype: int64

In [55]:
for i in categorical_column:
    print(i)
    print(dataset[i].unique())
    

Soil Type
['Sandy' 'Loamy' 'Black' 'Red' 'Clayey']
Crop Type
['Maize' 'Sugarcane' 'Cotton' 'Tobacco' 'Paddy' 'Barley' 'Wheat' 'Millets'
 'Oil seeds' 'Pulses' 'Ground Nuts']
Fertilizer Name
['Urea' 'DAP' '14-35-14' '28-28' '17-17-17' '20-20' '10-26-26']


In [56]:
def encode1(column_name):
    dict_lable = dict()
    labels_list = [lable for lable in dataset[column_name].unique()]   
    for i, label in enumerate(labels_list):
        dict_lable[label] = i
    return dict_lable

In [57]:
soil_type_dict =encode1('Soil Type')

In [58]:
soil_type_dict

{'Sandy': 0, 'Loamy': 1, 'Black': 2, 'Red': 3, 'Clayey': 4}

In [59]:
dataset['Soil Type'] = dataset['Soil Type'].replace(soil_type_dict)

In [60]:
crop_type_dict = encode1('Crop Type')

In [61]:
crop_type_dict

{'Maize': 0,
 'Sugarcane': 1,
 'Cotton': 2,
 'Tobacco': 3,
 'Paddy': 4,
 'Barley': 5,
 'Wheat': 6,
 'Millets': 7,
 'Oil seeds': 8,
 'Pulses': 9,
 'Ground Nuts': 10}

In [62]:
dataset['Crop Type'] = dataset['Crop Type'].replace(crop_type_dict)

In [63]:
fertilizer_dict = encode1('Fertilizer Name')

In [64]:
fertilizer_dict

{'Urea': 0,
 'DAP': 1,
 '14-35-14': 2,
 '28-28': 3,
 '17-17-17': 4,
 '20-20': 5,
 '10-26-26': 6}

In [65]:
dataset['Fertilizer Name'] = dataset['Fertilizer Name'].replace(fertilizer_dict)

In [66]:
dataset.columns

Index(['Temparature', 'Humidity ', 'Moisture', 'Soil Type', 'Crop Type',
       'Nitrogen', 'Potassium', 'Phosphorous', 'Fertilizer Name'],
      dtype='object')

In [67]:
dataset.head()

Unnamed: 0,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,26,52,38,0,0,37,0,0,0
1,29,52,45,1,1,12,0,36,1
2,34,65,62,2,2,7,9,30,2
3,32,62,34,3,3,22,0,20,3
4,28,54,46,4,4,35,0,0,0


In [102]:
max(dataset['Temparature'])

38

In [68]:
##seperating dependent independent features
X = dataset.drop('Fertilizer Name', axis = 1)
y = dataset['Fertilizer Name']

In [69]:
## Normalization

In [70]:
from sklearn.model_selection import train_test_split


In [71]:
X_train, X_test, y_train, y_test = train_test_split(
...     X, y, test_size=0.33, random_state=42)

In [72]:
from sklearn.preprocessing import StandardScaler

In [73]:
scaler = StandardScaler()

In [74]:
scaled_X_train = scaler.fit_transform(X_train)

In [40]:
scaled_X_test = scaler.transform(X_test)

In [41]:
pickle.dump(scaler, open('standard_scaler.pkl', "wb"))

In [75]:
##Model training

In [112]:
random_classifier = RandomForestClassifier()
svc_model = SVC()

## Logistic Model

In [77]:
logistic_model = logistic_model.fit(scaled_X_train,y_train)

In [78]:
y_pred = logistic_model.predict(scaled_X_test)

In [85]:
test_score = accuracy_score(y_pred,y_test)

In [86]:
test_score

1.0

In [80]:
## to test underfitting
y_pred_x_train = logistic_model.predict(scaled_X_train)

In [84]:
train_score = accuracy_score(y_pred_x_train,y_train)

In [87]:
train_score

0.9848484848484849

In [88]:
test_score - train_score

0.015151515151515138

In [89]:
##lets check if we can decrease the test and train accuracy score's difference

# Random Forest

In [92]:
random_forest = RandomForestClassifier(max_depth=4, random_state=12)

In [94]:
random_forest = random_forest.fit(scaled_X_train,y_train)

In [95]:
y_pred = random_forest.predict(scaled_X_test)

In [96]:
accuracy_score(y_pred,y_test)

0.9696969696969697

In [97]:
train_pred = random_forest.predict(scaled_X_train)

In [98]:
accuracy_score(train_pred,y_train)

1.0

In [99]:
##we will use Logistic model for the further use

In [100]:
#creating object of the model

# Hyper parameter tunning Logistic model

In [110]:
parameters = [{'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']},
              {'penalty':['none', 'elasticnet', 'l1', 'l2']},
              {'C':[0.001, 0.01, 0.1, 1, 10, 2,3,4,5,6]}]



grid_search = GridSearchCV(estimator = logistic_model,  
                           param_grid = parameters,
                           scoring = 'accuracy',
                           cv = 5,
                           verbose=0)


grid_search.fit(scaled_X_train,y_train)

10 fits failed out of a total of 95.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most recent call last):
  File "/home/rajan/anaconda3/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/rajan/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py", line 1162, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/home/rajan/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py", line 54, in _check_solver
    raise ValueError(
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got elasticnet penalty.

-

In [111]:
grid_search.best_params_

{'C': 2}

In [117]:
logistic_model = LogisticRegression(C=  2)
logistic_model= logistic_model.fit(scaled_X_train,y_train)

In [118]:
y_pred = logistic_model.predict(scaled_X_test)

In [119]:
test_score = accuracy_score(y_pred,y_test)

In [120]:
test_score

1.0

In [121]:
## to test underfitting
y_pred_x_train = logistic_model.predict(scaled_X_train)

In [122]:
train_score = accuracy_score(y_pred_x_train,y_train)

In [123]:
train_score

1.0

In [124]:
### we are able to get the best accurate model

In [125]:
pickle.dump(logistic_model, open('fertilizer_recommend.pkl', 'wb'))

In [126]:
## Pipeline for the model prediction

In [127]:
temp = 26
humi = 52
moist = 38
soil_type = "Sandy"
crop_type = "Maize"
nitrogen = 37
potassium = 0
phos = 0

In [129]:
def fertilizer_prediction(temp, humi,moist,soil_type,crop_type,nitrogen,potassium, phos):
    soil_type_dict = {'Sandy': 0, 'Loamy': 1, 'Black': 2, 'Red': 3, 'Clayey': 4}
    fertilizer_dict = {'Urea': 0,'DAP': 1,'14-35-14': 2,'28-28': 3,'17-17-17': 4,'20-20': 5,'10-26-26': 6}
    crop_type_dict = {'Maize': 0,'Sugarcane': 1,'Cotton': 2,'Tobacco': 3,'Paddy': 4,'Barley': 5,'Wheat': 6,'Millets': 7,'Oil seeds': 8,'Pulses': 9,'Ground Nuts': 10}
    soil_type_econded= soil_type_dict[soil_type]
    crop_type_encoded = crop_type_dict[crop_type]
    input1 = pd.DataFrame([[temp,humi,moist,soil_type_econded,crop_type_encoded,nitrogen,potassium,phos]],columns=['Temparature', 'Humidity ', 'Moisture', 'Soil Type', 'Crop Type',
       'Nitrogen', 'Potassium', 'Phosphorous'])
    scaler = pickle.load(open('standard_scaler.pkl', 'rb'))
    test_data = scaler.transform(input1)
    model = pickle.load(open('fertilizer_recommend.pkl', 'rb'))
    prediction = model.predict(input1)[0]
    fertilizer_predicted = [i for i in fertilizer_dict if fertilizer_dict[i] == prediction]
    return fertilizer_predicted
    
fertilizer_prediction(temp,humi,moist, soil_type,crop_type,nitrogen,potassium,phos)



['Urea']