In [1]:
import pandas as pd
import numpy as np
import random
import os

import matplotlib.pyplot as plt
import seaborn as sns

In [74]:
# Read the fertilizer dataset from a CSV in the working directory and preview
# the first rows.
DATA_FILE = "Fertilizer_Recommendation.csv"
df = pd.read_csv(DATA_FILE)
df.head()

Unnamed: 0,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name
0,26,52,38,Sandy,Maize,37,0,0,Urea
1,29,52,45,Loamy,Sugarcane,12,0,36,DAP
2,34,65,62,Black,Cotton,7,9,30,14-35-14
3,32,62,34,Red,Tobacco,22,0,20,28-28
4,28,54,46,Clayey,Paddy,35,0,0,Urea


In [64]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
df.shape

(99, 9)

In [65]:
# Column labels of the frame. Note in the output that 'Humidity ' carries a
# trailing space, so it must be spelled that way when selecting the column.
df.columns

Index(['Temparature', 'Humidity ', 'Moisture', 'Soil Type', 'Crop Type',
       'Nitrogen', 'Potassium', 'Phosphorous', 'Fertilizer Name'],
      dtype='object')

In [66]:
# Per-column dtype and non-null count summary.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99 entries, 0 to 98
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Temparature      99 non-null     int64 
 1   Humidity         99 non-null     int64 
 2   Moisture         99 non-null     int64 
 3   Soil Type        99 non-null     object
 4   Crop Type        99 non-null     object
 5   Nitrogen         99 non-null     int64 
 6   Potassium        99 non-null     int64 
 7   Phosphorous      99 non-null     int64 
 8   Fertilizer Name  99 non-null     object
dtypes: int64(6), object(3)
memory usage: 7.1+ KB


In [67]:
# Number of missing values in each column.
df.isna().sum()

Temparature        0
Humidity           0
Moisture           0
Soil Type          0
Crop Type          0
Nitrogen           0
Potassium          0
Phosphorous        0
Fertilizer Name    0
dtype: int64

In [68]:
# Inspect the 'Fertilizer Name' column: how many distinct fertilizers appear
# and which ones. The messages name fertilizers (not crops) because that is
# what this column holds.
fertilizer_names = df['Fertilizer Name'].unique()
print("Number of distinct fertilizers: ", len(fertilizer_names))
print("List of fertilizers: ", fertilizer_names)

Number of various crops:  7
List of crops:  ['Urea' 'DAP' '14-35-14' '28-28' '17-17-17' '20-20' '10-26-26']


In [69]:
# Number of rows per fertilizer label, to see how balanced the labels are.
df['Fertilizer Name'].value_counts()

Fertilizer Name
Urea        22
DAP         18
28-28       17
14-35-14    14
20-20       14
17-17-17     7
10-26-26     7
Name: count, dtype: int64

In [70]:
# Print the column labels of df as plain text.
print(df.columns)


Index(['Temparature', 'Humidity ', 'Moisture', 'Soil Type', 'Crop Type',
       'Nitrogen', 'Potassium', 'Phosphorous', 'Fertilizer Name'],
      dtype='object')


In [75]:
from sklearn.preprocessing import LabelEncoder

# One encoder per categorical feature so each keeps its own label vocabulary.
label_encoder_soil = LabelEncoder()
label_encoder_crop = LabelEncoder()

# Replace the text columns of df with their integer codes.
# NOTE: df is overwritten in place, so re-running this cell fits the encoders
# on the already-encoded integers instead of the original names.
for column, encoder in (('Soil Type', label_encoder_soil),
                        ('Crop Type', label_encoder_crop)):
    df[column] = encoder.fit_transform(df[column])

# Label -> code lookup tables derived from each fitted encoder's classes_.
soil_labels = label_encoder_soil.classes_
soil_mapping = dict(zip(soil_labels, label_encoder_soil.transform(soil_labels)))

crop_labels = label_encoder_crop.classes_
crop_mapping = dict(zip(crop_labels, label_encoder_crop.transform(crop_labels)))

# Print both lookup tables, one "label: code" pair per line.
for heading, mapping in (("Soil Type Label Mapping:", soil_mapping),
                         ("\nCrop Type Label Mapping:", crop_mapping)):
    print(heading)
    for label, num in mapping.items():
        print(f"{label}: {num}")


Soil Type Label Mapping:
Black: 0
Clayey: 1
Loamy: 2
Red: 3
Sandy: 4

Crop Type Label Mapping:
Barley: 0
Cotton: 1
Ground Nuts: 2
Maize: 3
Millets: 4
Oil seeds: 5
Paddy: 6
Pulses: 7
Sugarcane: 8
Tobacco: 9
Wheat: 10


In [97]:
# Values of the first row of df as a plain list (handy as a sample input).
first_row = df.head(1).to_numpy()[0]
list(first_row)

[26, 52, 38, 4, 3, 37, 0, 0, 'Urea']

In [77]:
# Mean of every column, grouped by fertilizer.
# NOTE: if 'Soil Type' / 'Crop Type' have already been label-encoded, their
# means are averages of arbitrary integer codes and carry no real meaning.
crop_summary = df.pivot_table(index=['Fertilizer Name'], aggfunc='mean')
crop_summary.head()

Unnamed: 0_level_0,Crop Type,Humidity,Moisture,Nitrogen,Phosphorous,Potassium,Soil Type,Temparature
Fertilizer Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
10-26-26,6.857143,58.142857,39.285714,7.571429,17.714286,17.714286,2.142857,29.714286
14-35-14,4.571429,61.142857,45.214286,8.214286,29.571429,8.571429,1.857143,31.357143
17-17-17,4.285714,57.571429,47.142857,12.142857,13.142857,13.0,2.571429,29.0
20-20,6.071429,57.571429,45.285714,11.214286,11.571429,0.0,1.714286,29.142857
28-28,5.294118,58.117647,41.941176,22.647059,21.058824,0.0,2.352941,29.529412


In [78]:
# Split the frame into the feature matrix (x) and the label to predict (y).
target_column = 'Fertilizer Name'
x = df.drop(columns=target_column)
y = df[target_column]

In [79]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# shuffled split the same on every run.
split_parts = train_test_split(x, y, test_size=0.3, shuffle=True, random_state=0)
x_train, x_test, y_train, y_test = split_parts

In [82]:
# Train a LightGBM classifier with its default hyper-parameters on the
# training split.
import lightgbm as lgb

model = lgb.LGBMClassifier()
model.fit(X=x_train, y=y_train)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000372 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 93
[LightGBM] [Info] Number of data points in the train set: 69, number of used features: 7
[LightGBM] [Info] Start training from score -2.847812
[LightGBM] [Info] Start training from score -2.036882
[LightGBM] [Info] Start training from score -2.442347
[LightGBM] [Info] Start training from score -1.836211
[LightGBM] [Info] Start training from score -2.036882
[LightGBM] [Info] Start training from score -1.526056
[LightGBM] [Info] Start training from score -1.526056


In [83]:
# Score the model on the same rows it was fitted on. This is not a held-out
# estimate; the test-set accuracy is computed separately further down.
model.score(x_train,y_train)

1.0

In [84]:
# Predict a fertilizer label for every row of the held-out split.
y_pred=model.predict(x_test)

In [85]:
# Display the held-out feature rows (original row indices are preserved).
x_test

Unnamed: 0,Temparature,Humidity,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous
26,30,60,63,3,1,9,9,29
86,29,58,30,3,9,13,17,16
2,34,65,62,0,1,7,9,30
55,27,53,43,0,4,23,0,24
75,29,58,42,1,6,9,10,22
92,36,68,41,3,2,41,0,0
16,33,64,34,1,7,38,0,0
73,27,53,34,0,5,42,0,0
54,31,62,63,3,1,11,12,15
94,25,50,32,1,7,24,0,19


In [86]:
# Display the true fertilizer labels for the held-out rows.
y_test

26    14-35-14
86    10-26-26
2     14-35-14
55       28-28
75    14-35-14
92        Urea
16        Urea
73        Urea
54    17-17-17
94       28-28
53         DAP
91    10-26-26
78    10-26-26
13       28-28
7         Urea
30       28-28
22         DAP
24       20-20
33    14-35-14
8        28-28
43        Urea
62       28-28
3        28-28
71        Urea
45    14-35-14
48        Urea
6        20-20
98       20-20
82         DAP
76       28-28
Name: Fertilizer Name, dtype: object

In [87]:
from sklearn.metrics import accuracy_score

# Compare the held-out labels with the model's predictions for those rows.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.9333333333333333

In [88]:
# Values of the first held-out feature row as a plain list.
first_test_row = x_test.head(1).to_numpy()[0]
list(first_test_row)

[30, 60, 63, 3, 1, 9, 9, 29]

In [98]:
# Sanity-check the fitted model on one hand-written sample (the same values
# as the first row of df listed earlier).
# The sample is wrapped in a one-row DataFrame that reuses the training column
# labels, so every value is tied to a named feature instead of relying on the
# positional order of a bare nested list.
sample = pd.DataFrame([[26, 52, 38, 4, 3, 37, 0, 0]], columns=x_train.columns)
test = model.predict(sample)
print(test[0])

Urea


In [99]:
import pickle

In [100]:
# Persist the fitted model to disk. The context manager guarantees the file
# handle is flushed and closed even if pickling raises.
with open('model_saved_fert.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

In [101]:
# Reload the model written by the previous cell; the context manager closes
# the file handle once unpickling finishes.
# NOTE: pickle.load can execute arbitrary code, so only load files you created
# yourself or otherwise trust.
with open('model_saved_fert.pkl', 'rb') as model_file:
    model_loaded = pickle.load(model_file)

In [102]:
# Confirm the reloaded model reproduces the prediction for the same
# hand-written sample used before pickling.
# A one-row DataFrame with the training column labels keeps each value tied to
# its named feature rather than to its position in a bare nested list.
sample = pd.DataFrame([[26, 52, 38, 4, 3, 37, 0, 0]], columns=x_train.columns)
test = model_loaded.predict(sample)
print(test[0])

Urea
