In [1]:
#manipulation
import numpy as pd
import pandas as pd

#visualisation
import seaborn as sns
import matplotlib.pyplot as plt

#interactivity
from ipywidgets import interact


In [2]:
# Load the crop recommendation dataset into a DataFrame. The path is relative,
# so the CSV file has to be in the notebook's working directory.
data = pd.read_csv("crop_recommendation.csv")

In [4]:
# Dimensions of the dataset as a (rows, columns) tuple.
data.shape

(2200, 8)

In [5]:
# Preview the first 7 rows to see the columns and some typical values.
data.head(7)

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice
5,69,37,42,23.058049,83.370118,7.073454,251.055,rice
6,69,55,38,22.708838,82.639414,5.700806,271.32486,rice


In [6]:
# Check for missing values: count the nulls in each column.

# If a column had gaps, they could be filled with the:
#   median : when outliers are present
#   mode   : when the data is categorical
#   mean   : when the data is numerical
data.isnull().sum()

N              0
P              0
K              0
temperature    0
humidity       0
ph             0
rainfall       0
label          0
dtype: int64

In [7]:
# List every unique crop label together with the number of rows that carry it.

data['label'].value_counts()

rice           100
maize          100
jute           100
cotton         100
coconut        100
papaya         100
orange         100
apple          100
muskmelon      100
watermelon     100
grapes         100
mango          100
banana         100
pomegranate    100
lentil         100
blackgram      100
mungbean       100
mothbeans      100
pigeonpeas     100
kidneybeans    100
chickpea       100
coffee         100
Name: label, dtype: int64

In [11]:
#summary for all crops

# Dataset-wide mean of each feature, one line per feature. Every entry pairs
# the message template with the column whose mean fills it in; the lines are
# joined and emitted with a single print so no helper names are left behind
# in the notebook namespace.
print("\n".join(
    template.format(data[column].mean())
    for template, column in (
        ("Average ratio of nitrogen in the soil: {0:.2f}", 'N'),
        ("Average ratio of phosphorus in the soil: {0:.2f}", 'P'),
        ("Average ratio of potassium in the soil: {0:.2f}", 'K'),
        ("Average temp: {0:.2f}", 'temperature'),
        ("Average relative humidity: {0:.2f}", 'humidity'),
        ("Average rainfall: {0:.2f}", 'rainfall'),
        ("Average ph of the soil: {0:.2f}", 'ph'),
    )
))

Average ratio of nitrogen in the soil: 50.55
Average ratio of phosphorus in the soil: 53.36
Average ratio of potassium in the soil: 48.15
Average temp: 25.62
Average relative humidity: 71.48
Average rainfall: 103.46
Average ph of the soil: 6.47


In [17]:
# summary stats for each crop

@interact
def sum(crops = list(data['label'].value_counts().index)):
    """Print the minimum, maximum and mean of every feature for one crop.

    Parameters
    ----------
    crops : str
        Crop label to summarise. The default is the list of all labels found
        in ``data['label']``, which ``interact`` renders as a dropdown; a
        direct caller should pass a single label.

    Returns
    -------
    None
        The statistics are printed, one section per feature.
    """
    # NOTE(review): the name `sum` shadows Python's built-in sum() for the
    # rest of the notebook. It is kept so existing references keep working,
    # but renaming it (e.g. `crop_summary`) would be safer.

    # Rows that belong to the selected crop only.
    x = data[data['label'] == crops]

    # (section heading, column name), in the order the sections are printed.
    sections = (
        ("NITROGEN", 'N'),
        ("PHOSPHORUS", 'P'),
        ("POTASSIUM", 'K'),
        ("TEMPERATURE", 'temperature'),
        ("HUMIDITY", 'humidity'),
        ("PH", 'ph'),
        ("RAINFALL", 'rainfall'),
    )

    for heading, column in sections:
        values = x[column]
        print("--------------------------")
        print("STATS FOR " + heading)
        print("min", values.min())
        print("max", values.max())
        print("mean", values.mean())
    

interactive(children=(Dropdown(description='crops', options=('rice', 'maize', 'jute', 'cotton', 'coconut', 'pa…

In [26]:
# comparing crops requirement based on selected parameters

@interact
def comp(conditions = ["N","P","K","temperature","ph","rainfall","humidity"]):
    """Compare the mean of one feature across all crops.

    Prints the dataset-wide mean of the chosen column, then the mean of that
    column for each crop in turn.

    Parameters
    ----------
    conditions : str
        Name of the column to compare. The list default is what ``interact``
        renders as a dropdown; a direct caller should pass a single column
        name.

    Returns
    -------
    None
        The comparison is printed.
    """
    print("Average value for",conditions,"is {0:.2f}".format(data[conditions].mean()))
    print("-----------------")

    # (display name, value in data['label']), in the order they are printed.
    crops = (
        ("Rice", 'rice'),
        ("Chick Peas", 'chickpea'),
        ("Maize", 'maize'),
        ("Mango", 'mango'),
        ("Lentil", 'lentil'),
        ("Kidney Beans", 'kidneybeans'),
        ("Pomegranate", 'pomegranate'),  # display name was misspelled "Pomogrenate"
        ("Mung Beans", 'mungbean'),
        ("Orange", 'orange'),
        ("Moth Beans", 'mothbeans'),
        ("Cotton", 'cotton'),
        ("Coffee", 'coffee'),
        ("Pigeon Peas", 'pigeonpeas'),
        ("Black Grams", 'blackgram'),
        ("Banana", 'banana'),
        ("Coconut", 'coconut'),
        ("Jute", 'jute'),
        ("Muskmelon", 'muskmelon'),
        ("Apple", 'apple'),
        ("Papaya", 'papaya'),
        ("Grapes", 'grapes'),
        ("Watermelon", 'watermelon'),
    )

    for display_name, label in crops:
        # Mean of the chosen column over the rows of this crop only.
        crop_mean = data[(data['label'] == label)][conditions].mean()
        print("{0} : {1:.2f}".format(display_name, crop_mean))

interactive(children=(Dropdown(description='conditions', options=('N', 'P', 'K', 'temperature', 'ph', 'rainfal…