In [42]:
import pandas as pd
import numpy as np

# Load the plant catalogue; the file is decoded as Latin-1 instead of the
# pandas default (UTF-8).
df = pd.read_csv(
    "datasets/plants.csv",
    encoding="latin1",
)
df.head()

Unnamed: 0,Plant Name,Growth,Soil,Sunlight,Watering,Fertilization Type
0,Aloe Vera,slow,sandy,indirect sunlight,Water weekly,Balanced
1,Basil,fast,well-drained,full sunlight,Keep soil evenly moist,Organic
2,Snake Plant,slow,well-drained,indirect sunlight,Water when soil is dry,No
3,Lavender,moderate,sandy,full sunlight,Let soil dry between watering,No
4,Cactus,slow,sandy,full sunlight,Let soil dry between watering,Low-nitrogen


In [7]:
# Keep only the first row seen for each distinct value of the first column.
df = df.drop_duplicates(
    subset=df.columns[0],
    keep='first',
)
df.shape

(388, 6)

Dropping duplicates, so that only unique parameter combinations remain. The edited dataset is saved to a new file for further use.

In [8]:
# Among rows that agree on every column except the first, keep only the
# first occurrence, then renumber the index from 0.
df = (
    df
    .drop_duplicates(subset=df.columns[1:], keep='first')
    .reset_index(drop=True)
)


In [55]:
# Normalise the capitalisation of one watering label so that both spellings
# collapse into the single value "Regular Watering".
lowercase_variant = df["Watering"] == "Regular watering"
df.loc[lowercase_variant, "Watering"] = "Regular Watering"


In [56]:
# List the distinct watering labels (NaN included) before encoding them.
df["Watering"].unique()

array(['Water weekly', 'Keep soil evenly moist', 'Water when soil is dry',
       'Let soil dry between watering', 'Keep soil consistently moist',
       'Water when soil feels dry', 'Keep soil slightly moist',
       'Water when topsoil is dry', 'Keep soil moist', 'Regular Watering',
       'Regular, well-drained soil', 'Regular, moist soil', nan],
      dtype=object)

In [11]:
# Watering labels in the order used for encoding: the first entry is scored
# 1.0, the last -1.0, and the ones in between are evenly spaced.
# NOTE(review): the order looks like "most moist" -> "driest"; confirm that
# this ranking is the intended one.
moisture_levels = [
    "Keep soil consistently moist",
    "Keep soil evenly moist",
    "Keep soil moist",
    "Keep soil slightly moist",
    "Regular, moist soil",
    "Regular Watering",
    "Regular, well-drained soil",
    "Water weekly",
    "Water when soil feels dry",
    "Water when topsoil is dry",
    "Water when soil is dry",
    "Let soil dry between watering"
]
level = np.linspace(1, -1, len(moisture_levels))
mapping = dict(zip(moisture_levels, level))

# Series.map silently turns every label that is missing from `mapping` into
# NaN. Fail loudly instead, so a label variant that was not normalised first
# (e.g. "Regular watering") cannot slip through as a missing feature value.
# Genuinely missing entries (NaN) are left alone and stay NaN.
unmapped_labels = set(df["Watering"].dropna()) - set(mapping)
if unmapped_labels:
    raise ValueError(
        f"'Watering' labels without a score: {sorted(unmapped_labels)}"
    )

df['Watering Frequency'] = df["Watering"].map(mapping)
df.head()

Unnamed: 0,Plant Name,Growth,Soil,Sunlight,Watering,Fertilization Type,Watering Frequency
0,Aloe Vera,slow,sandy,indirect sunlight,Water weekly,Balanced,-0.272727
1,Basil,fast,well-drained,full sunlight,Keep soil evenly moist,Organic,0.818182
2,Snake Plant,slow,well-drained,indirect sunlight,Water when soil is dry,No,-0.818182
3,Lavender,moderate,sandy,full sunlight,Let soil dry between watering,No,-1.0
4,Cactus,slow,sandy,full sunlight,Let soil dry between watering,Low-nitrogen,-1.0


In [12]:
# List the distinct fertilization labels before encoding them.
df["Fertilization Type"].unique()

array(['Balanced', 'Organic', 'No', 'Low-nitrogen', 'Acidic'],
      dtype=object)

In [13]:
# Fertilization labels in encoding order: first entry -> 1.0, last -> -1.0,
# evenly spaced in between.
health_level = ["Balanced", "Organic", "No", "Low-nitrogen", "Acidic"]

level = np.linspace(1, -1, num=len(health_level))
mapping = {label: score for label, score in zip(health_level, level)}

# Labels absent from `mapping` would become NaN in the new column.
df["Health Indicator"] = df["Fertilization Type"].map(mapping)

In [14]:
# Preview the first five rows, now including the "Health Indicator" column.
df.head(n=5)

Unnamed: 0,Plant Name,Growth,Soil,Sunlight,Watering,Fertilization Type,Watering Frequency,Health Indicator
0,Aloe Vera,slow,sandy,indirect sunlight,Water weekly,Balanced,-0.272727,1.0
1,Basil,fast,well-drained,full sunlight,Keep soil evenly moist,Organic,0.818182,0.5
2,Snake Plant,slow,well-drained,indirect sunlight,Water when soil is dry,No,-0.818182,0.0
3,Lavender,moderate,sandy,full sunlight,Let soil dry between watering,No,-1.0,0.0
4,Cactus,slow,sandy,full sunlight,Let soil dry between watering,Low-nitrogen,-1.0,-0.5


In [15]:
# List the distinct growth-rate labels before encoding them.
df["Growth"].unique()

array(['slow', 'fast', 'moderate'], dtype=object)

In [16]:
# Growth labels in encoding order: "fast" -> 1.0, "moderate" -> 0.0,
# "slow" -> -1.0.
efficiency_level = ["fast", "moderate", "slow"]

level = np.linspace(1, -1, num=len(efficiency_level))
mapping = {label: score for label, score in zip(efficiency_level, level)}

# Labels absent from `mapping` would become NaN in the new column.
df["Efficiency"] = df["Growth"].map(mapping)

In [17]:
# Preview the first five rows, now including the "Efficiency" column.
df.head(n=5)

Unnamed: 0,Plant Name,Growth,Soil,Sunlight,Watering,Fertilization Type,Watering Frequency,Health Indicator,Efficiency
0,Aloe Vera,slow,sandy,indirect sunlight,Water weekly,Balanced,-0.272727,1.0,-1.0
1,Basil,fast,well-drained,full sunlight,Keep soil evenly moist,Organic,0.818182,0.5,1.0
2,Snake Plant,slow,well-drained,indirect sunlight,Water when soil is dry,No,-0.818182,0.0,-1.0
3,Lavender,moderate,sandy,full sunlight,Let soil dry between watering,No,-1.0,0.0,0.0
4,Cactus,slow,sandy,full sunlight,Let soil dry between watering,Low-nitrogen,-1.0,-0.5,-1.0


In [18]:
# List the distinct soil labels before encoding them.
df["Soil"].unique()

array(['sandy', 'well-drained', 'loamy', 'acidic', 'moist'], dtype=object)

In [19]:
# Soil labels in encoding order: first entry -> 1.0, last -> -1.0, evenly
# spaced in between.
soil = ["loamy", "moist", "well-drained", "sandy", "acidic"]

level = np.linspace(1, -1, num=len(soil))
mapping = {label: score for label, score in zip(soil, level)}

# Labels absent from `mapping` would become NaN in the new column.
df["Soil goodness"] = df["Soil"].map(mapping)

df.head(n=5)

Unnamed: 0,Plant Name,Growth,Soil,Sunlight,Watering,Fertilization Type,Watering Frequency,Health Indicator,Efficiency,Soil goodness
0,Aloe Vera,slow,sandy,indirect sunlight,Water weekly,Balanced,-0.272727,1.0,-1.0,-0.5
1,Basil,fast,well-drained,full sunlight,Keep soil evenly moist,Organic,0.818182,0.5,1.0,0.0
2,Snake Plant,slow,well-drained,indirect sunlight,Water when soil is dry,No,-0.818182,0.0,-1.0,0.0
3,Lavender,moderate,sandy,full sunlight,Let soil dry between watering,No,-1.0,0.0,0.0,-0.5
4,Cactus,slow,sandy,full sunlight,Let soil dry between watering,Low-nitrogen,-1.0,-0.5,-1.0,-0.5


In [20]:
# List the distinct sunlight labels before encoding them.
df["Sunlight"].unique()

array(['indirect sunlight', 'full sunlight', 'partial sunlight'],
      dtype=object)

In [21]:
# Sunlight labels in encoding order: "full sunlight" -> 1.0,
# "partial sunlight" -> 0.0, "indirect sunlight" -> -1.0.
sun = [
    'full sunlight',
    'partial sunlight',
    'indirect sunlight',
]

level = np.linspace(1, -1, num=len(sun))
mapping = {label: score for label, score in zip(sun, level)}

# Labels absent from `mapping` would become NaN in the new column.
df["sun liking"] = df["Sunlight"].map(mapping)

In [22]:
# Preview the first five rows, now including the "sun liking" column.
df.head(n=5)

Unnamed: 0,Plant Name,Growth,Soil,Sunlight,Watering,Fertilization Type,Watering Frequency,Health Indicator,Efficiency,Soil goodness,sun liking
0,Aloe Vera,slow,sandy,indirect sunlight,Water weekly,Balanced,-0.272727,1.0,-1.0,-0.5,-1.0
1,Basil,fast,well-drained,full sunlight,Keep soil evenly moist,Organic,0.818182,0.5,1.0,0.0,1.0
2,Snake Plant,slow,well-drained,indirect sunlight,Water when soil is dry,No,-0.818182,0.0,-1.0,0.0,-1.0
3,Lavender,moderate,sandy,full sunlight,Let soil dry between watering,No,-1.0,0.0,0.0,-0.5,1.0
4,Cactus,slow,sandy,full sunlight,Let soil dry between watering,Low-nitrogen,-1.0,-0.5,-1.0,-0.5,1.0


In [23]:
# Features are every column from position 6 onward; the target is the first
# column. Both are selected by position, so this assumes the frame still has
# its six original columns first, followed by the encoded ones -- verify
# after changing the column layout.
X, y = df.iloc[:, 6:].values, df.iloc[:, 0].values
y

array(['Aloe Vera', 'Basil', 'Snake Plant', 'Lavender', 'Cactus',
       'Rosemary', 'Mint', 'Thyme', 'Peace Lily', 'Spider Plant',
       'Parsley', 'Fern (Boston)', 'Bamboo Palm', 'Money Plant',
       'Jade Plant', 'Orchids', 'Cilantro', 'Dill', 'Oregano', 'Pothos',
       'Bougainvillea', 'Fiddle Leaf Fig', 'Sage', 'Cosmos', 'Geranium',
       'Marigold', 'Zinnia', 'Hibiscus', 'Begonia', 'Chrysanthemum',
       'Daffodil', 'Tulip', 'Jasmine', 'Ivy', 'Nasturtium', 'Carnation',
       'Pansy', 'Sweet Pea', 'Verbena', 'Gerbera Daisy', 'Hyacinth',
       'Foxglove', 'Bluebell', 'Camellia', 'Magnolia', 'Gardenia',
       'Azalea', 'Eucalyptus', 'Lotus', 'Orchid Cactus', 'Lotus Bamboo',
       'Coneflower', 'Alyssum', 'Lobelia', 'Aster', 'Coral Bells',
       'Bleeding Heart', 'Sweet Alyssum', 'Astilbe', 'Coreopsis',
       'Globe Thistle', 'Catnip', 'Tarragon', 'Bay Laurel', 'Lemon Balm',
       'Fennel', 'Stevia', 'Marjoram', 'ZZ Plant', 'Philodendron',
       'Calathea', 'Croton', 'Fi

In [24]:
from sklearn.neighbors import KNeighborsClassifier
# With n_neighbors=1, a prediction is the label of the single closest
# training row. fit() returns the estimator itself, so it can be chained.
classifier = KNeighborsClassifier(n_neighbors=1).fit(X, y)
classifier


0,1,2
,n_neighbors,1
,weights,'uniform'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,


In [25]:
import pickle

In [26]:
# Persist the fitted classifier. The context manager guarantees the file is
# flushed and closed even if pickling fails, instead of leaving the handle
# open until garbage collection.
# NOTE: pickle files can execute arbitrary code when loaded; only load this
# file from a trusted location.
filename = 'content/knn_classifier.svn'
with open(filename, 'wb') as model_file:
    pickle.dump(classifier, model_file)

# Save the de-duplicated, encoded table next to the original dataset.
df.to_csv('datasets/plants_unique.csv', index=False)

In [27]:
# One query row of five feature values. NOTE(review): presumably in the same
# column order as X (Watering Frequency, Health Indicator, Efficiency,
# Soil goodness, sun liking) -- confirm against how X was built.
query_rows = [[1, 0.67, 1, 0, 1]]
classifier.predict(query_rows)

array(['Basil'], dtype=object)

In [28]:
# Show the stored row(s) for the predicted plant, printed without the index.
basil_rows = df.loc[df['Plant Name'] == 'Basil']
print(basil_rows.to_string(index=False))

Plant Name Growth         Soil      Sunlight               Watering Fertilization Type  Watering Frequency  Health Indicator  Efficiency  Soil goodness  sun liking
     Basil   fast well-drained full sunlight Keep soil evenly moist            Organic            0.818182               0.5         1.0            0.0         1.0
