In [69]:
import pandas as pd
import numpy as np

# Load the plant-care table; the CSV is decoded as Latin-1.
df = pd.read_csv(
    "plants.csv",
    encoding="latin1",
)
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,Plant Name,Growth,Soil,Sunlight,Watering,Fertilization Type
0,Aloe Vera,slow,sandy,indirect sunlight,Water weekly,Balanced
1,Basil,fast,well-drained,full sunlight,Keep soil evenly moist,Organic
2,Snake Plant,slow,well-drained,indirect sunlight,Water when soil is dry,No
3,Lavender,moderate,sandy,full sunlight,Let soil dry between watering,No
4,Cactus,slow,sandy,full sunlight,Let soil dry between watering,Low-nitrogen


In [70]:
# Rows count as duplicates when every column after the first one
# (`Unnamed: 0` in the preview) matches; the earliest occurrence is kept
# and the index is renumbered from 0.
dedup_columns = df.columns[1:]
df = (
    df
    .drop_duplicates(subset=dedup_columns, keep='first')
    .reset_index(drop=True)
)
# Save the de-duplicated table without writing the index as a column.
df.to_csv("dupa.csv", index=False)

In [None]:
# Normalize the lowercase variant "Regular watering" to "Regular Watering" so
# it matches the spelling used by the score mapping further down.
# A single .loc call writes into `df` itself; the former chained form
# (df['Watering'][mask] = ...) assigns into an intermediate Series, which
# triggers SettingWithCopyWarning and leaves `df` unchanged under pandas
# Copy-on-Write.
df.loc[df["Watering"] == "Regular watering", "Watering"] = "Regular Watering"

In [None]:
# List the distinct watering instructions, in order of first appearance.
pd.unique(df['Watering'])

In [73]:
# Watering instructions in ranking order. Only the position in this list
# determines the score; the order appears to run from "keep moist" down to
# "let the soil dry out".
moisture_levels = [
    "Keep soil consistently moist",
    "Keep soil evenly moist",
    "Keep soil moist",
    "Keep soil slightly moist",
    "Regular, moist soil",
    "Regular Watering",
    "Regular, well-drained soil",
    "Water weekly",
    "Water when soil feels dry",
    "Water when topsoil is dry",
    "Water when soil is dry",
    "Let soil dry between watering",
]

# Evenly spaced scores: the first label gets 1, the last label gets -1.
level = np.linspace(start=1, stop=-1, num=len(moisture_levels))
mapping = {label: score for label, score in zip(moisture_levels, level)}

# Labels that are not keys of `mapping` become NaN in the new column.
df['Watering Frequency'] = df["Watering"].map(mapping)
df.head(n=5)

Unnamed: 0,Plant Name,Growth,Soil,Sunlight,Watering,Fertilization Type,Watering Frequency
0,Aloe Vera,slow,sandy,indirect sunlight,Water weekly,Balanced,-0.272727
1,Basil,fast,well-drained,full sunlight,Keep soil evenly moist,Organic,0.818182
2,Snake Plant,slow,well-drained,indirect sunlight,Water when soil is dry,No,-0.818182
3,Lavender,moderate,sandy,full sunlight,Let soil dry between watering,No,-1.0
4,Cactus,slow,sandy,full sunlight,Let soil dry between watering,Low-nitrogen,-1.0


In [74]:
# List the distinct fertilization types, in order of first appearance.
pd.unique(df['Fertilization Type'])

array(['Balanced', 'Organic', 'No', 'Low-nitrogen', 'Acidic'],
      dtype=object)

In [75]:
# Fertilization types in ranking order; list position determines the score.
health_level = ["Balanced", "Organic", "No", "Low-nitrogen", "Acidic"]

# Five evenly spaced scores: 1, 0.5, 0, -0.5, -1.
level = np.linspace(start=1, stop=-1, num=len(health_level))
mapping = {label: score for label, score in zip(health_level, level)}

# Labels that are not keys of `mapping` become NaN in the new column.
df["Health Indicator"] = df['Fertilization Type'].map(mapping)

In [76]:
# Preview the first five rows, now including the "Health Indicator" column.
df.head(n=5)

Unnamed: 0,Plant Name,Growth,Soil,Sunlight,Watering,Fertilization Type,Watering Frequency,Health Indicator
0,Aloe Vera,slow,sandy,indirect sunlight,Water weekly,Balanced,-0.272727,1.0
1,Basil,fast,well-drained,full sunlight,Keep soil evenly moist,Organic,0.818182,0.5
2,Snake Plant,slow,well-drained,indirect sunlight,Water when soil is dry,No,-0.818182,0.0
3,Lavender,moderate,sandy,full sunlight,Let soil dry between watering,No,-1.0,0.0
4,Cactus,slow,sandy,full sunlight,Let soil dry between watering,Low-nitrogen,-1.0,-0.5


In [77]:
# List the distinct growth-rate labels, in order of first appearance.
pd.unique(df['Growth'])

array(['slow', 'fast', 'moderate'], dtype=object)

In [78]:
# Growth-rate labels in ranking order; list position determines the score.
efficiency_level = ["fast", "moderate", "slow"]

# Three evenly spaced scores: fast -> 1, moderate -> 0, slow -> -1.
level = np.linspace(start=1, stop=-1, num=len(efficiency_level))
mapping = {label: score for label, score in zip(efficiency_level, level)}

# Labels that are not keys of `mapping` become NaN in the new column.
df["Efficiency"] = df['Growth'].map(mapping)

In [79]:
# Preview the first five rows, now including the "Efficiency" column.
df.head(n=5)

Unnamed: 0,Plant Name,Growth,Soil,Sunlight,Watering,Fertilization Type,Watering Frequency,Health Indicator,Efficiency
0,Aloe Vera,slow,sandy,indirect sunlight,Water weekly,Balanced,-0.272727,1.0,-1.0
1,Basil,fast,well-drained,full sunlight,Keep soil evenly moist,Organic,0.818182,0.5,1.0
2,Snake Plant,slow,well-drained,indirect sunlight,Water when soil is dry,No,-0.818182,0.0,-1.0
3,Lavender,moderate,sandy,full sunlight,Let soil dry between watering,No,-1.0,0.0,0.0
4,Cactus,slow,sandy,full sunlight,Let soil dry between watering,Low-nitrogen,-1.0,-0.5,-1.0


In [80]:
# List the distinct soil types, in order of first appearance.
pd.unique(df['Soil'])

array(['sandy', 'well-drained', 'loamy', 'acidic', 'moist'], dtype=object)

In [81]:
# Soil types in ranking order; list position determines the score.
soil = ["loamy", "moist", "well-drained", "sandy", "acidic"]

# Five evenly spaced scores: 1, 0.5, 0, -0.5, -1.
level = np.linspace(start=1, stop=-1, num=len(soil))
mapping = {label: score for label, score in zip(soil, level)}

# Labels that are not keys of `mapping` become NaN in the new column.
df['Soil goodness'] = df['Soil'].map(mapping)

df.head(n=5)

Unnamed: 0,Plant Name,Growth,Soil,Sunlight,Watering,Fertilization Type,Watering Frequency,Health Indicator,Efficiency,Soil goodness
0,Aloe Vera,slow,sandy,indirect sunlight,Water weekly,Balanced,-0.272727,1.0,-1.0,-0.5
1,Basil,fast,well-drained,full sunlight,Keep soil evenly moist,Organic,0.818182,0.5,1.0,0.0
2,Snake Plant,slow,well-drained,indirect sunlight,Water when soil is dry,No,-0.818182,0.0,-1.0,0.0
3,Lavender,moderate,sandy,full sunlight,Let soil dry between watering,No,-1.0,0.0,0.0,-0.5
4,Cactus,slow,sandy,full sunlight,Let soil dry between watering,Low-nitrogen,-1.0,-0.5,-1.0,-0.5


In [82]:
# List the distinct sunlight requirements, in order of first appearance.
pd.unique(df['Sunlight'])

array(['indirect sunlight', 'full sunlight', 'partial sunlight'],
      dtype=object)

In [83]:
# Sunlight requirements in ranking order; list position determines the score.
sun = [
    'full sunlight',
    'partial sunlight',
    'indirect sunlight',
]

# Three evenly spaced scores: full -> 1, partial -> 0, indirect -> -1.
level = np.linspace(start=1, stop=-1, num=len(sun))
mapping = {label: score for label, score in zip(sun, level)}

# Labels that are not keys of `mapping` become NaN in the new column.
df['sun liking'] = df['Sunlight'].map(mapping)

In [84]:
# Count the rows that share each combination of the encoded features.
# 'Fertilization Type' is the raw label; the other keys are the numeric
# scores derived in the cells above.
group_keys = [
    'Watering Frequency',
    'Fertilization Type',
    'Health Indicator',
    'Efficiency',
    'Soil goodness',
    'sun liking',
]
counts = df.groupby(group_keys).size()
counts

Watering Frequency  Fertilization Type  Health Indicator  Efficiency  Soil goodness  sun liking
-1.0                Balanced             1.0               0.0        -1.0           0.0           1
                                                                      -0.5           1.0           1
                                                                       0.0           1.0           1
                                                                       1.0           0.0           1
                    Low-nitrogen        -0.5              -1.0        -0.5           1.0           1
                                                                                                  ..
 1.0                Organic              0.5               0.0        -1.0           1.0           1
                                                                       0.0           0.0           1
                                                                                     1.0        