In [1]:
import numpy as np 
import pandas as pd 
import mlflow
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

In [2]:
# Read the plant-growth CSV into a DataFrame and show it as the cell output.
data_path = 'plant_growth_data.csv'
df = pd.read_csv(data_path)
df

Unnamed: 0,Soil_Type,Sunlight_Hours,Water_Frequency,Fertilizer_Type,Temperature,Humidity,Growth_Milestone
0,loam,5.192294,bi-weekly,chemical,31.719602,61.591861,0
1,sandy,4.033133,weekly,organic,28.919484,52.422276,1
2,loam,8.892769,bi-weekly,none,23.179059,44.660539,0
3,loam,8.241144,bi-weekly,none,18.465886,46.433227,0
4,sandy,8.374043,bi-weekly,organic,18.128741,63.625923,0
...,...,...,...,...,...,...,...
188,sandy,5.652000,daily,none,28.000000,70.200000,0
189,clay,7.528000,weekly,chemical,30.500000,60.100000,1
190,loam,4.934000,bi-weekly,none,24.500000,61.700000,0
191,sandy,8.273000,daily,organic,27.900000,69.500000,1


In [3]:
# Count missing values per column of the freshly loaded data.
df.isnull().sum()

Soil_Type           0
Sunlight_Hours      0
Water_Frequency     0
Fertilizer_Type     0
Temperature         0
Humidity            0
Growth_Milestone    0
dtype: int64

In [4]:
# Map categorical variables to numerical values.
#
# The encoding is written back into `df`, so this cell guards against being
# run twice: a column that is already numeric has been encoded by an earlier
# run, and calling `.map()` on it again would turn every value into NaN
# (the integer codes are not keys of the mapping dicts).
# Labels that are missing from a mapping still become NaN, as before.
soil_type_mapping = {'loam': 0, 'sandy': 1, 'clay': 2}
water_frequency_mapping = {'daily': 0, 'bi-weekly': 1, 'weekly': 2}
fertilizer_type_mapping = {'none': 0, 'chemical': 1, 'organic': 2}

categorical_mappings = {
    'Soil_Type': soil_type_mapping,
    'Water_Frequency': water_frequency_mapping,
    'Fertilizer_Type': fertilizer_type_mapping,
}

for column, mapping in categorical_mappings.items():
    if pd.api.types.is_numeric_dtype(df[column]):
        continue  # already encoded by a previous run of this cell
    df[column] = df[column].map(mapping)

In [5]:
# Re-count missing values: `.map()` yields NaN for any label that is not a key
# of its mapping dict, so non-zero counts here would reveal unmapped labels.
df.isnull().sum()

Soil_Type           0
Sunlight_Hours      0
Water_Frequency     0
Fertilizer_Type     0
Temperature         0
Humidity            0
Growth_Milestone    0
dtype: int64

In [6]:
# Display the frame again, now that Soil_Type, Water_Frequency and
# Fertilizer_Type have been replaced by their integer codes.
df

Unnamed: 0,Soil_Type,Sunlight_Hours,Water_Frequency,Fertilizer_Type,Temperature,Humidity,Growth_Milestone
0,0,5.192294,1,1,31.719602,61.591861,0
1,1,4.033133,2,2,28.919484,52.422276,1
2,0,8.892769,1,0,23.179059,44.660539,0
3,0,8.241144,1,0,18.465886,46.433227,0
4,1,8.374043,1,2,18.128741,63.625923,0
...,...,...,...,...,...,...,...
188,1,5.652000,0,0,28.000000,70.200000,0
189,2,7.528000,2,1,30.500000,60.100000,1
190,0,4.934000,1,0,24.500000,61.700000,0
191,1,8.273000,0,2,27.900000,69.500000,1


In [None]:
# Define features (X) and target (y)
X = df.drop(columns='Growth_Milestone')
y = df['Growth_Milestone']

# Split the dataset into training and testing sets BEFORE scaling, so that the
# held-out rows never contribute to the statistics the scaler learns.
# random_state makes the split (and therefore every reported metric)
# reproducible; 42 matches the seed used for the models in this notebook.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Work on explicit copies so the column assignments below modify only the
# split frames and do not trigger chained-assignment warnings.
X_train = X_train.copy()
X_test = X_test.copy()

# Scaling the continuous features: fit on the training rows only, then apply
# the same fitted transformation to the test rows. `X` itself stays unscaled.
numeric_columns = ['Sunlight_Hours', 'Temperature', 'Humidity']
scaler = StandardScaler()
X_train[numeric_columns] = scaler.fit_transform(X_train[numeric_columns])
X_test[numeric_columns] = scaler.transform(X_test[numeric_columns])

In [8]:
# Model registry consumed by the training and logging cells.
# Each entry is a 5-tuple:
#   (display name, hyperparameters, unfitted estimator,
#    (X_train, y_train), (X_test, y_test))
# The hyperparameters are applied later through `set_params`, which keeps them
# available as a plain dict for logging.
# Every estimator gets random_state=42 so repeated runs give the same fitted
# models; the tree-based models previously had no seed.
models = [
    (
        'Logistic Regression',
        {'max_iter':1000, 'random_state':42},
        LogisticRegression(),
        (X_train, y_train),
        (X_test, y_test)
    ),
    (
        'Decision Tree Classifier',
        {'max_depth':5, 'min_samples_split':10, 'random_state':42},
        DecisionTreeClassifier(),
        (X_train, y_train),
        (X_test, y_test)
    ),
    (
        'Random Forest Classifier',
        {'n_estimators':100, 'max_depth':10, 'min_samples_split':5, 'random_state':42},
        RandomForestClassifier(),
        (X_train, y_train),
        (X_test, y_test)
    ),
    (
        'SVC',
        {'kernel':'rbf', 'C':1, 'gamma':'scale', 'random_state':42},
        SVC(),
        (X_train, y_train),
        (X_test, y_test)
    )
]

In [9]:
# Fit every configured model and keep its classification report (as a dict),
# in the same order as `models`.
reports = []

for entry in models:
    model_name, params, model, train_data, test_data = entry
    X_train, y_train = train_data
    X_test, y_test = test_data

    # Apply the configured hyperparameters to the estimator, then train it.
    model.set_params(**params)
    model.fit(X_train, y_train)

    # Evaluate on the test pair stored alongside the model.
    y_pred = model.predict(X_test)
    report = classification_report(y_test, y_pred, output_dict=True)
    reports.append(report)

In [11]:
# Log one MLflow run per trained model: its hyperparameters plus selected
# metrics taken from the matching classification report.
mlflow.set_experiment('Plant Growth Prediction2')
# NOTE(review): set_registry_uri configures the model registry only, and this
# cell never calls mlflow.set_tracking_uri, so the runs below are written to
# whatever tracking store MLflow resolves by default. Confirm whether the
# tracking URI was meant to point at this server instead.
mlflow.set_registry_uri('http://127.0.0.1:5000/')

# `reports` must line up one-to-one with `models`; a length mismatch means one
# of the two lists is stale (e.g. `models` was redefined without retraining).
assert len(reports) == len(models), (
    'reports and models are out of sync; re-run the training cell'
)

for entry, report in zip(models, reports):
    model_name, params = entry[0], entry[1]

    with mlflow.start_run(run_name=model_name):
        # Log a merged copy instead of adding 'model_name' to `params` itself:
        # `params` is the same dict the training cell passes to
        # `model.set_params(**params)`, and an extra 'model_name' key would
        # make that call fail if the training cell were run again.
        mlflow.log_params({**params, 'model_name': model_name})

        # Log metrics
        mlflow.log_metrics({
            'accuracy': report['accuracy'],
            'recall_class_0': report['0']['recall'],
            'recall_class_1': report['1']['recall'],
            'f1-score_macro': report['macro avg']['f1-score']
        })