In [1]:
# Import Libraries
import mlflow
import mlflow.sklearn

import numpy as np
from itertools import product

import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler

In [4]:
# Define Data Loading Function
def load_data():
    data_file = "../data/cleaned_processed_data.csv"  
    merged_data_clean = pd.read_csv(data_file)

    features = ["CERQ_Sum", "ERQ_Sum", "MSPSS_Sum", "FSoZu_Sum", 
                "BISBAS_Total", "NEO_Sum", "STAI_Sum", "STAXI_Sum", 
                "CVLT_Sum", "TAP_Sum", "BloodPressure_Mean", "Age_Numeric"]
    
    target = "Relationship_Status"
    X = merged_data_clean[features]
    y = merged_data_clean[target]

    # Train-Test Split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    print(f"Data loaded successfully with {X_train.shape[0]} training samples and {X_test.shape[0]} test samples.")
    return X_train, X_test, y_train, y_test

In [5]:
# Set MLflow Experiment
mlflow.set_experiment("LEMON_Prediction_Relationship")

# Start the MLflow UI (Run this in the terminal if needed)
#!mlflow ui

<Experiment: artifact_location='file:///Users/samuel/Desktop/EKU%20Tu%CC%88bingen/data_literacy/project/repo/LEMON-Love-Predictor/code/mlruns/650246619203695642', creation_time=1734981523380, experiment_id='650246619203695642', last_update_time=1734981523380, lifecycle_stage='active', name='LEMON_Prediction_Relationship', tags={}>

In [14]:
# Imports
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import mlflow
import mlflow.sklearn

# Load Data Function
def load_data():
    data_file = "../data/cleaned_processed_data.csv"  # Adjust path to your dataset
    print(f"Loading data from {data_file}...")
    merged_data_clean = pd.read_csv(data_file)
    features = [
        "CERQ_Sum", "ERQ_Sum", "MSPSS_Sum", "FSoZu_Sum", 
        "BISBAS_Total", "NEO_Sum", "STAI_Sum", "STAXI_Sum", 
        "CVLT_Sum", "TAP_Sum", "BloodPressure_Mean", "Age_Numeric"
    ]
    target = "Relationship_Status"
    X = merged_data_clean[features]
    y = merged_data_clean[target]
    
    # Train-Test Split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    print(f"Data loaded successfully with {len(X_train)} training samples and {len(X_test)} test samples.")
    return X_train, X_test, y_train, y_test

# Define Model Training Function
def train_model(model_type, max_iter, C, n_estimators, kernel):
    # Load Data
    X_train, X_test, y_train, y_test = load_data()

    # Initialize Model
    if model_type == "Logistic Regression":
        model = LogisticRegression(max_iter=max_iter, C=C, solver='lbfgs')
    elif model_type == "Random Forest":
        model = RandomForestClassifier(n_estimators=n_estimators, max_depth=10, random_state=42)
    elif model_type == "SVM":
        model = SVC(C=C, kernel=kernel, max_iter=max_iter, probability=True)
    
    # Train the Model
    model.fit(X_train, y_train)

    # Evaluate the Model
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, output_dict=True)
    
    print(f"Model: {model_type}, Accuracy: {acc:.4f}")
    print(classification_report(y_test, y_pred))
# Create a sample input example from the training data
    input_example = X_train.iloc[:1]

# Log Model and Metrics in MLflow
    with mlflow.start_run(run_name=f"{model_type}_{kernel}_run"):
        mlflow.log_param("model_type", model_type)
        mlflow.log_param("max_iter", max_iter)
        mlflow.log_param("C", C)
        mlflow.log_param("n_estimators", n_estimators)
        mlflow.log_param("kernel", kernel)
        mlflow.log_metric("accuracy", acc)
        
        # Log the model with an input example
        mlflow.sklearn.log_model(model, artifact_path="model", input_example=input_example)
    # # Log Model and Metrics in MLflow
    # with mlflow.start_run(run_name=f"{model_type}_{kernel}_run"):
    #     mlflow.log_param("model_type", model_type)
    #     mlflow.log_param("max_iter", max_iter)
    #     mlflow.log_param("C", C)
    #     mlflow.log_param("n_estimators", n_estimators)
    #     mlflow.log_param("kernel", kernel)
    #     mlflow.log_metric("accuracy", acc)
        
    #     mlflow.sklearn.log_model(model, artifact_path="model")

# Train the Model Once
train_model(
    model_type="Random Forest",  # Change to "Random Forest" or "SVM" as needed
    max_iter=1000,
    C=1.0,
    n_estimators=100,  # Only used for Random Forest
    kernel="linear"    # Only used for SVM
)

Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5435
              precision    recall  f1-score   support

           0       0.33      0.24      0.28        17
           1       0.62      0.72      0.67        29

    accuracy                           0.54        46
   macro avg       0.48      0.48      0.47        46
weighted avg       0.51      0.54      0.52        46





In [15]:
# Define Hyperparameter Sweep Config
sweep_config = {
    "model_type": [ "Random Forest", "SVM"],
    "max_iter": list(range(100, 1001, 100)),
    "C": np.linspace(0.1, 10, 10).tolist(),
    "n_estimators": list(range(50, 301, 50)),
    "kernel": ["linear", "rbf"]
}

# Create All Combinations
sweep_params = list(product(
    sweep_config["model_type"], 
    sweep_config["max_iter"], 
    sweep_config["C"], 
    sweep_config["n_estimators"], 
    sweep_config["kernel"]
))

In [16]:
# Execute Model Training for All Combinations
for params in sweep_params:
    model_type, max_iter, C, n_estimators, kernel = params

    # Skip Irrelevant Combinations
    if model_type == "Logistic Regression" and kernel != "linear":
        continue
    
    train_model(model_type, max_iter, C, n_estimators, kernel)

Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5217
              precision    recall  f1-score   support

           0       0.27      0.18      0.21        17
           1       0.60      0.72      0.66        29

    accuracy                           0.52        46
   macro avg       0.44      0.45      0.44        46
weighted avg       0.48      0.52      0.49        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5217
              precision    recall  f1-score   support

           0       0.27      0.18      0.21        17
           1       0.60      0.72      0.66        29

    accuracy                           0.52        46
   macro avg       0.44      0.45      0.44        46
weighted avg       0.48      0.52      0.49        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5435
              precision    recall  f1-score   support

           0       0.33      0.24      0.28        17
           1       0.62      0.72      0.67        29

    accuracy                           0.54        46
   macro avg       0.48      0.48      0.47        46
weighted avg       0.51      0.54      0.52        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5435
              precision    recall  f1-score   support

           0       0.33      0.24      0.28        17
           1       0.62      0.72      0.67        29

    accuracy                           0.54        46
   macro avg       0.48      0.48      0.47        46
weighted avg       0.51      0.54      0.52        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5652
              precision    recall  f1-score   support

           0       0.36      0.24      0.29        17
           1       0.63      0.76      0.69        29

    accuracy                           0.57        46
   macro avg       0.50      0.50      0.49        46
weighted avg       0.53      0.57      0.54        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5652
              precision    recall  f1-score   support

           0       0.36      0.24      0.29        17
           1       0.63      0.76      0.69        29

    accuracy                           0.57        46
   macro avg       0.50      0.50      0.49        46
weighted avg       0.53      0.57      0.54        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5217
              precision    recall  f1-score   support

           0       0.27      0.18      0.21        17
           1       0.60      0.72      0.66        29

    accuracy                           0.52        46
   macro avg       0.44      0.45      0.44        46
weighted avg       0.48      0.52      0.49        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5217
              precision    recall  f1-score   support

           0       0.27      0.18      0.21        17
           1       0.60      0.72      0.66        29

    accuracy                           0.52        46
   macro avg       0.44      0.45      0.44        46
weighted avg       0.48      0.52      0.49        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5435
              precision    recall  f1-score   support

           0       0.30      0.18      0.22        17
           1       0.61      0.76      0.68        29

    accuracy                           0.54        46
   macro avg       0.46      0.47      0.45        46
weighted avg       0.50      0.54      0.51        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5435
              precision    recall  f1-score   support

           0       0.30      0.18      0.22        17
           1       0.61      0.76      0.68        29

    accuracy                           0.54        46
   macro avg       0.46      0.47      0.45        46
weighted avg       0.50      0.54      0.51        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5435
              precision    recall  f1-score   support

           0       0.30      0.18      0.22        17
           1       0.61      0.76      0.68        29

    accuracy                           0.54        46
   macro avg       0.46      0.47      0.45        46
weighted avg       0.50      0.54      0.51        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5435
              precision    recall  f1-score   support

           0       0.30      0.18      0.22        17
           1       0.61      0.76      0.68        29

    accuracy                           0.54        46
   macro avg       0.46      0.47      0.45        46
weighted avg       0.50      0.54      0.51        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5217
              precision    recall  f1-score   support

           0       0.27      0.18      0.21        17
           1       0.60      0.72      0.66        29

    accuracy                           0.52        46
   macro avg       0.44      0.45      0.44        46
weighted avg       0.48      0.52      0.49        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5217
              precision    recall  f1-score   support

           0       0.27      0.18      0.21        17
           1       0.60      0.72      0.66        29

    accuracy                           0.52        46
   macro avg       0.44      0.45      0.44        46
weighted avg       0.48      0.52      0.49        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5435
              precision    recall  f1-score   support

           0       0.33      0.24      0.28        17
           1       0.62      0.72      0.67        29

    accuracy                           0.54        46
   macro avg       0.48      0.48      0.47        46
weighted avg       0.51      0.54      0.52        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5435
              precision    recall  f1-score   support

           0       0.33      0.24      0.28        17
           1       0.62      0.72      0.67        29

    accuracy                           0.54        46
   macro avg       0.48      0.48      0.47        46
weighted avg       0.51      0.54      0.52        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5652
              precision    recall  f1-score   support

           0       0.36      0.24      0.29        17
           1       0.63      0.76      0.69        29

    accuracy                           0.57        46
   macro avg       0.50      0.50      0.49        46
weighted avg       0.53      0.57      0.54        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5652
              precision    recall  f1-score   support

           0       0.36      0.24      0.29        17
           1       0.63      0.76      0.69        29

    accuracy                           0.57        46
   macro avg       0.50      0.50      0.49        46
weighted avg       0.53      0.57      0.54        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5217
              precision    recall  f1-score   support

           0       0.27      0.18      0.21        17
           1       0.60      0.72      0.66        29

    accuracy                           0.52        46
   macro avg       0.44      0.45      0.44        46
weighted avg       0.48      0.52      0.49        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5217
              precision    recall  f1-score   support

           0       0.27      0.18      0.21        17
           1       0.60      0.72      0.66        29

    accuracy                           0.52        46
   macro avg       0.44      0.45      0.44        46
weighted avg       0.48      0.52      0.49        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5435
              precision    recall  f1-score   support

           0       0.30      0.18      0.22        17
           1       0.61      0.76      0.68        29

    accuracy                           0.54        46
   macro avg       0.46      0.47      0.45        46
weighted avg       0.50      0.54      0.51        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5435
              precision    recall  f1-score   support

           0       0.30      0.18      0.22        17
           1       0.61      0.76      0.68        29

    accuracy                           0.54        46
   macro avg       0.46      0.47      0.45        46
weighted avg       0.50      0.54      0.51        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5435
              precision    recall  f1-score   support

           0       0.30      0.18      0.22        17
           1       0.61      0.76      0.68        29

    accuracy                           0.54        46
   macro avg       0.46      0.47      0.45        46
weighted avg       0.50      0.54      0.51        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5435
              precision    recall  f1-score   support

           0       0.30      0.18      0.22        17
           1       0.61      0.76      0.68        29

    accuracy                           0.54        46
   macro avg       0.46      0.47      0.45        46
weighted avg       0.50      0.54      0.51        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5217
              precision    recall  f1-score   support

           0       0.27      0.18      0.21        17
           1       0.60      0.72      0.66        29

    accuracy                           0.52        46
   macro avg       0.44      0.45      0.44        46
weighted avg       0.48      0.52      0.49        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5217
              precision    recall  f1-score   support

           0       0.27      0.18      0.21        17
           1       0.60      0.72      0.66        29

    accuracy                           0.52        46
   macro avg       0.44      0.45      0.44        46
weighted avg       0.48      0.52      0.49        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5435
              precision    recall  f1-score   support

           0       0.33      0.24      0.28        17
           1       0.62      0.72      0.67        29

    accuracy                           0.54        46
   macro avg       0.48      0.48      0.47        46
weighted avg       0.51      0.54      0.52        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5435
              precision    recall  f1-score   support

           0       0.33      0.24      0.28        17
           1       0.62      0.72      0.67        29

    accuracy                           0.54        46
   macro avg       0.48      0.48      0.47        46
weighted avg       0.51      0.54      0.52        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5652
              precision    recall  f1-score   support

           0       0.36      0.24      0.29        17
           1       0.63      0.76      0.69        29

    accuracy                           0.57        46
   macro avg       0.50      0.50      0.49        46
weighted avg       0.53      0.57      0.54        46





Loading data from ../data/cleaned_processed_data.csv...
Data loaded successfully with 181 training samples and 46 test samples.
Model: Random Forest, Accuracy: 0.5652
              precision    recall  f1-score   support

           0       0.36      0.24      0.29        17
           1       0.63      0.76      0.69        29

    accuracy                           0.57        46
   macro avg       0.50      0.50      0.49        46
weighted avg       0.53      0.57      0.54        46





KeyboardInterrupt: 