In [1]:
import os
from pathlib import Path

import mlflow
import mlflow.sklearn
import numpy as np
from imblearn.over_sampling import SMOTE
from mlflow.models.signature import infer_signature
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, log_loss
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC



In [2]:
import pandas as pd

# Read the churn table from a CSV file; the relative path is resolved against
# the current working directory of the kernel.
DATA_FILE = "telecom_customer_churn_clean.csv"
df = pd.read_csv(DATA_FILE)


In [3]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,Gender,Age,Married,Number of Dependents,City_0,City_1,City_2,City_3,City_4,City_5,...,Streaming TV,Streaming Movies,Streaming Music,Unlimited Data,Contract,Total Charges,Total Refunds,Total Extra Data Charges,Total Long Distance Charges,Customer Status
0,0,37,1,0,0,0,0,0,0,0,...,1,0,0,1,2,593.3,0.0,0,381.51,0
1,1,46,0,0,0,0,0,0,0,0,...,0,1,1,0,1,542.4,38.33,10,96.21,0
2,1,50,0,0,0,0,0,0,0,0,...,0,0,0,1,1,280.85,0.0,0,134.6,1
3,1,78,1,0,0,0,0,0,0,0,...,1,1,0,1,1,1237.85,0.0,0,361.66,1
4,0,75,1,0,0,0,0,0,0,0,...,1,0,0,1,1,267.4,0.0,0,22.14,1


### Set tracking URI to local folder

In [3]:
# Keep all MLflow run data in an "mlruns" folder next to the notebook's
# working directory, creating the folder on first use.
mlruns_path = os.path.join(os.getcwd(), "mlruns")
os.makedirs(mlruns_path, exist_ok=True)

# Build the file URI with pathlib instead of string formatting: prefixing an
# absolute POSIX path with "file:///" yields four slashes, and a Windows path
# would keep its backslashes. Path.as_uri() produces a well-formed file URI
# on both platforms (the path is absolute because it starts from os.getcwd()).
mlflow.set_tracking_uri(Path(mlruns_path).as_uri())

In [4]:
TARGET_COL = "Customer Status"

# Features are every column except the target.
features = df.drop(columns=[TARGET_COL])

# NOTE(review): the df.head() preview lists an "Unnamed: 0" column, which looks
# like a row index written out by an earlier to_csv() call -- confirm against
# the CSV. A row counter carries no signal and should not be fed to the model,
# so drop it when present; errors="ignore" makes this a no-op otherwise.
features = features.drop(columns=["Unnamed: 0"], errors="ignore")

# Plain NumPy arrays for scikit-learn.
X = features.values
y = df[TARGET_COL].values

#### Train-test split
#### Feature scaling

In [5]:

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=18
)

# Fit the scaler on the training rows only, then apply the same scaling to
# both partitions so no test-set statistics leak into training.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)


In [6]:
# Select the MLflow experiment that the runs started below are recorded under.
experiment_name = "SVM_model"
mlflow.set_experiment(experiment_name=experiment_name)
print(f"Experiment set to: {experiment_name}")

Experiment set to: SVM_model


  return FileStore(store_uri, store_uri)


In [8]:
# Baseline: one linear SVM with a fixed C, recorded as a single MLflow run
# (no hyperparameter search).
C_value = 0.001
kernel_type = 'linear'

# unique run name
run_name = f"SVM_C={C_value}_kernel={kernel_type}_withoutGS"

with mlflow.start_run(run_name=run_name):

    # Fit the classifier; probability=True enables predict_proba below.
    model = SVC(C=C_value, kernel=kernel_type, probability=True).fit(X_train, y_train)

    # Hold-out predictions: hard labels and per-class probabilities.
    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)

    # Test-set metrics; F1 / precision / recall use support-weighted averaging.
    acc = accuracy_score(y_test, y_pred)
    f1, precision, recall = (
        scorer(y_test, y_pred, average='weighted')
        for scorer in (f1_score, precision_score, recall_score)
    )
    loss = log_loss(y_test, y_prob)

    # Record the hyperparameters, then the metrics, in a fixed order.
    for param_name, param_value in (("C", C_value), ("kernel", kernel_type)):
        mlflow.log_param(param_name, param_value)

    for metric_name, metric_value in (
        ("accuracy", acc),
        ("f1_score", f1),
        ("precision", precision),
        ("recall", recall),
        ("loss", loss),
    ):
        mlflow.log_metric(metric_name, metric_value)

    # Input/output schema inferred from the training data and its predictions.
    train_predictions = model.predict(X_train)
    signature = infer_signature(X_train, train_predictions)

    # Persist the fitted model together with its signature and a small input sample.
    mlflow.sklearn.log_model(
        sk_model=model,
        name="svm_model_noGS",
        signature=signature,
        input_example=X_train[:5],
    )

    print(f" Run '{run_name}' logged with accuracy: {acc:.4f}")


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

 Run 'SVM_C=0.001_kernel=linear_withoutGS' logged with accuracy: 0.8300


In [9]:
# Hyperparameter grid for the manual search: one MLflow run is logged per
# (C, kernel, gamma) combination.
# Note: gamma is a kernel coefficient for the non-linear kernels, so the two
# gamma settings for kernel='linear' are expected to report identical metrics.
C_values = [0.01, 0.1, 1, 10]
kernels = ['linear', 'rbf', 'poly']
gammas = ['scale', 'auto']

for C in C_values:
    for kernel in kernels:
        for gamma in gammas:

            run_name = f"SVM_C={C}_kernel={kernel}_gamma={gamma}"

            with mlflow.start_run(run_name=run_name):

                # Create model; probability=True is required for predict_proba / log loss
                model = SVC(C=C, kernel=kernel, gamma=gamma, probability=True)

                # applying 5-fold cross-validation on the training set
                cv_scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
                mean_cv_acc = np.mean(cv_scores)

                # Refit on the full training set before scoring the hold-out set
                model.fit(X_train, y_train)

                y_pred = model.predict(X_test)
                # Probabilities must come from *this* run's model. Without this line the
                # loop silently reused a stale `y_prob` left in the kernel by an earlier
                # cell, so every run logged the same (wrong) loss.
                y_prob = model.predict_proba(X_test)

                test_acc = accuracy_score(y_test, y_pred)
                f1 = f1_score(y_test, y_pred, average='weighted')
                precision = precision_score(y_test, y_pred, average='weighted')
                recall = recall_score(y_test, y_pred, average='weighted')
                # labels= ties the probability columns to the classes the model was
                # trained on, even if a class happens to be missing from y_test.
                loss = log_loss(y_test, y_prob, labels=model.classes_)

                # Model signature
                signature = infer_signature(X_train, model.predict(X_train))

                # Log parameters
                mlflow.log_param("C", C)
                mlflow.log_param("kernel", kernel)
                mlflow.log_param("gamma", gamma)

                # Log metrics
                mlflow.log_metric("cv_accuracy", mean_cv_acc)
                mlflow.log_metric("test_accuracy", test_acc)
                mlflow.log_metric("f1_score", f1)
                mlflow.log_metric("precision", precision)
                mlflow.log_metric("recall", recall)
                mlflow.log_metric("loss", loss)

                # Log model
                mlflow.sklearn.log_model(sk_model=model, name="svm_model_GS",
                                         signature=signature,
                                         input_example=X_train[:5])

                print(f"Logged: {run_name} | CV Accuracy = {mean_cv_acc:.4f} | Test Accuracy = {test_acc:.4f}")

print("✅ All runs logged successfully!")


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Logged: SVM_C=0.01_kernel=linear_gamma=scale | CV Accuracy = 0.8406 | Test Accuracy = 0.8422


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Logged: SVM_C=0.01_kernel=linear_gamma=auto | CV Accuracy = 0.8406 | Test Accuracy = 0.8422


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Logged: SVM_C=0.01_kernel=rbf_gamma=scale | CV Accuracy = 0.7128 | Test Accuracy = 0.7307


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Logged: SVM_C=0.01_kernel=rbf_gamma=auto | CV Accuracy = 0.7128 | Test Accuracy = 0.7307


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Logged: SVM_C=0.01_kernel=poly_gamma=scale | CV Accuracy = 0.7128 | Test Accuracy = 0.7307


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Logged: SVM_C=0.01_kernel=poly_gamma=auto | CV Accuracy = 0.7128 | Test Accuracy = 0.7307


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Logged: SVM_C=0.1_kernel=linear_gamma=scale | CV Accuracy = 0.8469 | Test Accuracy = 0.8467


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Logged: SVM_C=0.1_kernel=linear_gamma=auto | CV Accuracy = 0.8469 | Test Accuracy = 0.8467


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Logged: SVM_C=0.1_kernel=rbf_gamma=scale | CV Accuracy = 0.8219 | Test Accuracy = 0.8316


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Logged: SVM_C=0.1_kernel=rbf_gamma=auto | CV Accuracy = 0.8217 | Test Accuracy = 0.8316


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Logged: SVM_C=0.1_kernel=poly_gamma=scale | CV Accuracy = 0.7473 | Test Accuracy = 0.7754


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Logged: SVM_C=0.1_kernel=poly_gamma=auto | CV Accuracy = 0.7471 | Test Accuracy = 0.7754


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Logged: SVM_C=1_kernel=linear_gamma=scale | CV Accuracy = 0.8488 | Test Accuracy = 0.8452


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Logged: SVM_C=1_kernel=linear_gamma=auto | CV Accuracy = 0.8488 | Test Accuracy = 0.8452


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Logged: SVM_C=1_kernel=rbf_gamma=scale | CV Accuracy = 0.8422 | Test Accuracy = 0.8392


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Logged: SVM_C=1_kernel=rbf_gamma=auto | CV Accuracy = 0.8422 | Test Accuracy = 0.8392


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Logged: SVM_C=1_kernel=poly_gamma=scale | CV Accuracy = 0.8368 | Test Accuracy = 0.8308


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Logged: SVM_C=1_kernel=poly_gamma=auto | CV Accuracy = 0.8368 | Test Accuracy = 0.8308


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Logged: SVM_C=10_kernel=linear_gamma=scale | CV Accuracy = 0.8482 | Test Accuracy = 0.8452


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Logged: SVM_C=10_kernel=linear_gamma=auto | CV Accuracy = 0.8482 | Test Accuracy = 0.8452


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Logged: SVM_C=10_kernel=rbf_gamma=scale | CV Accuracy = 0.8285 | Test Accuracy = 0.8149


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Logged: SVM_C=10_kernel=rbf_gamma=auto | CV Accuracy = 0.8289 | Test Accuracy = 0.8149


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Logged: SVM_C=10_kernel=poly_gamma=scale | CV Accuracy = 0.8061 | Test Accuracy = 0.7989


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Logged: SVM_C=10_kernel=poly_gamma=auto | CV Accuracy = 0.8065 | Test Accuracy = 0.7989
✅ All runs logged successfully!


In [7]:
# Oversample the (already scaled) training set with SMOTE; the test set is
# left untouched. The fixed seed keeps the synthetic samples repeatable.
smote = SMOTE(random_state=94)
X_train_res, y_train_res = smote.fit_resample(X=X_train, y=y_train)

In [8]:
# Same fixed-C linear SVM as the baseline run, but trained on the
# SMOTE-resampled training set and with balanced class weights.
C_value = 0.001
kernel_type = 'linear'
class_weight = 'balanced'

# unique run name
run_name = f"SVM_C={C_value}_kernel={kernel_type}_withoutGS_Balanced"

with mlflow.start_run(run_name=run_name):

    # Train model on the resampled data.
    # NOTE(review): the data was already oversampled with SMOTE, so the
    # 'balanced' class weights are presumably close to uniform -- confirm
    # whether both balancing mechanisms are intended.
    model = SVC(C=C_value, kernel=kernel_type, probability=True, class_weight=class_weight)
    model.fit(X_train_res, y_train_res)

    # Evaluate on the original (non-resampled) test set
    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)

    acc = accuracy_score(y_test, y_pred)
    f1  = f1_score(y_test, y_pred, average='weighted')
    precision = precision_score(y_test, y_pred, average='weighted')
    recall    = recall_score(y_test, y_pred, average='weighted')
    loss      = log_loss(y_test, y_prob)

    # Log params: include the balancing settings so this run can be told apart
    # from the unbalanced baseline by its parameters, not only by its name.
    mlflow.log_param("C", C_value)
    mlflow.log_param("kernel", kernel_type)
    mlflow.log_param("class_weight", class_weight)
    mlflow.log_param("resampling", "SMOTE")

    # Log metrics
    mlflow.log_metric("accuracy", acc)
    mlflow.log_metric("f1_score", f1)
    mlflow.log_metric("precision", precision)
    mlflow.log_metric("recall", recall)
    mlflow.log_metric("loss", loss)

    # Signature inferred from the data the model was actually trained on
    signature = infer_signature(X_train_res, model.predict(X_train_res))

    # Log model
    mlflow.sklearn.log_model(sk_model=model, name="svm_model_noGS_balanced",
                            signature=signature,
                            input_example=X_train_res[:5])

    print(f" Run '{run_name}' logged with accuracy: {acc:.4f}")


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

 Run 'SVM_C=0.001_kernel=linear_withoutGS_Balanced' logged with accuracy: 0.7785
