In [2]:
import os
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from mlflow.models.signature import infer_signature
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, log_loss
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_breast_cancer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import mlflow
import mlflow.sklearn



In [3]:
# Load data
df = pd.read_csv('data/telecom_customer_churn_clean.csv')

In [4]:
df.head()

Unnamed: 0,Gender,Age,Married,Number of Dependents,City_0,City_1,City_2,City_3,City_4,City_5,...,Streaming TV,Streaming Movies,Streaming Music,Unlimited Data,Contract,Total Charges,Total Refunds,Total Extra Data Charges,Total Long Distance Charges,Customer Status
0,0,37,1,0,0,0,0,0,0,0,...,1,0,0,1,2,593.3,0.0,0,381.51,0
1,1,46,0,0,0,0,0,0,0,0,...,0,1,1,0,1,542.4,38.33,10,96.21,0
2,1,50,0,0,0,0,0,0,0,0,...,0,0,0,1,1,280.85,0.0,0,134.6,1
3,1,78,1,0,0,0,0,0,0,0,...,1,1,0,1,1,1237.85,0.0,0,361.66,1
4,0,75,1,0,0,0,0,0,0,0,...,1,0,0,1,1,267.4,0.0,0,22.14,1


In [5]:
df.columns

Index(['Gender', 'Age', 'Married', 'Number of Dependents', 'City_0', 'City_1',
       'City_2', 'City_3', 'City_4', 'City_5', 'City_6', 'City_7', 'City_8',
       'City_9', 'City_10', 'Number of Referrals', 'Tenure in Months',
       'Phone Service', 'Multiple Lines', 'Internet Service',
       'Internet Type_0', 'Internet Type_1', 'Avg Monthly GB Download',
       'Device Protection Plan', 'Premium Tech Support', 'Streaming TV',
       'Streaming Movies', 'Streaming Music', 'Unlimited Data', 'Contract',
       'Total Charges', 'Total Refunds', 'Total Extra Data Charges',
       'Total Long Distance Charges', 'Customer Status'],
      dtype='object')

In [6]:
mlruns_path = os.path.join(os.getcwd(), "mlruns")
os.makedirs(mlruns_path, exist_ok=True)
mlflow.set_tracking_uri(f"file:///{mlruns_path}")

In [7]:
X = df.drop("Customer Status", axis=1).values
y = df["Customer Status"].values

In [8]:
# Train / test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
# ---- Pipeline: Scaling + Logistic Regression ----
pipeline = Pipeline([("scaler", StandardScaler()), ("logreg", LogisticRegression(max_iter=500, solver="liblinear", C=1.0))])

In [10]:
# ---- MLflow tracking ----
with mlflow.start_run():

    # Train
    pipeline.fit(X_train, y_train)

    # Predictions
    y_pred = pipeline.predict(X_test)
    y_proba = pipeline.predict_proba(X_test)

    # ---- Metrics/Evaluate ----
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    loss = log_loss(y_test, y_proba)    

    # Log parameters (manual, since no GridSearch)
    mlflow.log_param("C", 1.0)
    mlflow.log_param("solver", "liblinear")
    mlflow.log_param("max_iter", 500)

    # Log metric
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("precision", precision)
    mlflow.log_metric("recall", recall)
    mlflow.log_metric("f1_score", f1)
    mlflow.log_metric("log_loss", loss)
    
    # Log entire pipeline (scaler + model together)
    mlflow.sklearn.log_model(
        pipeline,
        artifact_path="logreg_scaled_pipeline"
    )

    # ---- Print results ----
    print("Metrics:")
    print(f" Accuracy  : {accuracy:.4f}")
    print(f" Precision : {precision:.4f}")
    print(f" Recall    : {recall:.4f}")
    print(f" F1-score  : {f1:.4f}")
    print(f" Log Loss  : {loss:.4f}")    

  return FileStore(store_uri, store_uri)


MlflowException: When an mlflow-artifacts URI was supplied, the tracking URI must be a valid http or https URI, but it was currently set to file:///C:\projects\cognitix\mlruns. Perhaps you forgot to set the tracking URI to the running MLflow server. To set the tracking URI, use either of the following methods:
1. Set the MLFLOW_TRACKING_URI environment variable to the desired tracking URI. `export MLFLOW_TRACKING_URI=http://localhost:5000`
2. Set the tracking URI programmatically by calling `mlflow.set_tracking_uri`. `mlflow.set_tracking_uri('http://localhost:5000')`