In [8]:
import os

import pandas as pd

# Location of the CSV. Set the BREAST_CANCER_CSV environment variable to point
# at a different copy; the default is the path this notebook was written with.
DATA_PATH = os.environ.get(
    "BREAST_CANCER_CSV",
    "/Users/manjusharavindranath/Downloads/BreastCancer_data.csv",
)

# Load the dataset
raw_df = pd.read_csv(DATA_PATH)

# Drop columns that are not used as features. errors='ignore' keeps this step
# working if a copy of the CSV does not contain one of them.
df = raw_df.drop(columns=['id', 'Unnamed: 32'], errors='ignore')

# Map the target to binary values: 'M' to 1 (malignant), 'B' to 0 (benign)
df['diagnosis'] = df['diagnosis'].map({'M': 1, 'B': 0})

# .map() turns any label other than 'M'/'B' into NaN; fail here with a clear
# message instead of letting NaN targets reach model fitting.
if df['diagnosis'].isna().any():
    raise ValueError("diagnosis column contains values other than 'M' and 'B'")

# Separate features and target datasets
X = df.drop(columns=['diagnosis'])
y = df['diagnosis']

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=102,
)


from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


# Logistic-regression hyperparameters; this dict is both passed to the
# estimator and logged to MLflow.
# "multi_class" is intentionally not set: the parameter is deprecated in
# scikit-learn 1.5+, and "auto" was its default, so omitting it does not change
# the fitted model.
params = {
    "solver": "lbfgs",
    "max_iter": 10000,
    "random_state": 8888,
}
 

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Fit logistic regression with the hyperparameters collected in `params`.
model = LogisticRegression(**params).fit(X_train, y_train)

# Score the held-out split and print the text report.
y_pred = model.predict(X_test)
print("Logistic Regression:")
print(classification_report(y_test, y_pred))

# Same report as a nested dict so individual metrics can be looked up by key.
class_report = classification_report(y_true=y_test, y_pred=y_pred, output_dict=True)

# class_report (dict form) is the source of the metrics we want to log




Logistic Regression:
              precision    recall  f1-score   support

           0       0.97      0.99      0.98        75
           1       0.97      0.95      0.96        39

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114



In [9]:
import mlflow

# Point the client at the tracking server BEFORE selecting the experiment.
# set_experiment() resolves (or creates) the experiment against whatever
# tracking URI is active when it is called, so calling it first would bind the
# run to the default local store instead of the server.
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000/")
mlflow.set_experiment("cancer_data")

with mlflow.start_run():
    # Hyperparameters used to build the logistic-regression model.
    mlflow.log_params(params)
    # Headline metrics taken from the dict form of the classification report.
    mlflow.log_metrics({
        'accuracy': class_report['accuracy'],
        'recall_class_0': class_report['0']['recall'],
        'recall_class_1': class_report['1']['recall'],
        'f1_score': class_report['macro avg']['f1-score']
        })
    # Store the fitted estimator as a run artifact.
    mlflow.sklearn.log_model(model, "Logistic Regression")

In [10]:
from sklearn.tree import DecisionTreeClassifier

# Initialize and train the model.
# A fixed random_state makes the fitted tree (and therefore the metrics below)
# repeatable across runs; without it the tree can differ from run to run.
model = DecisionTreeClassifier(random_state=8888)
model.fit(X_train, y_train)

# Predict and evaluate
y_pred = model.predict(X_test)
print("Decision Tree Classifier:")
print(classification_report(y_test, y_pred))

Decision Tree Classifier:
              precision    recall  f1-score   support

           0       0.96      0.97      0.97        75
           1       0.95      0.92      0.94        39

    accuracy                           0.96       114
   macro avg       0.95      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114



In [11]:
# Dict form of the report for the current model's predictions, used for metric lookup.
class_report = classification_report(
    y_true=y_test,
    y_pred=y_pred,
    output_dict=True,
)


In [12]:
mlflow.set_experiment("cancer_data_decisiontree")
with mlflow.start_run():
    # Log the hyperparameters of the estimator being recorded. The `params`
    # dict holds the logistic-regression settings and does not describe this
    # model, so the estimator's own get_params() is logged instead.
    mlflow.log_params(model.get_params())
    # Headline metrics taken from the dict form of the classification report.
    mlflow.log_metrics({
        'accuracy': class_report['accuracy'],
        'recall_class_0': class_report['0']['recall'],
        'recall_class_1': class_report['1']['recall'],
        'f1_score': class_report['macro avg']['f1-score']
        })
    # Store the fitted estimator as a run artifact.
    mlflow.sklearn.log_model(model, "Decision Tree")

2024/08/17 10:07:42 INFO mlflow.tracking.fluent: Experiment with name 'cancer_data_decisiontree' does not exist. Creating a new experiment.
 - mlflow (current: 2.15.1, required: mlflow==2.12.2)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


In [13]:
from sklearn.svm import SVC

# Fit a support vector classifier with scikit-learn's default settings.
model = SVC().fit(X_train, y_train)

# Score the held-out split and print the text report.
y_pred = model.predict(X_test)
print("Support Vector Machine (SVM):")
print(classification_report(y_test, y_pred))

Support Vector Machine (SVM):
              precision    recall  f1-score   support

           0       0.97      0.97      0.97        75
           1       0.95      0.95      0.95        39

    accuracy                           0.96       114
   macro avg       0.96      0.96      0.96       114
weighted avg       0.96      0.96      0.96       114



In [15]:
# Dict form of the report for the current model's predictions, used for metric lookup.
class_report = classification_report(
    y_true=y_test,
    y_pred=y_pred,
    output_dict=True,
)

In [16]:
mlflow.set_experiment("cancer_data_SVM")
with mlflow.start_run():
    # Log the hyperparameters of the estimator being recorded. The `params`
    # dict holds the logistic-regression settings and does not describe this
    # model, so the estimator's own get_params() is logged instead.
    mlflow.log_params(model.get_params())
    # Headline metrics taken from the dict form of the classification report.
    mlflow.log_metrics({
        'accuracy': class_report['accuracy'],
        'recall_class_0': class_report['0']['recall'],
        'recall_class_1': class_report['1']['recall'],
        'f1_score': class_report['macro avg']['f1-score']
        })
    # Store the fitted estimator as a run artifact.
    mlflow.sklearn.log_model(model, "SVM")

2024/08/17 10:11:39 INFO mlflow.tracking.fluent: Experiment with name 'cancer_data_SVM' does not exist. Creating a new experiment.
 - mlflow (current: 2.15.1, required: mlflow==2.12.2)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.
