In [5]:
!pip install google-cloud-storage



In [6]:
import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
import os

In [2]:
# Point the MLflow client at the tracking server (the Minikube service exposed on localhost:5000)
mlflow.set_tracking_uri("http://127.0.0.1:5000/")

# Credentials file used for Google Cloud Storage access
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "cs505.json"

# Iris features and class labels
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import mlflow
import mlflow.sklearn

# Relies on X_train, y_train, X_test and y_test produced by the data-split cell.

# Model 1: deliberately weak baseline.
# A very small C means very strong regularization, which makes the model underfit.
C_poor = 0.001
solver_poor = 'liblinear'
poor_params = {"C": C_poor, "solver": solver_poor}

with mlflow.start_run(run_name="Poor_Model"):
    # Fit on only the first 30 training rows to handicap the model further
    clf_poor = LogisticRegression(random_state=42, **poor_params)
    clf_poor.fit(X_train[:30], y_train[:30])

    # Score on the held-out test split
    predictions_poor = clf_poor.predict(X_test)
    accuracy_poor = accuracy_score(y_test, predictions_poor)

    # Record the hyperparameters, then the metric, then the fitted model
    # (log_model also registers it under the shared "demo_model" name)
    for param_name, param_value in poor_params.items():
        mlflow.log_param(param_name, param_value)
    mlflow.log_metric("accuracy", accuracy_poor)
    mlflow.sklearn.log_model(
        clf_poor,
        artifact_path="logistic-regression-model-poor",
        registered_model_name="demo_model",
    )

    print(f"Poor Logistic Regression model accuracy: {accuracy_poor:.2f}")



Registered model 'demo_model' already exists. Creating a new version of this model...
2023/12/23 13:45:56 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: demo_model, version 4
Created version '4' of model 'demo_model'.


Poor Logistic Regression model accuracy: 0.63


In [4]:
# Model 2: the well-performing counterpart.
# Default-strength regularization (C=1.0) and the complete training split.
C_good = 1.0
solver_good = 'liblinear'
good_params = {"C": C_good, "solver": solver_good}

with mlflow.start_run(run_name="Good_Model"):
    # Fit on every training row
    clf_good = LogisticRegression(random_state=42, **good_params)
    clf_good.fit(X_train, y_train)

    # Score on the held-out test split
    predictions_good = clf_good.predict(X_test)
    accuracy_good = accuracy_score(y_test, predictions_good)

    # Record the hyperparameters, then the metric, then the fitted model
    # (log_model also registers it under the shared "demo_model" name)
    for param_name, param_value in good_params.items():
        mlflow.log_param(param_name, param_value)
    mlflow.log_metric("accuracy", accuracy_good)
    mlflow.sklearn.log_model(
        clf_good,
        artifact_path="logistic-regression-model-good",
        registered_model_name="demo_model",
    )

    print(f"Good Logistic Regression model accuracy: {accuracy_good:.2f}")


Registered model 'demo_model' already exists. Creating a new version of this model...
2023/12/23 13:46:02 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: demo_model, version 5
Created version '5' of model 'demo_model'.


Good Logistic Regression model accuracy: 1.00


In [5]:
# Model 2: the well-performing model (standard regularization, full training data).
# NOTE(review): this cell repeats the preceding "Good_Model" cell line for line;
# executing both registers two versions of "demo_model" with the same configuration.
C_good = 1.0
solver_good = 'liblinear'
good_params = {"C": C_good, "solver": solver_good}

with mlflow.start_run(run_name="Good_Model"):
    # Fit on every training row
    clf_good = LogisticRegression(random_state=42, **good_params)
    clf_good.fit(X_train, y_train)

    # Score on the held-out test split
    predictions_good = clf_good.predict(X_test)
    accuracy_good = accuracy_score(y_test, predictions_good)

    # Record the hyperparameters, then the metric, then the fitted model
    for param_name, param_value in good_params.items():
        mlflow.log_param(param_name, param_value)
    mlflow.log_metric("accuracy", accuracy_good)
    mlflow.sklearn.log_model(
        clf_good,
        artifact_path="logistic-regression-model-good",
        registered_model_name="demo_model",
    )

    print(f"Good Logistic Regression model accuracy: {accuracy_good:.2f}")


Registered model 'demo_model' already exists. Creating a new version of this model...
2023/12/23 13:46:07 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: demo_model, version 6
Created version '6' of model 'demo_model'.


Good Logistic Regression model accuracy: 1.00


In [8]:
import os
import mlflow
from transformers import BertTokenizerFast, EncoderDecoderModel
import torch

# MLflow setup: tracking server address and the credentials file for Google Cloud
mlflow.set_tracking_uri("http://127.0.0.1:5000/")
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "cs505.json"

# BERT summarization model setup
model_name = 'mrm8488/bert-mini2bert-mini-finetuned-cnn_daily_mail-summarization'

# Use the GPU when one is available, otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Load the tokenizer and the encoder-decoder checkpoint, then move the model to the device
tokenizer = BertTokenizerFast.from_pretrained(model_name)
model = EncoderDecoderModel.from_pretrained(model_name)
model = model.to(device)

# Record the BERT summarization run in MLflow.
# Only descriptive parameters are stored here; the model itself is NOT uploaded.
# Logging a transformers model needs a dedicated call whose exact form depends on
# how the model will be used and deployed,
# e.g. mlflow.pytorch.log_model(model, "bert_summarization_model").
with mlflow.start_run(run_name="BERT_Summarization_Model"):
    # Which pretrained checkpoint this run refers to
    mlflow.log_param("model_name", model_name)

    # max_length value recorded with the run
    # (presumably the token limit used when summarizing -- TODO confirm against the app)
    mlflow.log_param("max_length", 512)

    # Report what was actually done: parameters were logged, no model artifact was
    print("BERT summarization model parameters logged to MLflow (model artifact not logged)")


BERT summarization model logged to MLflow


In [9]:
import os
import mlflow
from transformers import BertTokenizerFast, EncoderDecoderModel
import torch


# MLflow setup: tracking server address and the credentials file for Google Cloud
mlflow.set_tracking_uri("http://127.0.0.1:5000/")
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "cs505.json"


# BERT summarization model setup
model_name = 'mrm8488/bert-mini2bert-mini-finetuned-cnn_daily_mail-summarization'

# Use the GPU when one is available, otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Load the tokenizer and the encoder-decoder checkpoint, then move the model to the device
tokenizer = BertTokenizerFast.from_pretrained(model_name)
model = EncoderDecoderModel.from_pretrained(model_name)
model = model.to(device)



# Function to log hyperparameters during summarization
def log_user_hyperparameters(temperature, top_k, top_p):
    """Record one set of user-chosen generation settings as its own MLflow run.

    Opens a run named "User Generated Summarization Parameters", stores the three
    values as run parameters, and prints a confirmation line.

    Args:
        temperature: Value logged under the "temperature" parameter.
        top_k: Value logged under the "top_k" parameter.
        top_p: Value logged under the "top_p" parameter.
    """
    user_settings = {
        "temperature": temperature,
        "top_k": top_k,
        "top_p": top_p,
    }
    with mlflow.start_run(run_name="User Generated Summarization Parameters"):
        # Dict order is insertion order, so parameters are logged as temperature, top_k, top_p
        for setting_name, setting_value in user_settings.items():
            mlflow.log_param(setting_name, setting_value)
        print(f"Logged user hyperparameters: temperature={temperature}, top_k={top_k}, top_p={top_p}")

         
# Record the BERT summarization run in MLflow.
# Only descriptive parameters are stored here; the model itself is NOT uploaded.
# Model logging is still a placeholder,
# e.g. mlflow.pytorch.log_model(model, "bert_summarization_model").
with mlflow.start_run(run_name="BERT_Summarization_Model"):
    # Which pretrained checkpoint this run refers to
    mlflow.log_param("model_name", model_name)

    # max_length value recorded with the run
    # (presumably the token limit used when summarizing -- TODO confirm against the app)
    mlflow.log_param("max_length", 512)

    # Report what was actually done: parameters were logged, no model artifact was
    print("BERT summarization model parameters logged to MLflow (model artifact not logged)")

    
# The function `log_user_hyperparameters` should be called from the Streamlit app 
# when generating a summary.


BERT summarization model logged to MLflow
