<a href="https://colab.research.google.com/github/olonok69/LLM_Notebooks/blob/main/mlflow/deep_learning/MLFlow_tensorflow_flavor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#MLFLOW
https://mlflow.org/docs/latest/introduction/index.html


MLflow is a solution to many of these issues in this dynamic landscape, offering tools and simplifying processes to streamline the ML lifecycle and foster collaboration among ML practitioners.



# MLflow Tensorflow Guide
https://mlflow.org/docs/latest/deep-learning/tensorflow/index.html

# ngrok
Connect localhost to the internet for testing applications and APIs
Bring secure connectivity to apps and APIs in localhost and dev/test environments with just one command or function call.
- Webhook testing
- Developer Previews
- Mobile backend testing

https://ngrok.com/


In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install mlflow pyngrok evaluate  bitsandbytes accelerate datasets transformers==4.39.3 --quiet
get_ipython().system_raw("mlflow ui --port 5000 &")

In [3]:

from pyngrok import ngrok
from getpass import getpass

# Terminate open tunnels if exist
ngrok.kill()

In [4]:
from google.colab import userdata
NGROK_AUTH_TOKEN  = userdata.get('NGROK')

ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# Open an HTTPs tunnel on port 5000 for http://localhost:5000
ngrok_tunnel = ngrok.connect(addr="5000", proto="http", bind_tls=True)
print("MLflow Tracking UI:", ngrok_tunnel.public_url)

MLflow Tracking UI: https://a55a-35-240-182-192.ngrok-free.app


In [6]:
import tensorflow as tf
from sklearn.datasets import fetch_california_housing

import mlflow
from mlflow.models import infer_signature
print(tf.__version__)


2.15.0


In [7]:
tf.config.list_physical_devices()


[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [8]:
class Normalize(tf.Module):
    """Data Normalization class"""

    def __init__(self, x):
        # Initialize the mean and standard deviation for normalization
        self.mean = tf.math.reduce_mean(x, axis=0)
        self.std = tf.math.reduce_std(x, axis=0)

    def norm(self, x):
        return (x - self.mean) / self.std

    def unnorm(self, x):
        return (x * self.std) + self.mean


class LinearRegression(tf.Module):
    """Linear Regression model class"""

    def __init__(self):
        self.built = False

    @tf.function
    def __call__(self, x):
        # Initialize the model parameters on the first call
        if not self.built:
            # Randomly generate the weight vector and bias term
            rand_w = tf.random.uniform(shape=[x.shape[-1], 1])
            rand_b = tf.random.uniform(shape=[])
            self.w = tf.Variable(rand_w)
            self.b = tf.Variable(rand_b)
            self.built = True
        y = tf.add(tf.matmul(x, self.w), self.b)
        return tf.squeeze(y, axis=1)


class ExportModule(tf.Module):
    """Exporting TF model"""

    def __init__(self, model, norm_x, norm_y):
        # Initialize pre and postprocessing functions
        self.model = model
        self.norm_x = norm_x
        self.norm_y = norm_y

    @tf.function(input_signature=[tf.TensorSpec(shape=[None, None], dtype=tf.float32)])
    def __call__(self, x):
        # Run the ExportModule for new data points
        x = self.norm_x.norm(x)
        y = self.model(x)
        y = self.norm_y.unnorm(y)
        return y


def mse_loss(y_pred, y):
    """Calculating Mean Square Error Loss function"""
    return tf.reduce_mean(tf.square(y_pred - y))

# Prepare the Data

In [9]:
tf.random.set_seed(42)

# Load dataset
dataset = fetch_california_housing(as_frame=True)["frame"]
# Drop missing values
dataset = dataset.dropna()
# using only 1500
dataset = dataset[:1500]
dataset_tf = tf.convert_to_tensor(dataset, dtype=tf.float32)

# Split dataset into train and test
dataset_shuffled = tf.random.shuffle(dataset_tf, seed=42)
train_data, test_data = dataset_shuffled[100:], dataset_shuffled[:100]
x_train, y_train = train_data[:, :-1], train_data[:, -1]
x_test, y_test = test_data[:, :-1], test_data[:, -1]

In [10]:
# Data normalization
norm_x = Normalize(x_train)
norm_y = Normalize(y_train)
x_train_norm, y_train_norm = norm_x.norm(x_train), norm_y.norm(y_train)
x_test_norm, y_test_norm = norm_x.norm(x_test), norm_y.norm(y_test)

In [11]:
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment("mlflow-tensorflow")

<Experiment: artifact_location='mlflow-artifacts:/767691094531472360', creation_time=1715713380815, experiment_id='767691094531472360', last_update_time=1715713380815, lifecycle_stage='active', name='mlflow-tensorflow', tags={}>

In [16]:
from datetime import datetime

name = "tensorflow_" +datetime.now().strftime("%Y-%m-%d_%H:%M:%S")


with mlflow.start_run(run_name = name) as run:
    # Initialize linear regression model
    lin_reg = LinearRegression()

    # Use mini batches for memory efficiency and faster convergence
    batch_size = 32
    train_dataset = tf.data.Dataset.from_tensor_slices((x_train_norm, y_train_norm))
    train_dataset = train_dataset.shuffle(buffer_size=x_train.shape[0]).batch(batch_size)
    test_dataset = tf.data.Dataset.from_tensor_slices((x_test_norm, y_test_norm))
    test_dataset = test_dataset.shuffle(buffer_size=x_test.shape[0]).batch(batch_size)

    # Set training parameters
    epochs = 100
    learning_rate = 0.01
    train_losses, test_losses = [], []

    # Format training loop
    for epoch in range(epochs):
        batch_losses_train, batch_losses_test = [], []

        # Iterate through the training data
        for x_batch, y_batch in train_dataset:
            with tf.GradientTape() as tape:
                y_pred_batch = lin_reg(x_batch)
                batch_loss = mse_loss(y_pred_batch, y_batch)
            # Update parameters with respect to the gradient calculations
            grads = tape.gradient(batch_loss, lin_reg.variables)
            for g, v in zip(grads, lin_reg.variables):
                v.assign_sub(learning_rate * g)
            # Keep track of batch-level training performance
            batch_losses_train.append(batch_loss)

        # Iterate through the testing data
        for x_batch, y_batch in test_dataset:
            y_pred_batch = lin_reg(x_batch)
            batch_loss = mse_loss(y_pred_batch, y_batch)
            # Keep track of batch-level testing performance
            batch_losses_test.append(batch_loss)

        # Keep track of epoch-level model performance
        train_loss = tf.reduce_mean(batch_losses_train)
        test_loss = tf.reduce_mean(batch_losses_test)
        train_losses.append(train_loss)
        test_losses.append(test_loss)
        if epoch % 10 == 0:
            mlflow.log_metric(key="train_losses", value=train_loss, step=epoch)
            mlflow.log_metric(key="test_losses", value=test_loss, step=epoch)
            print(f"Mean squared error for step {epoch}: {train_loss.numpy():0.3f}")

    # Log the parameters
    mlflow.log_params(
        {
            "epochs": epochs,
            "learning_rate": learning_rate,
            "batch_size": batch_size,
        }
    )
    # Log the final metrics
    mlflow.log_metrics(
        {
            "final_train_loss": train_loss.numpy(),
            "final_test_loss": test_loss.numpy(),
        }
    )
    print(f"\nFinal train loss: {train_loss:0.3f}")
    print(f"Final test loss: {test_loss:0.3f}")

    # Export the tensorflow model
    lin_reg_export = ExportModule(model=lin_reg, norm_x=norm_x, norm_y=norm_y)

    # Infer model signature
    predictions = lin_reg_export(x_test)
    signature = infer_signature(x_test.numpy(), predictions.numpy())

    mlflow.tensorflow.log_model(lin_reg_export, "model", signature=signature, registered_model_name="tensorflow_lr")

Mean squared error for step 0: 1.398
Mean squared error for step 10: 0.334
Mean squared error for step 20: 0.333
Mean squared error for step 30: 0.335
Mean squared error for step 40: 0.333
Mean squared error for step 50: 0.338
Mean squared error for step 60: 0.336
Mean squared error for step 70: 0.333
Mean squared error for step 80: 0.335
Mean squared error for step 90: 0.333

Final train loss: 0.333
Final test loss: 0.260


Successfully registered model 'tensorflow_lr'.
2024/05/14 20:47:18 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tensorflow_lr, version 1
Created version '1' of model 'tensorflow_lr'.


In [13]:
run.info.run_id

'fa74c244f29c41008e80070446a68b9f'

In [14]:
logged_model = f"runs:/{run.info.run_id}/model"
loaded_model = mlflow.pyfunc.load_model(logged_model)

Downloading artifacts:   0%|          | 0/12 [00:00<?, ?it/s]

2024/05/14 20:42:05 INFO mlflow.store.artifact.artifact_repo: The progress bar can be disabled by setting the environment variable MLFLOW_ENABLE_ARTIFACTS_PROGRESS_BAR to false


In [15]:
outputs = loaded_model.predict(x_test.numpy())
outputs

array([2.2163126 , 1.6451656 , 1.804493  , 1.2201953 , 0.9080635 ,
       1.9017402 , 0.98906505, 1.3687284 , 1.9475015 , 2.0216427 ,
       1.7333492 , 2.47976   , 2.6459627 , 1.9686491 , 1.8496821 ,
       2.5025854 , 1.4471438 , 1.5147371 , 0.8561319 , 1.6534791 ,
       2.0665548 , 2.272903  , 1.4881923 , 1.9272366 , 2.1707692 ,
       1.1793005 , 2.5638027 , 3.6020558 , 1.5714581 , 3.479729  ,
       0.38970757, 0.91512764, 3.138976  , 2.6060925 , 2.3265615 ,
       0.83257914, 1.112021  , 1.8539352 , 2.5160146 , 1.8843647 ,
       1.8942856 , 1.3865997 , 2.0320039 , 2.1332588 , 2.2617807 ,
       0.81241846, 1.7360295 , 1.026869  , 2.1482837 , 2.2589004 ,
       2.1978738 , 2.290679  , 0.6576586 , 1.2034651 , 1.1126769 ,
       1.7134082 , 2.5083458 , 1.9382279 , 1.5992904 , 2.342893  ,
       1.224571  , 1.4508934 , 0.35418236, 1.2836893 , 1.6263521 ,
       2.066717  , 1.31025   , 2.6582181 , 1.3381137 , 1.1638503 ,
       2.8206224 , 1.7700377 , 1.8327236 , 1.3529251 , 3.18170

In [None]:
ngrok.kill()