In [1]:
import mlflow
import tensorflow as tf
from transformers import AutoTokenizer
import os

In [8]:
import mlflow.pyfunc
import tensorflow as tf
from safetensors.tensorflow import load_file  

# Value handed to SafeTensorModel(input_shape=...) when the model is logged;
# the class uses it to build its Keras Input layer as shape=(input_shape,).
input_shape = 1 

class SafeTensorModel(mlflow.pyfunc.PythonModel):
    """MLflow pyfunc wrapper around a two-layer Keras network.

    The network is ``Input(shape=(input_shape,)) -> Dense(64, relu) ->
    Dense(1, sigmoid)``. It is constructed eagerly in ``__init__`` and kept on
    the instance as ``self.model``.

    NOTE(review): nothing in this class reads ``context.artifacts`` or calls
    the imported ``load_file``; no weights are loaded into the network here.
    Confirm whether the safetensors file is meant to be loaded (for example in
    a ``load_context`` hook) before relying on the predictions.
    """

    def __init__(self, input_shape):
        """Store the input width and build the Keras network immediately.

        Args:
            input_shape: Size of the single feature axis; used as
                ``shape=(input_shape,)`` for the Keras ``Input`` layer.
        """
        self.input_shape = input_shape
        self.model = self.build_model()

    def build_model(self):
        """Return a freshly constructed ``tf.keras.Sequential`` network."""
        dense = tf.keras.layers.Dense
        stack = [
            tf.keras.Input(shape=(self.input_shape,)),
            dense(64, activation='relu'),
            dense(1, activation='sigmoid'),
        ]
        return tf.keras.Sequential(stack)

    def predict(self, context, model_input):
        """Run the wrapped Keras model on ``model_input``.

        Args:
            context: MLflow model context; not used by this implementation.
            model_input: Forwarded unchanged to ``self.model.predict``.

        Returns:
            Whatever ``self.model.predict(model_input)`` returns.
        """
        network = self.model
        return network.predict(model_input)

In [9]:
import json
import os
import shutil
import tempfile

import mlflow
import numpy as np
import pandas as pd
from mlflow.models import ModelSignature
from mlflow.types import Schema, ColSpec
from transformers import AutoTokenizer, AutoModel

# Start an MLflow run
# One MLflow run that records the model files, tokenizer files, training
# parameters, a loss metric and a single pyfunc model under "model".
# The temporary staging directory is owned by the `with` statement, so it is
# removed even when one of the logging calls raises.
with mlflow.start_run() as run, tempfile.TemporaryDirectory() as staging_root:

    # Path to your model
    model_directory = r"D:\Study\DEPI-Gneratve AI\Final_project\chat2\model"

    # Stage a flat copy of the model files OUTSIDE model_directory.
    # Creating the copy inside model_directory (as before) meant that logging
    # model_directory also uploaded the copy, i.e. every file twice.
    # The sub-directory keeps the name "temp_model_dir" so the directory name
    # that ends up in the logged model via code_paths is unchanged.
    temp_directory = os.path.join(staging_root, "temp_model_dir")
    os.makedirs(temp_directory, exist_ok=True)

    # Copy only the top-level files; sub-directories are skipped
    for file_name in os.listdir(model_directory):
        full_file_name = os.path.join(model_directory, file_name)
        if os.path.isfile(full_file_name):
            shutil.copy(full_file_name, temp_directory)

    # Log the whole model directory as a run artifact
    mlflow.log_artifact(model_directory, artifact_path="safetensors_file")

    # Load the tokenizer (this also checks that the directory is loadable)
    local_tokenizer_path = r"D:\Study\DEPI-Gneratve AI\Final_project\chat2\token"
    tokenizer = AutoTokenizer.from_pretrained(local_tokenizer_path)

    # Log all top-level files in the tokenizer directory
    for filename in os.listdir(local_tokenizer_path):
        file_path = os.path.join(local_tokenizer_path, filename)
        if os.path.isfile(file_path):  # Ensure it's a file
            mlflow.log_artifact(file_path, artifact_path="tokenizer")

    # Load parameters from the JSON file.
    # Raw string: the path contains backslashes that are not valid escape
    # sequences, which newer Python versions warn about in a normal literal.
    with open(
        r"D:\Study\DEPI-Gneratve AI\Final_project\chat2\parameters-2.json",
        'r',
        encoding="utf-8",
    ) as f:
        params = json.load(f)

    for key, value in params.items():
        mlflow.log_param(key, value)

    # Log metrics
    # NOTE(review): the same constant is logged for every epoch; this looks
    # like a placeholder for real per-epoch losses - confirm.
    for epoch in range(params['num_epochs']):
        loss = 1.773
        mlflow.log_metric("loss", loss, step=epoch)

    # Define the input schema and output schema for the model signature
    # NOTE(review): both columns are declared as "string", while
    # SafeTensorModel.predict forwards the input straight to a Keras Dense
    # network - confirm the signature matches what predict actually accepts.
    input_schema = Schema([
        ColSpec("string")
    ])
    output_schema = Schema([
        ColSpec("string")  # Adjust according to your output
    ])

    signature = ModelSignature(inputs=input_schema, outputs=output_schema)

    # Log the pyfunc model exactly once. An earlier, signature-less
    # log_model call to the same "model" artifact path was removed: it
    # uploaded the model directory a second time and left stale artifacts
    # next to the ones written here.
    input_shape = 1  # Adjust according to your model's expected input shape
    mlflow.pyfunc.log_model(
        artifact_path="model",
        python_model=SafeTensorModel(input_shape=input_shape),
        artifacts={
            # Reference the safetensors file in the staging directory
            "safetensors_file": os.path.join(temp_directory, "model.safetensors")
        },
        # NOTE(review): code_paths is meant for Python source dependencies;
        # here it copies the staged model files into the logged model.
        # Kept to preserve the logged layout - confirm this is intended.
        code_paths=[temp_directory],
        # NOTE(review): this environment lists neither mlflow nor tensorflow,
        # which the model class uses - confirm before restoring it elsewhere.
        conda_env={
            'channels': ['defaults'],
            'dependencies': [
                'python=3.9',
                'cloudpickle=3.1.0',
                'pandas'
            ],
            'name': 'Gen_environment'
        },
        signature=signature  # Pass the model signature
    )
    # No manual cleanup: leaving the `with` block deletes staging_root
    # together with temp_directory.

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]



Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

In [10]:
# Reload the pyfunc model that was logged under the "model" artifact path.
# `run` is the object bound by `with mlflow.start_run() as run:` in the logging
# cell, so that cell must have been executed in this kernel first.
model_uri = f"runs:/{run.info.run_id}/model"
loaded_model = mlflow.pyfunc.load_model(model_uri)