In [3]:
import torch
import tensorflow as tf
from google.cloud import aiplatform
import vertexai
from FlanT5 import T5FineTuner, tokenize_dataset
import os


In [6]:
# 1. Set up paths and configurations
# Local directories are resolved with os.path.abspath, i.e. relative to the
# notebook's current working directory at the time this cell runs.
LOCAL_MODEL_DIR = os.path.abspath("local_model")
SAVED_MODEL_DIR = os.path.abspath("saved_model")  # destination passed to tf.saved_model.save
GCS_BUCKET = "ontologykg2"
GCS_MODEL_PATH = f"gs://{GCS_BUCKET}/model/flant5"  # artifact_uri used for the Vertex AI model upload
PROJECT_ID = "deft-return-439619-h9"
REGION = "us-west1"

# Checkpoint/export directories, also made absolute with os.path.abspath
LOCAL_CHECKPOINT_DIR = os.path.abspath("local_checkpoint/checkpoint_1000001")  # Use absolute path
LOCAL_EXPORT_DIR = os.path.abspath("exported_model")  # Use absolute path

# Lightning checkpoint to load; the path is relative to the working directory.
CKPT_PATH = "epoch=3-step=2072-train_loss=0.35.ckpt"
# Alternative checkpoints kept for reference:
#CKPT_PATH = "lightning_logs/version_34/epoch=3-step=2072-train_loss=0.35.ckpt"
#CKPT_PATH = "lightning_logs/version_30/final.ckpt"

Load custom model from checkpoint

In [7]:
# Load the fine-tuned Lightning checkpoint onto the CPU.
#
# map_location="cpu" remaps any tensors that were saved from a GPU so the
# checkpoint can be deserialized on a machine without CUDA.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
checkpoint = torch.load(CKPT_PATH, map_location="cpu")
print(checkpoint.keys())  # inspect what the checkpoint contains

llm = T5FineTuner.load_from_checkpoint(CKPT_PATH, map_location="cpu")

llm.model.eval() # set model to evaluation mode
llm = llm.to("cpu") # use CPU since I don't have GPU
print("Done")

# The wrapped PyTorch model held by the LightningModule.
model = llm.model

dict_keys(['epoch', 'global_step', 'pytorch-lightning_version', 'state_dict', 'loops', 'callbacks', 'optimizer_states', 'lr_schedulers', 'hparams_name', 'hyper_parameters'])




Done


Load pretrained model from Hugging Face

In [23]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import TFT5ForConditionalGeneration

# Sanity check: load the stock flan-t5-base weights as a TensorFlow model and
# run one generation to confirm the TF stack works end to end.
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
#model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")
tf_model = TFT5ForConditionalGeneration.from_pretrained("google/flan-t5-base")

print(f"Model type: {type(tf_model)}")

input_text = "translate English to German: How old are you?"
# The model is a TensorFlow model, so request TensorFlow tensors from the
# tokenizer instead of PyTorch ones.
input_ids = tokenizer(input_text, return_tensors="tf").input_ids

outputs = tf_model.generate(input_ids)
print(tokenizer.decode(outputs[0]))

All PyTorch model weights were used when initializing TFT5ForConditionalGeneration.

All the weights of TFT5ForConditionalGeneration were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.


Model type: <class 'transformers.models.t5.modeling_tf_t5.TFT5ForConditionalGeneration'>




<pad> Wie old sind Sie?</s>


In [24]:
# 1. Convert PyTorch model to TensorFlow
def convert_pt_to_tf(llm):
    """Convert the fine-tuned PyTorch T5 model held by ``llm`` to TensorFlow.

    The PyTorch weights in ``llm.model`` are written to a temporary directory
    with ``save_pretrained`` and re-loaded as a TensorFlow model with
    ``from_pt=True``. Loading by hub model name instead would return the
    base pretrained weights and silently discard the fine-tuning.

    Args:
        llm: Object exposing the fine-tuned Hugging Face PyTorch model as
            ``llm.model`` (here the loaded ``T5FineTuner``).

    Returns:
        A ``TFT5ForConditionalGeneration`` carrying the weights and config of
        ``llm.model``.
    """
    import tempfile

    from transformers import TFT5ForConditionalGeneration

    with tempfile.TemporaryDirectory() as pt_export_dir:
        # Write a classic PyTorch .bin checkpoint (plus config.json), which is
        # the format the from_pt=True loading path expects.
        llm.model.save_pretrained(pt_export_dir, safe_serialization=False)
        tf_model = TFT5ForConditionalGeneration.from_pretrained(
            pt_export_dir,
            from_pt=True,
        )

    # Verify the conversion
    print("Model converted from PyTorch to TensorFlow")
    print(f"Model type: {type(tf_model)}")

    return tf_model

# Convert PyTorch model to TF
tf_model = convert_pt_to_tf(llm)
model = tf_model  # Use converted model for serving

In [17]:
@tf.function(input_signature=[{
    'input_ids': tf.TensorSpec(shape=(1,512), dtype=tf.int32, name='input_ids'),
    'attention_mask': tf.TensorSpec(shape=(1,512),dtype=tf.int32, name='attention_mask')
}])
def serving_fn(inputs):
    """Run beam-search generation on one tokenized request.

    Args:
        inputs: dict with ``'input_ids'`` and ``'attention_mask'``, each an
            int32 tensor of shape (1, 512) as fixed by the input signature.

    Returns:
        dict with a single key ``'sequences'`` holding the generated token ids.
    """
    # `model` is the module-level model at the time this function is traced.
    # Only the sequences are returned, so per-step scores are not requested.
    outputs = model.generate(
        input_ids=inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_length=128,
        num_beams=4,
        pad_token_id=model.config.pad_token_id,
        eos_token_id=model.config.eos_token_id,
        bos_token_id=model.config.bos_token_id,
        use_cache=True,
        do_sample=False,
        num_return_sequences=1,
        return_dict_in_generate=True,
    )
    return {'sequences': outputs.sequences}


from CustomDataset import CustomDataset
from torch.utils.data import DataLoader
import pandas as pd

from transformers import (
    AutoTokenizer
)

# Tokenizer handed to CustomDataset inside _get_instance. This rebinds the
# `tokenizer` name created in the Hugging Face sanity-check cell.
# NOTE(review): this loads the 'google/flan-t5-small' tokenizer while the model
# cells load 'google/flan-t5-base' — confirm it matches the tokenizer used for fine-tuning.
tokenizer = AutoTokenizer.from_pretrained('google/flan-t5-small')

#from app.utils.flant5_client import FlanT5Client
def _get_instance(text):
    """
    Reformats text in FlanT5's CustomDataset format

    Wraps ``text`` in a one-row DataFrame (empty ``title``, ``sent`` = text),
    runs it through ``CustomDataset`` with the module-level ``tokenizer`` and
    returns the first batch produced by a batch-size-1 DataLoader.

    Args:
        text: The raw query string to tokenize.

    Returns:
        Tuple ``(input_ids, attention_mask)`` taken from the batch keys
        ``'source_ids'`` and ``'source_mask'``.

    Raises:
        ValueError: If the dataset yields no batch for ``text``.
    """
    df = pd.DataFrame(data={'title':'', 'sent':text}, index=[0])
    dataset = CustomDataset(tokenizer=tokenizer, dataset=df, type_path='test')

    # A single row does not justify a worker process; num_workers=0 loads it in
    # the current process and avoids spawning/pickling overhead.
    dataloader = DataLoader(dataset, batch_size=1, num_workers=0, shuffle=False)

    batch = next(iter(dataloader), None)
    if batch is None:
        raise ValueError("CustomDataset produced no samples for the given text")

    return batch['source_ids'], batch['source_mask']

# 2.5 Test the serving function
def test_serving_fn():
    """Smoke-test serving_fn on one sample query and return its result."""
    query_text = "I am looking for a gift card suitable as a birthday gift for a writer."
    ids, mask = _get_instance(query_text)

    # Feed the function the same dict layout its input signature declares.
    generated = serving_fn({'input_ids': ids, 'attention_mask': mask})
    print("Serving function test successful")
    return generated

# Test before saving
test_serving_fn()



Serving function test successful


{'sequences': <tf.Tensor: shape=(1, 128), dtype=int32, numpy=
 array([[   0,   27,  183,  479,   21,    3,    9, 1876,  895, 3255,   38,
            3,    9, 3591, 1876,   21,    3,    9, 4346,    5,    1,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0]])>}

In [25]:
# 3. Save as SavedModel format for Vertex AI
# Export serving_fn as the default serving signature so the SavedModel exposes
# the generate() path (input_ids + attention_mask -> sequences). Without an
# explicit signature, the generation function defined above is not exported.
tf.saved_model.save(
    model,
    SAVED_MODEL_DIR,
    signatures={"serving_default": serving_fn},
)
print(f"SavedModel saved to {SAVED_MODEL_DIR}")

INFO:tensorflow:Assets written to: c:\Users\lawfu\Documents\_Library\_School\SJSU MS Data Analytics\202403_Fall_DATA-298B_FinalProject\KGRecommender\models\flant5\saved_model\assets


INFO:tensorflow:Assets written to: c:\Users\lawfu\Documents\_Library\_School\SJSU MS Data Analytics\202403_Fall_DATA-298B_FinalProject\KGRecommender\models\flant5\saved_model\assets


SavedModel saved to c:\Users\lawfu\Documents\_Library\_School\SJSU MS Data Analytics\202403_Fall_DATA-298B_FinalProject\KGRecommender\models\flant5\saved_model


Manually upload the SavedModel directory (`saved_model.pb` together with its `variables/` and `assets/` folders) to the GCS bucket.

In [26]:
GCS_BUCKET = "ontologykg2"
GCS_MODEL_PATH = f"gs://{GCS_BUCKET}/model/flant5"

# 4. Upload to GCS and deploy to Vertex AI
# This cell only registers the artifacts already present at GCS_MODEL_PATH;
# it does not copy the local SavedModel to the bucket.
print("Uploading to GCS and deploying to Vertex AI...")
vertexai.init(project=PROJECT_ID, location=REGION)

# Upload the model to Vertex AI's Model Registry
model = aiplatform.Model.upload(
    display_name="flan-t5-base",
    artifact_uri=GCS_MODEL_PATH,
    serving_container_image_uri="us-docker.pkg.dev/vertex-ai/prediction/tf2-gpu.2-12:latest", 
)

# Specify your existing endpoint ID
# The endpoint resource name uses the numeric project number. It is kept in its
# own constant so PROJECT_ID is not overwritten and the cell can be re-run.
ENDPOINT_PROJECT_NUMBER = "371748443295"
ENDPOINT_REGION = "us-west1"
ENDPOINT_ID = "8518052919822516224"
# Retrieve the existing endpoint
endpoint = aiplatform.Endpoint(endpoint_name=f"projects/{ENDPOINT_PROJECT_NUMBER}/locations/{ENDPOINT_REGION}/endpoints/{ENDPOINT_ID}")

Uploading to GCS and deploying to Vertex AI...
Creating Model


INFO:google.cloud.aiplatform.models:Creating Model


Create Model backing LRO: projects/371748443295/locations/us-west1/models/2985046526063017984/operations/104966526813077504


INFO:google.cloud.aiplatform.models:Create Model backing LRO: projects/371748443295/locations/us-west1/models/2985046526063017984/operations/104966526813077504


Model created. Resource name: projects/371748443295/locations/us-west1/models/2985046526063017984@1


INFO:google.cloud.aiplatform.models:Model created. Resource name: projects/371748443295/locations/us-west1/models/2985046526063017984@1


To use this Model in another session:


INFO:google.cloud.aiplatform.models:To use this Model in another session:


model = aiplatform.Model('projects/371748443295/locations/us-west1/models/2985046526063017984@1')


INFO:google.cloud.aiplatform.models:model = aiplatform.Model('projects/371748443295/locations/us-west1/models/2985046526063017984@1')


Manually deploy the model to the endpoint in the GCP Console to set min_replica_count to none.

GCP Console > Vertex AI > Model Registry > flan-t5-base > Deploy & Test > Deploy to Endpoint


In [None]:
# Deploy the model to the endpoint
# Vertex AI rejects min_replica_count=0 for this kind of deployment, so pin the
# deployment to exactly one replica (min == max means no auto-scaling).
model.deploy(
    endpoint=endpoint,
    machine_type="n1-standard-4",  # 4vCPUs, 15GB memory
    accelerator_type="NVIDIA_TESLA_T4",
    accelerator_count=1,
    min_replica_count=1,
    max_replica_count=1,
)

print(f"Model deployed to endpoint: {endpoint.resource_name}")


    Uploading to GCS and deploying to Vertex AI...
    Creating Model
    INFO:google.cloud.aiplatform.models:Creating Model
    Create Model backing LRO: projects/371748443295/locations/us-west1/models/5844832289443282944/operations/7813018026546036736
    INFO:google.cloud.aiplatform.models:Create Model backing LRO: projects/371748443295/locations/us-west1/models/5844832289443282944/operations/7813018026546036736
    Model created. Resource name: projects/371748443295/locations/us-west1/models/5844832289443282944@1
    INFO:google.cloud.aiplatform.models:Model created. Resource name: projects/371748443295/locations/us-west1/models/5844832289443282944@1
    To use this Model in another session:
    INFO:google.cloud.aiplatform.models:To use this Model in another session:
    model = aiplatform.Model('projects/371748443295/locations/us-west1/models/5844832289443282944@1')
    INFO:google.cloud.aiplatform.models:model = aiplatform.Model('projects/371748443295/locations/us-west1/models/5844832289443282944@1')
   
    Creating Endpoint
    INFO:google.cloud.aiplatform.models:Creating Endpoint
    Create Endpoint backing LRO: projects/371748443295/locations/us-west1/endpoints/8518052919822516224/operations/8211586593568325632
    INFO:google.cloud.aiplatform.models:Create Endpoint backing LRO: projects/371748443295/locations/us-west1/endpoints/8518052919822516224/operations/8211586593568325632
    Endpoint created. Resource name: projects/371748443295/locations/us-west1/endpoints/8518052919822516224
    INFO:google.cloud.aiplatform.models:Endpoint created. Resource name: projects/371748443295/locations/us-west1/endpoints/8518052919822516224
    To use this Endpoint in another session:
    INFO:google.cloud.aiplatform.models:To use this Endpoint in another session:
    endpoint = aiplatform.Endpoint('projects/371748443295/locations/us-west1/endpoints/8518052919822516224')
    INFO:google.cloud.aiplatform.models:endpoint = aiplatform.Endpoint('projects/371748443295/locations/us-west1/endpoints/8518052919822516224')
    Deploying model to Endpoint : projects/371748443295/locations/us-west1/endpoints/8518052919822516224
    INFO:google.cloud.aiplatform.models:Deploying model to Endpoint : projects/371748443295/locations/us-west1/endpoints/8518052919822516224
    Deploy Endpoint model backing LRO: projects/371748443295/locations/us-west1/endpoints/8518052919822516224/operations/1435920954189414400
    INFO:google.cloud.aiplatform.models:Deploy Endpoint model backing LRO: projects/371748443295/locations/us-west1/endpoints/8518052919822516224/operations/1435920954189414400
    Endpoint model deployed. Resource name: projects/371748443295/locations/us-west1/endpoints/8518052919822516224
    INFO:google.cloud.aiplatform.models:Endpoint model deployed. Resource name: projects/371748443295/locations/us-west1/endpoints/8518052919822516224
    Model deployed to endpoint: projects/371748443295/locations/us-west1/endpoints/8518052919822516224

In [None]:
#flant5 = FlanT5Client() #uses FLANT5_ENDPOINT set in utils.rag_constants.py by default
# Test prediction
# `model` is an aiplatform.Model (registry entry), which has no
# generate_content(); online predictions go through the Endpoint instead.
test_text = "Translate to French: Hello, how are you?"
input_ids, attention_mask = _get_instance(test_text)

# One instance per request row; each field is the un-batched token list.
# NOTE(review): the payload keys must match the serving signature of the
# deployed SavedModel (input_ids / attention_mask) — confirm after deployment.
instance = {
    "input_ids": input_ids[0].tolist(),
    "attention_mask": attention_mask[0].tolist(),
}
response = endpoint.predict(instances=[instance])
print(f"Test prediction response: {response}")