In [11]:
from snowflake.snowpark.session import Session
from snowflake.ml.model.models import huggingface_pipeline
from snowflake.ml.registry import model_registry
from snowflake.ml.model import deploy_platforms
from snowflake.snowpark import VERSION
import snowflake.snowpark.functions as F

import sys
import os
import json
import pandas as pd
pd.set_option('display.max_colwidth', None)

import streamlit as st

In [12]:
# Create the Snowflake Session object from a local JSON file of connection
# parameters. The file is read inside a `with` block so the handle is closed
# as soon as parsing finishes rather than being left open for the lifetime
# of the kernel.
with open('../connection.json') as connection_file:
    connection_parameters = json.load(connection_file)
session = Session.builder.configs(connection_parameters).create()
session.sql_simplifier_enabled = True

# A single query returns one row holding (current user, server version).
snowflake_environment = session.sql('select current_user(), current_version()').collect()
snowpark_version = VERSION

# Current Environment Details
print('User                        : {}'.format(snowflake_environment[0][0]))
print('Role                        : {}'.format(session.get_current_role()))
print('Database                    : {}'.format(session.get_current_database()))
print('Schema                      : {}'.format(session.get_current_schema()))
print('Warehouse                   : {}'.format(session.get_current_warehouse()))
print('Snowflake version           : {}'.format(snowflake_environment[0][1]))
print('Snowpark for Python version : {}.{}.{}'.format(snowpark_version[0],snowpark_version[1],snowpark_version[2]))

User                        : DASH
Role                        : "SERVICESNOW_USER_ROLE"
Database                    : "DASH_DB"
Schema                      : "DASH_SCHEMA"
Warehouse                   : "DASH_L"
Snowflake version           : 7.39.4
Snowpark for Python version : 1.8.0


In [13]:

# DDL statement for the DASH_BUILD_GPU3 compute pool: exactly one node
# (MIN_NODES = MAX_NODES = 1) of the GPU_3 instance family, with
# AUTO_SUSPEND_SECS set to 7200.
create_compute_pool = (
    "CREATE COMPUTE POOL DASH_BUILD_GPU3\n"
    "MIN_NODES = 1\n"
    "MAX_NODES = 1\n"
    "INSTANCE_FAMILY = GPU_3\n"
    "AUTO_SUSPEND_SECS = 7200\n"
)
# The statement is not executed here (presumably the pool already exists --
# confirm before uncommenting):
# session.sql(create_compute_pool).collect()

In [14]:
# Hugging Face access token, read from the environment so it never appears in
# the notebook. os.getenv returns None when the variable is unset.
# NOTE(review): the variable is spelled "HUGGING_CASE_TOKEN" -- possibly a typo
# for "HUGGING_FACE_TOKEN"; confirm against the deployment environment before
# renaming it.
hf_access_token = os.getenv("HUGGING_CASE_TOKEN")

# Text-generation pipeline definition for the Llama 2 7B chat checkpoint.
# return_full_text=False and max_new_tokens=100 are passed through as
# generation settings.
llama_model = huggingface_pipeline.HuggingFacePipelineModel(
    task="text-generation",
    model="meta-llama/Llama-2-7b-chat-hf",
    token=hf_access_token,
    return_full_text=False,
    max_new_tokens=100,
)

# sys.getsizeof reports only the wrapper object's own footprint, not anything
# it references, so this is not a measure of the model's size.
sys.getsizeof(llama_model)

48

In [15]:
# Name and version under which the model is looked up in the registry.
MODEL_NAME = "LLAMA2_7b_CHAT"
MODEL_VERSION = "1.5"

# Open the model registry in DASH_DB.DASH_SCHEMA; create_if_not_exists=True
# asks for it to be created when it is missing.
registry = model_registry.ModelRegistry(
    session=session,
    database_name="DASH_DB",
    schema_name="DASH_SCHEMA",
    create_if_not_exists=True,
)

# NOTE: log_model and deploy are one-time operations.
#       Once the model has been logged and deployed, use ModelReference
#       (at the end of this cell) to get a reference to the model instead.

# llama_model_ref = registry.log_model(
#     model_name=MODEL_NAME,
#     model_version=MODEL_VERSION,
#     model=llama_model
# )

# llama_model_ref.deploy(
#     deployment_name="llama_predict",
#     platform=deploy_platforms.TargetPlatform.SNOWPARK_CONTAINER_SERVICES,
#     permanent=True,
#     options={"compute_pool": "DASH_BUILD_GPU3", "num_gpus": 1})

# Build a reference to the model identified by MODEL_NAME / MODEL_VERSION.
llama_model_ref = model_registry.ModelReference(
    registry=registry,
    model_name=MODEL_NAME,
    model_version=MODEL_VERSION,
)
llama_model_ref



<snowflake.ml.registry.model_registry.ModelReference at 0x293b69c60>

In [16]:
# NOTE: reading the JSON data and storing it in a Snowflake table is a
#       one-time operation. Once the data is loaded, use Snowpark to read it
#       back from the table.

# df = pd.read_json("../day1/data/frosty_transcripts_all_v8.json",lines=True)
# sf_df = session.write_pandas(df,'frosty_transcripts',auto_create_table=True,quote_identifiers=False)

# Reference the transcripts table through the session and preview it.
transcripts_table = 'frosty_transcripts'
sf_df = session.table(transcripts_table)
sf_df.show(max_width=600)

-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"LANGUAGE"  |"TRANSCRIPT"                                                                                                                                                                               |"NAME"  |"LOCATION"  |"TOY_LIST"                                                                |
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|EN          |caller: Hello!                                                                        

In [17]:
# Instruction wrapper placed around every transcript. The opening text begins
# and ends with a newline; the closing tag begins with a space.
begin_prompt = "\n[INST] Summarize this transcript in less than 200 words: \n"
end_prompt = " [/INST]"

# Join opening text, transcript, and closing tag with single spaces.
prompt_column = F.concat_ws(
    F.lit(" "),
    F.lit(begin_prompt),
    F.col('transcript'),
    F.lit(end_prompt),
)

# Keep only the prompt, in a column named with a quoted lower-case "inputs".
df_inputs = sf_df.with_column('"inputs"', prompt_column).select('"inputs"')
df_inputs.show(max_width=600)

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"inputs"                                                                                                                                                                                   |
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|                                                                                                                                                                                           |
|[INST] Summarize this transcript in less than 200 words:                                                                                                                                   |
| caller: Hello!                                  

In [18]:
# Send the prompt DataFrame to the "llama_predict" deployment of the
# referenced model and keep the returned results.
df_predict_results = llama_model_ref.predict(
    deployment_name="llama_predict",
    data=df_inputs,
)

In [9]:
# Preview the prediction results; max_width=600 widens the printed columns.
# NOTE(review): this cell's recorded execution count (9) is lower than that of
# the cells around it, so the saved output may come from an earlier run --
# re-run it after the predict cell to refresh.
df_predict_results.show(max_width=600)

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"inputs"                                                                                                                                                                                   |"outputs"                                                                                                                                                                       

In [19]:
# Extract the generated text from the "outputs" column step by step:
# parse the column as JSON, take element 0, then its "generated_text" field.
outputs_json = F.parse_json(F.col('"outputs"'))
first_generation = F.get(outputs_json, 0)
generated_text = F.get(first_generation, F.lit("generated_text"))

# Turn each backslash-escaped quote (\") back into a plain double quote.
summary_text = F.replace(generated_text, r"\"", '"')

# Append the cleaned text as a "summary" column next to the existing columns.
df_summary = df_predict_results.with_columns(["summary"], [summary_text])

df_summary.show(max_width=600)

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------