# Imports

In [1]:
import warnings
# Suppress every warning for the rest of the kernel session. This is executed
# before the third-party imports below, so warnings raised while importing
# them are hidden as well.
warnings.filterwarnings("ignore")

# Load variables: load_dotenv() is called without arguments, i.e. it relies on
# python-dotenv's default .env lookup. The SF_* settings are read afterwards
# with os.getenv when the Snowflake connection is configured.
import os
from dotenv import load_dotenv
load_dotenv()

# Snowpark imports: Session is used to open the connection; the functions
# module is made available under the alias F.
from snowflake.snowpark.session import Session
import snowflake.snowpark.functions as F

# Connect to Snowflake

In [2]:
# Connection settings are read from SF_* environment variables so that no
# credentials are hard-coded in the notebook. os.getenv returns None for any
# variable that is not set.
snowflake_connection_cfg = {
    "ACCOUNT": os.getenv('SF_ACCOUNT'),
    "USER": os.getenv('SF_USER'),
    "ROLE": os.getenv('SF_ROLE'),
    "PASSWORD": os.getenv('SF_PASSWORD'),
    "DATABASE": os.getenv('SF_DATABASE'),
    "SCHEMA": os.getenv('SF_SCHEMA'),
    "WAREHOUSE": os.getenv('SF_WAREHOUSE')
}

# Fail fast with a readable message when a setting that password-based login
# cannot work without is missing or empty; otherwise the problem only shows up
# as an opaque connector error inside create(). ROLE, DATABASE, SCHEMA and
# WAREHOUSE are left unchecked and passed through exactly as before.
missing_settings = [
    f"SF_{key}"
    for key in ("ACCOUNT", "USER", "PASSWORD")
    if not snowflake_connection_cfg[key]
]
if missing_settings:
    raise ValueError(
        "Missing Snowflake connection settings: "
        + ", ".join(missing_settings)
        + ". Set them in the environment or in the .env file."
    )

# Creating Snowpark Session
session = Session.builder.configs(snowflake_connection_cfg).create()

# Echo the active context so the reader can confirm where the notebook runs.
print('Role:     ', session.get_current_role())
print('Warehouse:', session.get_current_warehouse())
print('Database: ', session.get_current_database())
print('Schema:   ', session.get_current_schema())

Role:      "ACCOUNTADMIN"
Warehouse: "COMPUTE_WH"
Database:  "MACHINE_LEARNING"
Schema:    "PUBLIC"


In [13]:
# Small hand-written corpus used as test input: one short paragraph per topic.
# Each entry is a single string; adjacent literals inside the parentheses are
# concatenated by Python, which keeps the source lines readable.
topic_texts = [
    (
        "Technology is constantly evolving, leading to innovations like smartphones, "
        "electric vehicles, and smart homes. These advancements are making everyday "
        "tasks more convenient and transforming how we interact with our environment."
    ),
    (
        "Animals play a crucial role in our ecosystem. From pollinators like bees that "
        "help in the growth of fruits and vegetables, to predators that maintain the "
        "balance of species, each animal has a unique role that supports ecological balance."
    ),
    (
        "Beauty trends have shifted significantly over the years, emphasizing more "
        "natural and sustainable products. Today's beauty industry focuses on ethical "
        "sourcing and transparency, catering to a more environmentally conscious consumer base."
    ),
    (
        "Politics influence many aspects of our daily lives, from the quality of the "
        "roads we drive on to the safety of our neighborhoods. Understanding political "
        "systems and participating in electoral processes is crucial for ensuring that "
        "our voices are heard and our needs are met."
    ),
    (
        "Machine learning, a subset of artificial intelligence, involves the development "
        "of algorithms that can learn from and make predictions based on data. This "
        "technology is revolutionizing industries by providing more personalized "
        "experiences and improving decision-making processes."
    ),
]

# Turn the list into a single-column Snowpark DataFrame and preview it.
df = session.create_dataframe(data=topic_texts, schema=["INPUTS"])
df.show(15, max_width=1000)

-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"INPUTS"                                                                                                                                                                                                                                                                                 |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|Technology is constantly evolving, leading to innovations like smartphones, electric vehicles, and smart homes. These advancements are making every

# Register & Run Text2Text Model

In [4]:
# Open the model registry in the database and schema the session is
# currently using.
from snowflake.ml.registry import Registry

reg = Registry(
    session=session,
    database_name=session.get_current_database(),
    schema_name=session.get_current_schema(),
)

In [16]:
# Fetch the text2text-generation (tag generation) pipeline from Hugging Face.
# The model has to fit into a Snowflake warehouse and must not require GPUs;
# otherwise it must be deployed in Snowpark Container Services instead.
from transformers import pipeline

pipe = pipeline(
    task="text2text-generation",
    model="fabiochiu/t5-base-tag-generation",
)

# Switch to a Snowpark-optimized warehouse because the model is big.
session.use_warehouse('snowpark_opt_wh')

# Log the pipeline to the Snowflake model registry. A small sample of the
# input DataFrame is supplied as sample input data.
snow_model_custom = reg.log_model(
    pipe,
    model_name='t5_base_tag_generation',
    conda_dependencies=[
        'tokenizers',
        'transformers',
        'sentencepiece',
    ],
    sample_input_data=df.limit(10),
)

# Run the registered model on the input texts, cache the result and preview it.
tags = snow_model_custom.run(df).cache_result()
tags.show(max_width=100)

-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"INPUTS"                                                                                              |"GENERATED_TEXT"                                                          |
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|Beauty trends have shifted significantly over the years, emphasizing more natural and sustainable...  |Beauty                                                                    |
|Politics influence many aspects of our daily lives, from the quality of the roads we drive on to ...  |Politics, Democracy, Society, Democracy                                   |
|Animals play a crucial role in our ecosystem. From pollinators like bees that help in the growth ..