# Imports

In [1]:
import warnings
warnings.filterwarnings("ignore")

# Load environment variables (the SF_* connection settings) from a .env file
import os
from dotenv import load_dotenv
load_dotenv()

# Snowpark Imports
from snowflake.snowpark.session import Session
from snowflake.snowpark import functions as F
from snowflake.ml.model.model_signature import FeatureSpec, DataType, ModelSignature

# Connect to Snowflake

In [2]:
# Connection settings are read from SF_* environment variables so that no
# credentials are hard-coded in the notebook.
snowflake_connection_cfg = {
    "ACCOUNT": os.getenv('SF_ACCOUNT'),
    "USER": os.getenv('SF_USER'),
    "ROLE": os.getenv('SF_ROLE'),
    "PASSWORD": os.getenv('SF_PASSWORD'),
    "DATABASE": os.getenv('SF_DATABASE'),
    "SCHEMA": os.getenv('SF_SCHEMA'),
    "WAREHOUSE": os.getenv('SF_WAREHOUSE')
}

# os.getenv() returns None for unset variables. Fail fast and name the missing
# variable(s) instead of letting the connector raise a less specific error.
# Only the settings that password authentication cannot work without are
# checked; role, database, schema and warehouse may legitimately be unset.
missing_env_vars = [
    f"SF_{key}"
    for key in ("ACCOUNT", "USER", "PASSWORD")
    if not snowflake_connection_cfg[key]
]
if missing_env_vars:
    raise ValueError(
        "Missing required Snowflake environment variable(s): "
        + ", ".join(missing_env_vars)
    )

# Creating Snowpark Session
session = Session.builder.configs(snowflake_connection_cfg).create()

# Echo the active context so it is obvious where the following cells will run
print('Role:     ', session.get_current_role())
print('Warehouse:', session.get_current_warehouse())
print('Database: ', session.get_current_database())
print('Schema:   ', session.get_current_schema())

Role:      "ACCOUNTADMIN"
Warehouse: "COMPUTE_WH"
Database:  "MACHINE_LEARNING"
Schema:    "PUBLIC"


In [50]:
# Sample coffee-shop reviews used as test data. Each review combines an
# opinion about the coffee with an opinion about something else (machine,
# staff, seating, wifi, ...), which is what aspect-based scoring has to separate.
coffee_reviews = [
    "Great tasting coffee but the machine keeps breaking.",
    "Love the coffee and the friendly staff, but the seating area is quite cramped.",
    "Deliciously smooth coffee, however, the customer service is often lacking.",
    "Delicious and strong coffee, however, the cafe is always overcrowded.",
    "Love the coffe but the wifi in that shop is terrible",
    "Terrible the coffe but the wifi in that shop is great!",
    "I hate the coffee, but the seating area is really good!",
]

# Load the reviews into a single-column Snowpark DataFrame and preview it
df = session.create_dataframe(
    data=coffee_reviews,
    schema=["REVIEWS"],
)
df.show(n=15, max_width=1000)

----------------------------------------------------------------------------------
|"REVIEWS"                                                                       |
----------------------------------------------------------------------------------
|Great tasting coffee but the machine keeps breaking.                            |
|Love the coffee and the friendly staff, but the seating area is quite cramped.  |
|Deliciously smooth coffee, however, the customer service is often lacking.      |
|Delicious and strong coffee, however, the cafe is always overcrowded.           |
|Love the coffe but the wifi in that shop is terrible                            |
|Terrible the coffe but the wifi in that shop is great!                          |
|I hate the coffee, but the seating area is really good!                         |
----------------------------------------------------------------------------------



# Register & Run Sentiment Model

In [9]:
# Open the Snowflake model registry that lives in the session's current
# database and schema; models are logged to it in the cells below.
from snowflake.ml.registry import Registry

reg = Registry(
    session=session,
    database_name=session.get_current_database(),
    schema_name=session.get_current_schema(),
)

In [14]:
# Load the aspect-based sentiment analysis pipeline from Hugging Face
from transformers import pipeline

pipe = pipeline(
    task="text-classification",
    model="yangheng/deberta-v3-base-absa-v1.1",
)

# The model is big, so switch to the 'snowpark_opt_wh' warehouse
# (SOWH -- presumably a Snowpark-optimized warehouse; confirm it exists).
session.use_warehouse(warehouse="snowpark_opt_wh")

# Log the pipeline to the Snowflake model registry. No signature is passed
# here, so the registry is left to work one out on its own.
snow_model = reg.log_model(
    model=pipe,
    model_name="deberta_v3_base_absa_v1_1",
    conda_dependencies=["tokenizers", "transformers"],
)

In [52]:
# Load the aspect-based sentiment analysis pipeline from Hugging Face
from transformers import pipeline

pipe = pipeline(
    task="text-classification",
    model="yangheng/deberta-v3-base-absa-v1.1",
)

# The model is big, so switch to the 'snowpark_opt_wh' warehouse
# (SOWH -- presumably a Snowpark-optimized warehouse; confirm it exists).
session.use_warehouse(warehouse="snowpark_opt_wh")

# Explicit model signature: one string input column (TEXT) and two output
# columns (label, score).
model_sig = ModelSignature(
    inputs=[
        FeatureSpec(name="TEXT", dtype=DataType.STRING),
    ],
    outputs=[
        FeatureSpec(name="label", dtype=DataType.STRING),
        FeatureSpec(name="score", dtype=DataType.DOUBLE),
    ],
)

# Log the pipeline to the Snowflake model registry with the custom signature
snow_model = reg.log_model(
    model=pipe,
    model_name="deberta_v3_base_absa_v1_1",
    signatures={"predict": model_sig},
    conda_dependencies=["tokenizers", "transformers"],
)

# The aspect whose sentiment should be extracted from every review
aspect = "coffee"

# Build the model input column: the text starts with [CLS], then the review,
# then the aspect enclosed between two [SEP] markers.
df = df.with_column(
    "TEXT",
    F.concat(
        F.lit("[CLS]"),
        F.col("REVIEWS"),
        F.lit("[SEP]"),
        F.lit(aspect),
        F.lit("[SEP]"),
    ),
)

# Score the reviews, cache the result, and display it without the helper
# TEXT column.
scored_data = snow_model.run(df).cache_result()
scored_data.drop("TEXT").show(n=10, max_width=1000)

------------------------------------------------------------------------------------------------------------------
|"REVIEWS"                                                                       |"label"   |"score"             |
------------------------------------------------------------------------------------------------------------------
|Great tasting coffee but the machine keeps breaking.                            |Positive  |0.9963699579238892  |
|Love the coffe but the wifi in that shop is terrible                            |Positive  |0.9949190020561218  |
|Terrible the coffe but the wifi in that shop is great!                          |Negative  |0.9925636649131776  |
|Deliciously smooth coffee, however, the customer service is often lacking.      |Positive  |0.9963778853416444  |
|I hate the coffee, but the seating area is really good!                         |Negative  |0.9926406741142272  |
|Delicious and strong coffee, however, the cafe is always overcrowded.          