# Deploy almost any Hugging Face Model even with a slow Internet connection
Almost all of the examples in this repository assume you're working with a local client and require uploading the model from your local machine to Snowflake.  
Depending on the model size, this can take quite some time.  

This notebook shows how to deploy a Stored Procedure that uses Snowflake's External Access Integration to securely deploy almost any Hugging Face model directly within Snowflake.  
This means that the model is downloaded from within a Snowflake Stored Procedure and afterwards registered as a model in Snowflake's Model Registry.  

The deployed function itself <u><b>does not require external access</b></u> since it incorporates all the required Hugging Face assets on its own.  
This makes it very secure since no data (even if the model contains malicious code) can ever leave your Snowflake environment.

# Imports

In [1]:
# Suppress all Python warnings so the notebook output stays readable
import warnings
warnings.filterwarnings("ignore")

# Load variables
# load_dotenv() is called without arguments; the SF_* settings read via os.getenv
# when connecting are presumably supplied by a local .env file — confirm for your setup
import os
from dotenv import load_dotenv
load_dotenv()

# Snowpark Imports
from snowflake.snowpark.session import Session
from snowflake.snowpark import functions as F
from snowflake.snowpark.functions import sproc
from snowflake.ml.model.model_signature import FeatureSpec, DataType, ModelSignature

# Connect to Snowflake

In [29]:
# Build the connection settings from the SF_<KEY> environment variables
connection_keys = ("ACCOUNT", "USER", "ROLE", "PASSWORD", "DATABASE", "SCHEMA", "WAREHOUSE")
snowflake_connection_cfg = {key: os.getenv(f'SF_{key}') for key in connection_keys}

# Open the Snowpark session with those settings
session = Session.builder.configs(snowflake_connection_cfg).create()

# Switch the session to the Snowpark-optimized warehouse
session.use_warehouse('snowpark_opt_wh')

# Report the active session context, one line per setting
for label, getter in (
    ('Role:     ', session.get_current_role),
    ('Warehouse:', session.get_current_warehouse),
    ('Database: ', session.get_current_database),
    ('Schema:   ', session.get_current_schema),
):
    print(label, getter())

Role:      "ACCOUNTADMIN"
Warehouse: "SNOWPARK_OPT_WH"
Database:  "MACHINE_LEARNING"
Schema:    "PUBLIC"


# External Access Integration

In [24]:
# Create the network rule and external access integration (requires appropriate privileges)
# https://docs.snowflake.com/sql-reference/sql/create-network-rule#access-control-requirements
# https://docs.snowflake.com/en/sql-reference/sql/create-external-access-integration#access-control-requirements

# Egress rule listing the Hugging Face hosts
network_rule_sql = """CREATE OR REPLACE NETWORK RULE HUGGING_FACE_NETWORK_RULE
                    MODE = EGRESS
                    TYPE = HOST_PORT
                    VALUE_LIST = ('huggingface.co','cdn-lfs-us-1.huggingface.co')"""

# Integration that references the rule above by name
access_integration_sql = """CREATE OR REPLACE EXTERNAL ACCESS INTEGRATION HUGGING_FACE_ACCESS_INTEGRATION
                    ALLOWED_NETWORK_RULES = (HUGGING_FACE_NETWORK_RULE)
                    ENABLED = true"""

# Stage named as the stage_location of the DEPLOY_HUGGINGFACE_MODEL stored procedure
stage_sql = 'CREATE STAGE IF NOT EXISTS HUGGINGFACE_FUNCTIONS'

# The rule must exist before the integration that names it
session.sql(network_rule_sql).collect()
session.sql(access_integration_sql).collect()

# Kept as the final expression so its status row is the cell's displayed output
session.sql(stage_sql).collect()

[Row(status='HUGGINGFACE_FUNCTIONS already exists, statement succeeded.')]

# Stored Procedure with External Access to create Hugging Face Functions in Snowflake

In [32]:
# Stored Procedure with External Access
@sproc(session=session,
       name='DEPLOY_HUGGINGFACE_MODEL',
       is_permanent=True,
       replace=True,
       stage_location='HUGGINGFACE_FUNCTIONS',
       packages=['snowflake-snowpark-python','snowflake-ml-python','tokenizers','transformers','sentence-transformers'],
       external_access_integrations=['HUGGING_FACE_ACCESS_INTEGRATION'])
def deploy_huggingface_model(session: Session, huggingface_model: str, huggingface_task: str, snowflake_model_name: str, snowflake_version_name: str) -> str:
    """Download a Hugging Face model and register it in the Snowflake Model Registry.

    Args:
        session: Snowpark session the procedure runs with; its current database
            and schema are used as the registry location.
        huggingface_model: Hugging Face model identifier to load.
        huggingface_task: Transformers pipeline task (e.g. 'summarization').
            If empty, whitespace-only or NULL/None, the model is loaded as a
            SentenceTransformer instead.
        snowflake_model_name: Name under which the model is logged.
        snowflake_version_name: Version name under which the model is logged.

    Returns:
        The string representation of the logged model version's
        ``show_functions()`` result (its function signatures).
    """
    # Imports live inside the function body so they are resolved where the
    # procedure executes, using the packages declared in the decorator.
    import os

    # Point the Hugging Face cache at /tmp — presumably the writable location
    # inside the stored procedure sandbox; confirm if this is changed.
    os.environ['HF_HOME'] = '/tmp'

    # Normalize the task once: a NULL argument arrives as None (len(None) would
    # raise TypeError) and a whitespace-only value carries no task either.
    task = (huggingface_task or '').strip()

    if task:
        # If task is provided, create a huggingface pipeline
        from transformers import pipeline
        model = pipeline(task, model=huggingface_model)
    else:
        # Else we assume it's a sentence-transformer
        from sentence_transformers import SentenceTransformer
        model = SentenceTransformer(huggingface_model)

    # Get the model registry object
    from snowflake.ml.registry import Registry
    reg = Registry(
        session=session,
        database_name=session.get_current_database(),
        schema_name=session.get_current_schema()
    )

    # Register the model to Snowflake with provided name
    snow_model = reg.log_model(
        model,
        model_name=snowflake_model_name,
        version_name=snowflake_version_name,
        conda_dependencies=['tokenizers','transformers','sentence-transformers']
    )

    # We return the models function signatures
    return str(snow_model.show_functions())


The version of package 'snowflake-snowpark-python' in the local environment is 1.14.0, which does not fit the criteria for the requirement 'snowflake-snowpark-python'. Your UDF might not work when the package version is different between the server and your local environment.
The version of package 'snowflake-ml-python' in the local environment is 1.4.0, which does not fit the criteria for the requirement 'snowflake-ml-python'. Your UDF might not work when the package version is different between the server and your local environment.
The version of package 'tokenizers' in the local environment is 0.15.2, which does not fit the criteria for the requirement 'tokenizers'. Your UDF might not work when the package version is different between the server and your local environment.
The version of package 'sentence-transformers' in the local environment is 2.6.1, which does not fit the criteria for the requirement 'sentence-transformers'. Your UDF might not work when the package version is d

In [39]:
# Run the deployment procedure; arguments are positional and follow the
# parameter order of deploy_huggingface_model.
# NOTE(review): re-running with an already registered version name presumably fails — confirm.
model_signature = deploy_huggingface_model(
    session,
    'Falconsai/text_summarization',  # huggingface_model
    'summarization',                 # huggingface_task
    'my_text_summarization',         # snowflake_model_name
    'v2',                            # snowflake_version_name
)
print(model_signature)

[{'name': '__CALL__', 'target_method': '__call__', 'signature': ModelSignature(
                    inputs=[
                        FeatureSpec(dtype=DataType.STRING, name='documents')
                    ],
                    outputs=[
                        FeatureSpec(dtype=DataType.STRING, name='summary_text')
                    ]
                )}]


# Retrieve Model Reference

In [40]:
# Connect to Model Registry
from snowflake.ml.registry import Registry
reg = Registry(
    session=session,
    database_name=session.get_current_database(),
    schema_name=session.get_current_schema()
    )

# Retrieve Model Reference
# Pin the version that was deployed above ('v2') instead of relying on `.default`:
# the model's default version may still point at an earlier version (e.g. a
# previously logged 'v1'), so `.default` is not guaranteed to be the model just registered.
my_model = reg.get_model('my_text_summarization').version('v2')

# Run the Model

In [41]:
# Create some test data to work with
# Three multi-sentence passages (internet history, mobile technology, technology in education)
# used as input documents for the summarization model. Each triple-quoted string keeps
# its leading/trailing newline and indentation exactly as written.
long_texts = [
    """
    The History of Internet Development: The internet, as we know it today, began as a military research project in the late 1960s. 
    Initially designed to share information securely among military personnel, it evolved into a tool for scientists to exchange data. 
    Over decades, what started as a rudimentary network of computers has transformed into a global system that connects billions of devices. 
    This massive growth was largely due to the advent of the World Wide Web in the early 1990s, which made the internet more accessible and user-friendly. 
    Websites proliferated rapidly, and the internet became a central medium for communication, entertainment, and commerce. 
    Today, internet technology continues to evolve with trends like the Internet of Things (IoT) and the increasing demand for faster, more reliable connections.
    """,
    """
    Advances in Mobile Technology: Mobile technology has seen unprecedented growth over the last two decades. 
    The transformation from basic cell phones to sophisticated smartphones is one of the most significant innovations in modern technology. 
    Early mobile phones were primarily used for voice communication and short text messages. However, the introduction of smartphones changed the landscape completely. 
    Modern smartphones offer a wide range of functions including internet access, high-quality photography, and mobile applications that cater to every imaginable need, from navigating one's way in a new city to managing personal finances or even monitoring health and fitness. 
    As mobile technology continues to advance, we see the integration of AI and augmented reality, pushing the boundaries of what smartphones can do.
    """,
    """
    The Role of Technology in Modern Education: Technology has dramatically transformed the educational landscape. 
    From elementary classrooms to prestigious universities, technology is an integral part of how education is delivered and received today. 
    Digital tools such as tablets, smartboards, and educational software have become commonplace, making learning more interactive and accessible. 
    Moreover, the rise of online education platforms has made it possible for people around the world to learn anything from coding to art history from the comfort of their homes. 
    These platforms offer courses from top institutions and industry leaders, making quality education more accessible than ever before. 
    While technology in education presents numerous opportunities, it also brings challenges such as digital divide and concerns about screen time.
    """
]


# Build a single-column Snowpark DataFrame from the passages; the column name DOCUMENTS
# mirrors the 'documents' input feature shown in the deployed model's signature.
df = session.create_dataframe(long_texts, schema=['DOCUMENTS'])
# Preview the rows with a wide column limit so the long texts are not cut off early
df.show(n=15, max_width=1000)

------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"DOCUMENTS"                                                                                                                                                                                                                                                                             |
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|                                                                                                                                                      

In [43]:
# Score every document with the registered model, then keep only the model output
# by dropping the original input column before displaying.
summaries = my_model.run(df).drop('DOCUMENTS')
summaries.show(max_width=1000)

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"SUMMARY_TEXT"                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
----------------------------------------------------------------------------------------------