In [1]:
import os

from snowflake.snowpark import Session
from snowflake.snowpark.functions import udf, sproc, servicesproc
from snowflake.snowpark import functions as F
from snowflake.snowpark.types import *

from datetime import datetime
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split

In [2]:
import requests
import urllib.parse
import os
import signal
import json
import snowflake.connector

# Connection settings for the "servicesnow" test deployment. The password is
# read from the environment so it never appears in the notebook source.
connection_parameters = {
    "account": "servicesnow",
    "user": "madkins",
    "password": os.environ['SNOWFLAKE_CI_PASSWORD'],
    "host": "servicesnow.temptest002038.us-west-2.aws.snowflakecomputing.com",
    "database": "madkins",
    "schema": "public"
}

# Snowpark session built from the settings above.
session1 = Session.builder.configs(connection_parameters).create()

# Second, plain connector connection with the same credentials; a later cell
# uses it to request a session token. The values are taken straight from
# `connection_parameters` rather than read back through Snowpark's private
# `_conn._lower_case_parameters` attribute, which is not a public API.
session2 = snowflake.connector.connect(
    user=connection_parameters['user'],
    password=connection_parameters['password'],
    account=connection_parameters['account'],
    host=connection_parameters['host'],
    session_parameters={
        'PYTHON_CONNECTOR_QUERY_RESULT_FORMAT': 'json'
    },
)

In [3]:
# Show the effective connection settings, masking the password so the secret
# is not written into the notebook's saved output.
{
    key: ('<redacted>' if key == 'password' else value)
    for key, value in session1._conn._lower_case_parameters.items()
}

{'account': 'servicesnow',
 'user': 'madkins',
 'password': '<redacted>',
 'host': 'servicesnow.temptest002038.us-west-2.aws.snowflakecomputing.com',
 'database': 'madkins',
 'schema': 'public',
 'application': 'PythonSnowpark',
 'internal_application_name': 'PythonSnowpark',
 'internal_application_version': '1.0.0'}

In [5]:
# Ask the connector session to issue a session token and wrap it in the
# `Snowflake Token="..."` Authorization header used for the ingress request.
token_response2 = session2._rest._token_request('ISSUE')
token_extract2 = token_response2['data']['sessionToken']
token2 = f'"{token_extract2}"'
headers2 = {'Authorization': f'Snowflake Token={token2}'}

# Print the header shape only: echoing the real token would store a live
# credential in the notebook output.
print({
    name: value.replace(token_extract2, '<redacted>')
    for name, value in headers2.items()
})

url = 'https://gibzn-tt2038-servicesnow.ingress-03.dev-snowflakecomputing.app/'

# Without a timeout, requests waits indefinitely if the endpoint never answers.
response = requests.get(url, headers=headers2, timeout=30)
response.text

{'Authorization': 'Snowflake Token="<redacted>"'}


'{"Hello":"World"}'

'servicesnow.temptest002038.us-west-2.aws.snowflakecomputing.com'

In [5]:
# HTTP status code of the response returned by the ingress GET request.
response.status_code

404

In [6]:
# Textual reason phrase of the same HTTP response.
response.reason

'Not Found'

In [8]:
# Settings for the account queried by the training and inference cells.
# The password is read from the environment, never from the notebook source.
connection_parameters = dict(
    account="VUA92284",
    user="snowflake_nvidia",
    password=os.environ['SNOWFLAKE_TEMP_PASSWORD'],
    role="SNOWFLAKE_NVIDIA",  # optional
    warehouse="SNOWFLAKE_NVIDIA",  # medium snowpark-optimized
    database="SNOWFLAKE_NVIDIA",
    schema="PUBLIC",
)

# Snowpark session used by the cells that follow.
session = (
    Session.builder
    .configs(connection_parameters)
    .create()
)

In [3]:
# Make snowflake_nvidia the active warehouse for this session's queries.
session.sql("USE WAREHOUSE snowflake_nvidia;").collect()

[Row(status='Statement executed successfully.')]

In [4]:
# Snowpark DataFrame over the SENSOR_DATA_MACHINE_FAILURE_100M table; only a
# five-row preview is converted to pandas for display here.
snow_df = session.sql("SELECT * from SENSOR_DATA_MACHINE_FAILURE_100M")
snow_df.limit(5).to_pandas()

Unnamed: 0,Air temperature,Process temperature,Rotational speed,Torque,Tool wear,Machine failure
0,301.760789,309.396186,1707.036783,47.934766,34.141201,0
1,299.87068,308.752652,1416.204916,59.05191,80.001709,0
2,302.198607,309.827915,1095.139524,45.029482,27.988897,0
3,299.270016,308.289058,1684.95856,20.2244,18.487766,0
4,297.489689,312.132123,1297.227154,33.933779,207.472142,0


# Train CPU Model

In [5]:
# Row cap passed to `.limit(rows)` by the training procedures and the scoring cell.
rows = 10_000_000

In [6]:
@sproc(packages=['snowflake-snowpark-python','scikit-learn', 'xgboost'])
def train_cpu(session: Session) -> str:
    """Fit a default XGBoost classifier on CPU and report the fit duration.

    Loads up to ``rows`` rows (notebook-level setting) of
    ``SENSOR_DATA_MACHINE_FAILURE_100M`` into pandas, trains
    ``xgb.XGBClassifier()`` on the five sensor columns against
    ``Machine failure``, and times only the ``fit`` call.

    Args:
        session: Snowpark session supplied to the stored procedure.

    Returns:
        A message such as ``'XGBoost CPU Train Time: 95.56s'``; the number is
        elapsed seconds.
    """
    import time

    features = [
        'Air temperature',
        'Process temperature',
        'Rotational speed',
        'Torque',
        'Tool wear',
    ]
    # A plain column name yields a 1-D Series, the conventional label shape.
    target = 'Machine failure'

    df = session.sql("SELECT * from SENSOR_DATA_MACHINE_FAILURE_100M").limit(rows).to_pandas()

    X_train = df[features]
    y_train = df[target]

    clf = xgb.XGBClassifier()

    # perf_counter is monotonic, so the measurement cannot be skewed by
    # wall-clock adjustments the way datetime.now() differences can.
    start = time.perf_counter()
    clf.fit(X_train, y_train)
    elapsed = time.perf_counter() - start

    # Report real seconds; str(timedelta) + 's' produced 'H:MM:SS.ffffffs'.
    return f'XGBoost CPU Train Time: {elapsed:.2f}s'

The version of package scikit-learn in the local environment is 1.2.1, which does not fit the criteria for the requirement scikit-learn. Your UDF might not work when the package version is different between the server and your local environment
The version of package xgboost in the local environment is 1.7.4, which does not fit the criteria for the requirement xgboost. Your UDF might not work when the package version is different between the server and your local environment


In [7]:
# Invoke the train_cpu stored procedure and display the string it returns.
train_cpu()

'XGBoost CPU Train Time: 0:01:35.557476s'

# Train GPU Model

In [6]:
@servicesproc(snowservice='NVIDIA', packages=['snowflake-snowpark-python','scikit-learn', 'xgboost'])
def train_gpu(session: Session) -> dict:
    """Fit an XGBoost classifier with the GPU tree method and log the model.

    Loads up to ``rows`` rows (notebook-level setting) of
    ``SENSOR_DATA_MACHINE_FAILURE_100M`` into pandas, trains
    ``xgb.XGBClassifier(tree_method='gpu_hist')`` on the five sensor columns
    against ``Machine failure``, times only the ``fit`` call, and logs the
    fitted model to the model registry under the name ``xg_boost``.

    Args:
        session: Snowpark session supplied to the procedure.

    Returns:
        A dict with ``model_id`` (the value returned by ``log_model``) and
        ``train_time`` (elapsed seconds, e.g. ``'12.34s'``).
    """
    import time

    from snowflake.ml.registry import model_registry

    features = [
        'Air temperature',
        'Process temperature',
        'Rotational speed',
        'Torque',
        'Tool wear',
    ]
    # A plain column name yields a 1-D Series, the conventional label shape.
    target = 'Machine failure'

    df = session.sql("SELECT * from SENSOR_DATA_MACHINE_FAILURE_100M").limit(rows).to_pandas()
    # The former `cudf.from_pandas(df)` call was removed: `cudf` was never
    # imported (NameError at run time) and its result was discarded, so the
    # model was trained on the pandas frame regardless.

    X_train = df[features]
    y_train = df[target]

    clf = xgb.XGBClassifier(tree_method='gpu_hist')

    # perf_counter is monotonic, so the measurement cannot be skewed by
    # wall-clock adjustments the way datetime.now() differences can.
    start = time.perf_counter()
    clf.fit(X_train, y_train)
    elapsed = time.perf_counter() - start

    registry = model_registry.ModelRegistry(session=session)
    model_id = registry.log_model(
        model=clf,
        name="xg_boost",
        # Tag value corrected from the misspelled "XBGClassifier".
        tags={"stage": "testing", "classifier_type": "XGBClassifier"},
    )

    return {
        'model_id': model_id,
        # Real seconds; str(timedelta) + 's' produced 'H:MM:SS.ffffffs'.
        'train_time': f'{elapsed:.2f}s'
    }

In [7]:
# Invoke train_gpu and display the dict it returns.
train_gpu()

<!DOCTYPE HTML>
<html>
<head>
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta charset="UTF-8">
    <meta name="viewport"
          content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=no">

    <!-- set the base path for all urls -->
    <base href="../../../assets/ui/Login/" />
    
    <link rel="icon" href="../../../favicon.ico" />
    
    <!-- The line below must be kept intact for Sencha Cmd to build your application -->
    <script  id="microloader" data-app="a2154f47-8466-46a8-9e13-14fe535c3076" src="microloader.js"></script>
    <style>
        body {
            background: #FAFAFA;
        }
        @keyframes growShrink {
            0% {
                transform: scale(0.6);
            }
            50% {
                transform: scale(0.9);
            }
            100% {
                transform: scale(0.6);
            }
        }
    </style>
</head>
<body data-bind="{&quot;LOCALE&quot;:&quot;en&quot;,&quot;APP_PREFIX_L

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [10]:
from snowflake.ml.registry import model_registry

# Load a previously logged model from the registry by its id. `clf` is the
# object whose `predict` method the inference UDF calls.
registry = model_registry.ModelRegistry(session=session)
clf = registry.load_model(id='5155f22ec3a311edb92ccad5e1f77919')

  If you are loading a serialized model (like pickle in Python, RDS in R) generated by
  older XGBoost, please export the model by calling `Booster.save_model` from that version
  first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

  for more details about differences between saving model and serializing.



# Inference

## UDF

In [19]:
@udf(packages=['pandas', 'scikit-learn', 'xgboost'])
def predict(df: PandasDataFrame[float, float, float, float, float]) -> PandasSeries[int]:
    """Score a batch of sensor readings with the notebook-level model ``clf``.

    The incoming frame has five float columns; they are relabelled, in order,
    with the feature names listed below before being handed to
    ``clf.predict``, whose result is returned unchanged.
    """
    df.columns = [
        'Air temperature',
        'Process temperature',
        'Rotational speed',
        'Torque',
        'Tool wear',
    ]
    return clf.predict(df)

The version of package pandas in the local environment is 1.5.3, which does not fit the criteria for the requirement pandas. Your UDF might not work when the package version is different between the server and your local environment
The version of package scikit-learn in the local environment is 1.2.1, which does not fit the criteria for the requirement scikit-learn. Your UDF might not work when the package version is different between the server and your local environment
The version of package xgboost in the local environment is 1.7.4, which does not fit the criteria for the requirement xgboost. Your UDF might not work when the package version is different between the server and your local environment


In [22]:
# Switch the session to the snowflake_regular warehouse and set that
# warehouse's size to MEDIUM before running inference.
session.sql("USE WAREHOUSE snowflake_regular;").collect()
session.sql("ALTER WAREHOUSE snowflake_regular SET WAREHOUSE_SIZE='MEDIUM';").collect()

[Row(status='Statement executed successfully.')]

In [26]:
%%time

predictions = snow_df.limit(rows).select(
    F.col('"Air temperature"'),
    F.col('"Process temperature"'),
    F.col('"Rotational speed"'),
    F.col('"Torque"'),
    F.col('"Tool wear"'),
    predict(
        F.col('"Air temperature"'),
        F.col('"Process temperature"'),
        F.col('"Rotational speed"'),
        F.col('"Torque"'),
        F.col('"Tool wear"')
    ).alias('PREDICTION'),
).to_pandas()

predictions

CPU times: user 11.9 s, sys: 2.38 s, total: 14.3 s
Wall time: 35.2 s


Unnamed: 0,Air temperature,Process temperature,Rotational speed,Torque,Tool wear,PREDICTION
0,298.865730,311.399959,1438.964513,27.473915,119.649302,0
1,303.338527,307.604207,1650.098003,40.674795,77.180278,0
2,298.615246,309.087900,1406.471948,47.189996,17.463701,0
3,296.463439,309.020670,1632.462393,46.219119,100.041041,0
4,304.153169,308.608864,1320.134581,49.484134,204.868320,0
...,...,...,...,...,...,...
9999995,302.224949,308.456370,1636.146116,19.836256,48.578381,0
9999996,299.948592,311.111997,1385.207778,36.698089,104.420139,0
9999997,300.465219,311.205485,1602.959453,30.192672,170.900726,0
9999998,303.975146,310.323803,1618.984619,47.229033,133.737225,0


AttributeError: DataFrame object has no attribute size