# $$\ Snowpark\ ALS\ Model\ Registry$$

In [1]:
# Execute the helper notebook before anything else. Names used below without a
# visible import (json, Session, implicit, sparse, Registry, ALSModel) are
# presumably brought into scope by it — TODO confirm against helper_functions.ipynb.
%run helper_functions.ipynb

In [2]:
# Read the Snowflake connection settings from the local JSON file. The context
# manager guarantees the file handle is closed once parsing is done (the bare
# open() call previously left it to the garbage collector).
with open('connection.json') as connection_file:
    connection_parameters = json.load(connection_file)

# Build the Snowpark session that every later cell uses.
session = Session.builder.configs(connection_parameters).create()

In [3]:
# Display the warehouse the session is currently using.
session.get_current_warehouse()

'"LARGE_JOB"'

In [4]:
# Snowpark handle to the EVENTS_DATA_CLEANED table; kept because it is passed
# later as sample_input_data when the model is logged.
data = session.table('EVENTS_DATA_CLEANED')
# Pull the table into a local pandas DataFrame for wrangling and training.
original_data = data.to_pandas()

In [5]:
# Sanity check: (rows, columns) of the frame pulled from Snowflake.
original_data.shape

(2756101, 9)

## Data Wrangling

In [6]:
# Basic data wrangling (modifies original_data in place).

# Overwrite TRANSACTIONID with a constant 0 and store TS_DATE as strings.
original_data['TRANSACTIONID'] = 0
original_data['TS_DATE'] = original_data['TS_DATE'].astype(str)

# Encode every distinct EVENT value as an integer, numbered in order of first
# appearance in the column.
# NOTE(review): the first event type seen receives code 0, which becomes a zero
# interaction weight when EVENT is used to build the training matrix — confirm
# this is intended.
mapping = {value: index for index, value in enumerate(original_data['EVENT'].unique())}

# Series.map performs one dictionary lookup per row. It replaces the previous
# Series.replace(mapping), which is far slower on a column of this size and
# triggers the pandas 2.2 downcasting FutureWarning. Every value is a key of
# `mapping` by construction, so the resulting codes are the same.
original_data['EVENT'] = original_data['EVENT'].map(mapping)


### Model Registry

In [7]:
import time

# ALS model with 20 latent factors, L2 regularization of 0.1 and 20 iterations.
# random_state pins the random factor initialisation so that a
# Restart & Run All reproduces the same model.
model = implicit.als.AlternatingLeastSquares(
    factors=20,
    regularization=0.1,
    iterations=20,
    random_state=42,
)

# Convert the training DataFrame to a sparse matrix with VISITOR_ID as the row
# index, ITEMID as the column index and the encoded EVENT as the value.
sparse_user_item = sparse.csr_matrix(
    (
        original_data['EVENT'].astype(float),
        (original_data['VISITOR_ID'], original_data['ITEMID']),
    )
)

# Scale the raw interaction values by alpha to obtain the confidence matrix.
alpha_val = 40
data_conf = (sparse_user_item * alpha_val).astype('double')

# perf_counter() is a monotonic, high-resolution clock, so the measured
# interval cannot be distorted by system clock adjustments (unlike time.time()).
start_time = time.perf_counter()

model.fit(data_conf)

end_time = time.perf_counter()

execution_time = end_time - start_time

print("Execution time:", execution_time, "seconds")

  0%|          | 0/20 [00:00<?, ?it/s]

Execution time: 34.69214844703674 seconds


In [8]:
# Imports needed for serialization and for building the custom model context.
from joblib import dump, load
from snowflake.ml.model import custom_model

# Local path of the pickled ALS model; reused later as the registry artifact.
saving_location = '/tmp/als_m1.pkl'

# Persist the fitted model. dump() is left as the last expression so the cell
# displays the list of files it wrote.
dump(model, saving_location)



['/tmp/als_m1.pkl']

In [9]:
# Registry handle bound to database MILES, schema NOTEBOOK_DEMO.
registry = Registry(
    session,
    database_name="MILES",
    schema_name="NOTEBOOK_DEMO",
)
MODEL_NAME = "als_m1"

# Attach the pickled model file as an artifact keyed by the model name.
# Author's note: the artifacts section is used here; the context's models
# section is meant for model types the registry supports out of the box.
mc = custom_model.ModelContext(artifacts={MODEL_NAME: saving_location})


In [12]:
# Version label under which the model is registered and later looked up.
version_name = 'v1'

# Wrap the model context in the ALSModel custom-model class. ALSModel is not
# defined in this notebook — presumably it comes from helper_functions.ipynb.
mymodelpipeline = ALSModel(mc)

In [34]:
# Register the custom model in the registry under MODEL_NAME / version_name.
# NOTE(review): sample_input_data is the raw Snowpark table `data`, while
# predictions below are run on the wrangled pandas frame `original_data` —
# confirm the inferred signature matches what predict receives.
mv = registry.log_model(
    mymodelpipeline,
    model_name=MODEL_NAME,
    version_name=version_name,
    conda_dependencies=["implicit", "joblib==1.4.0"],
    comment="ALS model example",
    sample_input_data=data,
    options={"embed_local_ml_library": True},
)

In [37]:
# Time a full prediction run against the model version returned by log_model.
# perf_counter() is a monotonic clock meant for measuring elapsed intervals, so
# the result is unaffected by system clock adjustments (unlike time.time()).
start_time = time.perf_counter()

recommend_df = mv.run(X = original_data, function_name = 'predict')

end_time = time.perf_counter()

execution_time = end_time - start_time

print("Execution time:", execution_time, "seconds")

Execution time: 138.85317850112915 seconds


In [41]:
# Elapsed time of the preceding prediction run, in minutes. Uses the measured
# value rather than a number pasted from an earlier output, so it stays correct
# when the notebook is re-run.
execution_time / 60

2.3142196416854857

In [13]:
# Look the model up in the registry by name...
mv_load = registry.get_model(model_name=MODEL_NAME)

# ...then select the specific version that was logged for it.
mv_load_version = mv_load.version(version_name=version_name)

In [14]:


# Time a full prediction run against the model version fetched from the registry.
# perf_counter() is a monotonic clock meant for measuring elapsed intervals, so
# the result is unaffected by system clock adjustments (unlike time.time()).
start_time = time.perf_counter()

recommend_df = mv_load_version.run(X = original_data, function_name = 'predict')

end_time = time.perf_counter()

execution_time = end_time - start_time

print("Execution time:", execution_time, "seconds")

Execution time: 147.7322061061859 seconds


In [15]:
# Elapsed time of the preceding prediction run, in minutes. Uses the measured
# value rather than a number pasted from an earlier output, so it stays correct
# when the notebook is re-run.
execution_time / 60

2.4622034351030986

In [59]:
# Display the warehouse the session is currently using (context for the timings above).
session.get_current_warehouse()

'"LARGE_JOB"'

In [38]:
# Preview the first rows of the prediction output.
recommend_df.head()

Unnamed: 0,rec1,rec2,rec3,rec4,rec5
0,461686,46232,409804,234255,287449
1,318333,316472,236158,42002,57245
2,46156,113535,159822,130113,340375
3,461686,46232,409804,234255,287449
4,299222,254418,217605,233695,315545


In [39]:
# Upload the wrangled pandas frame back to Snowflake as a Snowpark DataFrame.
original_data_snowpark_dataframe = session.create_dataframe(original_data)

# Persist it as a table. The writer's default mode raises an error when the
# table already exists, which made this cell fail on every re-run;
# mode="overwrite" keeps the notebook re-runnable.
# NOTE(review): this replaces any existing EVENTS_DATA_CLEANED_SNOWPARK table —
# confirm nothing else depends on its previous contents.
original_data_snowpark_dataframe.write.save_as_table(
    'EVENTS_DATA_CLEANED_SNOWPARK',
    mode="overwrite",
)