In [None]:
# !pip install snowflake-snowpark-python[pandas]==1.5.0
# !pip install snowflake-connector-python
# !pip install snowflake-ml-python==1.0.10

In [None]:
!pip install snowflake[ml]==0.8.0

In [None]:
# !pip install snowflake-connector-python
!pip uninstall cloudpickle -y
!pip install cloudpickle==2.2.1

In [1]:
from snowflake.snowpark import Session

In [2]:
REGISTRY_DATABASE_NAME = "MODEL_REGISTRY_SNOWSIGHT"
REGISTRY_SCHEMA_NAME = "SNOWINSIGHT"

In [3]:
import configparser
def get_session(config_path='/notebooks/notebooks/config.ini'):
    """Create a Snowpark session from credentials stored in an INI file.

    The file must contain a ``[Credentials]`` section with the keys ``user``,
    ``password``, ``account``, ``warehouse`` and ``role``. The database and
    schema come from ``REGISTRY_DATABASE_NAME`` and ``REGISTRY_SCHEMA_NAME``.

    Args:
        config_path: Path of the credential file. Defaults to the location
            this notebook has always used.

    Returns:
        The object returned by ``Session.builder.configs(...).create()``.

    Raises:
        FileNotFoundError: If ``config_path`` could not be read.
        KeyError: If the ``Credentials`` section or one of its keys is missing.
    """
    parser = configparser.ConfigParser()
    # ConfigParser.read() skips unreadable files without raising and returns
    # the list of files it actually parsed, so an empty result means that
    # nothing was loaded.
    if not parser.read(config_path):
        raise FileNotFoundError(
            f"Credential file not found or unreadable: {config_path}"
        )

    credentials = parser['Credentials']
    connection_params = dict(
        user=credentials['user'],
        password=credentials['password'],
        account=credentials['account'],
        warehouse=credentials['warehouse'],
        database=REGISTRY_DATABASE_NAME,
        schema=REGISTRY_SCHEMA_NAME,
        role=credentials['role'],
    )
    # Do not print connection_params: it contains the password and the output
    # would be saved with the notebook.
    return Session.builder.configs(connection_params).create()

In [4]:
session = get_session()
# session.close()
session

<snowflake.snowpark.session.Session at 0x7b3329764f10>

In [5]:
# from snowflake.ml.registry import model_registry
from snowflake.ml.registry import Registry as model_registry

In [None]:
!pip list | grep -i sci

In [None]:
# LIST needs a stage reference (LIST @stage); SHOW STAGES is the statement that
# enumerates stages. session.sql() only builds a lazy DataFrame, so .show() is
# needed to run the statement and print the rows.
session.sql("SHOW STAGES").show()

In [None]:
# model_registry.create_model_registry(session=session, database_name=REGISTRY_DATABASE_NAME, schema_name=REGISTRY_SCHEMA_NAME)

In [6]:
registry = model_registry(session=session, database_name=REGISTRY_DATABASE_NAME, schema_name=REGISTRY_SCHEMA_NAME )

In [None]:
# registry = model_registry.ModelRegistry(session=session, database_name=REGISTRY_DATABASE_NAME, schema_name=REGISTRY_SCHEMA_NAME)

In [None]:
df = registry.show_models()
df
# type(df)

# Get model list from snowpark registry

In [None]:
model_list = registry.models()
model_list

# Get model details from registry

In [None]:
m =registry.get_model("sample_sklearn_model")
m

# Get model versions

In [None]:
df_version = m.show_versions()
df_version

# Set default model version

In [None]:
# default_version = m.default
m.default = "V1"

# Get default model version details from model-registry

In [None]:
mv = m.default
mv

In [None]:
# x  # stray reference to a name that is never assigned in this notebook; commented out so a top-to-bottom run does not stop with NameError

In [None]:
# Load the default version as a Python object
# clf = mv.load()

# Run prediction on scikit-learn model

In [None]:
import numpy as np

# One sample row with 16 values (presumably one per model feature, in
# training-column order — confirm against the model signature).
data_list = [[ 25, 185,  88,  78,  65,  81,  77,  75,  81,  63,  37,  26,   9, 10,  79,  82]]
data_array = np.asarray(data_list)
print("data_array =", data_array)

# Invoke the "predict" function of the registered model version and display
# whatever it returns as the cell output.
remote_prediction = mv.run(data_array, function_name="predict")
remote_prediction

In [None]:
# import cloudpickle

# model_obj = cloudpickle.load(open("/notebooks/notebooks/ml_model", "rb"))
# model_obj


In [None]:
# NOTE(review): X_train is first assigned by the train_test_split cell further
# down, so this cell raises NameError when the notebook is run top to bottom
# on a fresh kernel.
type(X_train)

In [None]:
# x_train = cloudpickle.load(open("/notebooks/notebooks/x_train", "rb"))
# x_train
# type(x_train)

# Deploy model with custom class

In [None]:
from snowflake.ml.model import custom_model

In [None]:
# pandas is needed while the class body is evaluated (type annotations) and
# numpy inside run(); importing both here keeps this cell independent of the
# import cells that appear later in the notebook.
import numpy as np
import pandas as pd


class ExamplePipelineModel(custom_model.CustomModel):
    """Custom model whose inference method delegates to ``model.predict``.

    NOTE(review): ``model`` is read from the notebook's global namespace, not
    from the model context. Confirm it is defined, and is the intended
    estimator, before this class is instantiated or logged.
    """

    @custom_model.inference_api
    def run(self, data_list: pd.DataFrame) -> pd.DataFrame:
        """Predict for the given rows.

        Args:
            data_list: Feature rows, one row per sample. Anything
                ``np.asarray`` accepts works; a single flat sample is treated
                as one row.

        Returns:
            A DataFrame with a single ``output`` column holding one
            prediction per input row, as the return annotation declares.
        """
        data_array = np.asarray(data_list)
        # A flat sample must become a one-row 2-D array before predict().
        # This explicit check replaces the former bare ``except:`` retry,
        # which also swallowed errors unrelated to the input shape.
        if data_array.ndim != 2:
            data_array = data_array.reshape(1, -1)
        prediction = model.predict(data_array)
        return pd.DataFrame({"output": np.asarray(prediction).tolist()})

In [None]:
!pip freeze | grep cloud

# Train & register model

In [None]:
import numpy as np
import pandas as pd

from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV

In [None]:
# Load the data
data = pd.read_csv("/data/fifa.csv")
data.head(10)

In [None]:
# NOTE(review): y_train is first assigned by the train_test_split cell below,
# so this cell raises NameError when the notebook is run top to bottom on a
# fresh kernel.
type(y_train)

In [None]:
# Model inputs: the sixteen feature columns selected from the loaded frame.
X = data[[
    'age', 'height_cm', 'weight_kg', 'skill_dribbling',
    'attacking_crossing', 'attacking_finishing', 'movement_acceleration',
    'movement_sprint_speed', 'power_shot_power', 'mentality_aggression',
    'defending_marking', 'defending_standing_tackle', 'goalkeeping_diving',
    'goalkeeping_handling', 'overall', 'potential',
]]
# Target column and its natural logarithm.
y = data['value_eur']
ylog = np.log(y)

# %matplotlib inline
# import matplotlib.pyplot as plt
# plt.hist(ylog, bins='auto')
# plt.title("ln(value_eur)")
# plt.show()

# One call splits features, log-target and raw target with the same row
# assignment; random_state makes the split repeatable.
X_train, X_test, ylog_train, ylog_test, y_train, y_test = train_test_split(
    X, ylog, y, test_size=0.25, random_state=4
)

In [None]:
gbm_default = GradientBoostingRegressor()
gbm_default.fit(X_train, y_train)

In [None]:
# gbm_default is a GradientBoostingRegressor, which has no .head() method
# (that is a DataFrame method); display the fitted estimator itself instead.
gbm_default

In [None]:
import joblib
def dump_model(model, path):
    """Serialize ``model`` to the file at ``path`` using ``joblib.dump``."""
    joblib.dump(value=model, filename=path)

In [None]:
dump_model(gbm_default, "/notebooks/notebooks/ml_model_4")

# Generating model signature

In [None]:
from snowflake.ml.model import model_signature

In [None]:
sig = model_signature.infer_signature(
    X_train.head(),
    data['value_eur'],
    input_feature_names=['age','height_cm','weight_kg','skill_dribbling',
                         'attacking_crossing','attacking_finishing','movement_acceleration',
                         'movement_sprint_speed','power_shot_power','mentality_aggression',
                         'defending_marking','defending_standing_tackle','goalkeeping_diving',
                         'goalkeeping_handling','overall','potential'],
    output_feature_names=['value_eur'])

In [None]:
mv = registry.log_model(gbm_default,
                   model_name="sample_sklearn_model_4",
                   version_name="v1",
                   conda_dependencies=["scikit-learn==1.3.2", "scipy==1.13.1", "cloudpickle==2.2.1"],
                   comment="My awesome ML model",
                    python_version="3.9.19",
                   metrics={"score": 96},
                   sample_input_data=X_train
                       )

In [None]:
import joblib
# NOTE(review): this repeats the dump_model definition from the earlier cell
# unchanged; re-defining it is harmless but redundant.
def dump_model(model, path):
    """Serialize ``model`` to the file at ``path`` using ``joblib.dump``."""
    joblib.dump(model, path)

In [None]:
dump_model(gbm_default, "/notebooks/notebooks/ml_model_3")

# Create stage in snowflake-model-registry

In [None]:
# query = """
#         create or replace stage sample_model_serving_udf_stg
#         directory = (enable = true)
#         copy_options = (on_error='skip_file')
#         """

# session.sql(query)



In [None]:
# To see the stages
# session.sql() only builds a lazy DataFrame; .show() executes the statement
# and prints the rows (same pattern as the CREATE STAGE cell below).
session.sql("show stages").show()

In [None]:
from snowflake.snowpark.files import SnowflakeFile

In [None]:
import cloudpickle

# A pickled model must be read in binary mode, and a plain stage path is not a
# scoped URL, so both arguments are passed explicitly. This matches the
# SnowflakeFile.open call inside read_model_from_stage.
# NOTE(review): SnowflakeFile appears intended for code that runs inside
# Snowflake (UDF / stored procedure) — confirm this cell is expected to work
# from the client notebook.
# NOTE(security): cloudpickle.load executes code embedded in the file; only
# load model files from a trusted stage.
with SnowflakeFile.open("@STG_ML_MODELS/ml_model_2", 'rb', require_scoped_url=False) as file:
    # A dedicated name avoids rebinding `m`, which holds the registry model
    # fetched earlier with registry.get_model().
    staged_model = cloudpickle.load(file)
    print(type(staged_model))

# Upload model file to the STG_ML_MODELS stage

In [None]:
#create the stage for storing the ML models
# session.sql("USE MODEL_REGISTRY_SNOWSIGHT;")
# CREATE OR REPLACE would drop an existing stage together with the files
# already uploaded to it (other cells read ml_model_2 from this stage), so the
# cell only creates the stage when it is missing and is safe to re-run.
session.sql('CREATE STAGE IF NOT EXISTS STG_ML_MODELS').show()

In [None]:
# DataFrame.to_csv() without a path returns the CSV text, which file.put()
# would then treat as a local file name. Write the file first and upload it
# by path (same location the later X_train.to_csv cell uses).
x_train_csv_path = "/data/X_train.csv"
X_train.to_csv(x_train_csv_path)
session.file.put(
    x_train_csv_path, "@STG_ML_MODELS", auto_compress=False, overwrite=True
)

In [None]:
session.file.put(
    "/notebooks/notebooks/ml_model_4", "@STG_ML_MODELS", auto_compress=False, overwrite=True
)

# Read model file from stage

In [None]:
# Start from a clean slate: drop imports and packages previously registered
# on this session.
session.clear_imports()
session.clear_packages()

# Register the uploaded model file as an import for the UDF
session.add_import("@STG_ML_MODELS/ml_model_2")

# Map package dependencies
session.add_packages("scikit-learn", "pandas", "cloudpickle==2.2.1")

In [None]:
from snowflake.snowpark.types import PandasSeries, PandasDataFrame
import snowflake.snowpark.functions as F
from typing import  Optional



def read_file(filename):
    """Unpickle ``filename`` from the Snowflake import directory.

    The directory is looked up in ``sys._xoptions`` under the key
    ``snowflake_import_directory``. When that option is not set the function
    returns ``None`` without touching the file system.

    NOTE(security): ``cloudpickle.load`` executes code embedded in the file;
    only trusted files should be imported.
    """
    import cloudpickle
    import os
    import sys

    # Location of the files registered as imports.
    import_dir = sys._xoptions.get("snowflake_import_directory")
    if not import_dir:
        return None

    with open(os.path.join(import_dir, filename), 'rb') as handle:
        return cloudpickle.load(handle)

#register UDF
@F.udf(name = 'READ_MODEL_FROM_FILE', is_permanent = True, replace = True, stage_location = '@STG_ML_MODELS')
def read_model_from_file(file_name:Optional[str]) -> str:
    """UDF handler: load the pickled object named ``file_name`` via ``read_file``.

    Args:
        file_name: Name of the imported file to unpickle.

    Returns:
        The literal string ``"pipeline"``; the loaded object itself is only
        inspected (its type is printed), not returned.
    """
    # The former ``np.asarray(data_list)`` line was removed: ``data_list`` is
    # neither a parameter nor a local of this function (it only exists as a
    # notebook-level variable) and the resulting array was never used.
    pipeline = read_file(file_name)
    print("type of pickle object =", type(pipeline))
    return "pipeline"




# @F.udf(name = 'READ_MODEL_FROM_FILE', is_permanent = True, replace = True, stage_location = '@STG_ML_MODELS')
# def read_model_from_file(file_name:Optional[str]) -> str:
#     import numpy as np
    
#     # later we will input train data as JSON object
#     # hance, we have to convert JSON object as pandas DF
#     data_array = np.asarray(data_list)
#     pipeline = read_file(file_name)
#     print("type of pickle object =", type(pipeline))
# #     dict_mod["model_obj"] = pipeline
#     return pipeline
# #     return pipeline.predict(data_array).to_list()

In [None]:
# DESC FUNCTION expects the argument data types, not a sample value; the
# nested double quotes also made the original line a Python syntax error.
session.sql("DESC FUNCTION READ_MODEL_FROM_FILE(VARCHAR)").show()

In [None]:
model_udf = read_model_from_file("ml_model_2")
# model_udf = read_file("ml_model_2")

In [None]:
model_udf

In [None]:
# The cell contained an unfinished attribute access (`registry.`), which is a
# syntax error; display the registry object instead.
registry

In [None]:
import joblib

In [None]:
# model_obj = joblib.load(read_model_from_file("ml_model_2"))
import os
model_file = os.path.join("/notebooks/notebooks", "ml_model_3")
model_obj = joblib.load(model_file)

In [None]:
X_train.to_csv("/data/X_train.csv")

In [None]:
import numpy as np

In [None]:
# One sample row with 16 values (presumably one per model feature, in
# training-column order — confirm against the training frame).
data_list = [[ 25, 185,  88,  78,  65,  81,  77,  75,  81,  63,  37,  26,   9, 10,  79,  82]]
data_array = np.asarray(data_list)
print("data_array =", data_array)

# model_obj is the estimator restored with joblib.load, so it is called through
# its own predict() method; run(..., function_name=...) is the registry
# model-version API and does not exist on the estimator.
# The variable keeps its original name although the prediction is computed
# locally here.
remote_prediction = model_obj.predict(data_array)
remote_prediction

In [None]:
from snowflake.snowpark import DataFrame

In [None]:
import pandas as pd

In [None]:
from snowflake.snowpark.types import Variant
def read_model_from_stage(session: Session, df_dict: dict) -> Variant:
    """Load the staged model file and log it in the model registry.

    Args:
        session: Snowpark session used to build the ``Registry``.
        df_dict: Mapping passed to ``pd.DataFrame``; the resulting frame is
            used as ``sample_input_data`` when logging the model.

    Returns:
        ``df_dict``, unchanged.
    """
    from snowflake.snowpark.files import SnowflakeFile
    from snowflake.ml.registry.registry import Registry

    import joblib

    sample_df = pd.DataFrame(df_dict)
    print("df ===", sample_df)

    stage_path = '@STG_ML_MODELS/ml_model_4'
    # Binary mode for the serialized model; a plain stage path is not a scoped
    # URL, hence require_scoped_url=False.
    # NOTE(security): joblib.load unpickles the file — only load trusted models.
    with SnowflakeFile.open(stage_path, 'rb', require_scoped_url=False) as stream:
        loaded_model = joblib.load(stream)

        registry_client = Registry(session=session)
        registry_client.log_model(
            model=loaded_model,
            model_name="sample_model",
            comment="test",
            version_name="run1",
            python_version="3.9.19",
            conda_dependencies=["scikit-learn==1.3.2"],
            metrics={"model_metrics": {"score": 96}, "project_id": "0001", "type": "Model"},
            sample_input_data=sample_df,
        )

        print("model got registered successfully")
        return df_dict

In [None]:
session.sproc.register(func=read_model_from_stage,
                           name="read_model_from_stage",
                           packages=["snowflake-snowpark-python", "snowflake-ml-python","scikit-learn", "pandas", "cloudpickle==2.2.1", "joblib"],
                           # The keyword is `is_permanent`; the misspelled
                           # `isPermanant` is not a parameter of register(), so
                           # the procedure was not being registered as permanent.
                           is_permanent=True,
                           stage_location="@STG_ML_MODELS",
                           replace=True,
                       )

In [None]:
dict_df = X_train.to_dict()

In [None]:
X_train.columns

In [None]:
df = pd.DataFrame(dict_df)

In [None]:
df.columns

In [None]:
model_obj = session.call("read_model_from_stage", dict_df)

In [None]:
!pip install category_encoders

Collecting category_encoders
[?25l  Downloading https://files.pythonhosted.org/packages/7f/e5/79a62e5c9c9ddbfa9ff5222240d408c1eeea4e38741a0dc8343edc7ef1ec/category_encoders-2.6.3-py2.py3-none-any.whl (81kB)
[K     |████████████████████████████████| 81kB 1.2MB/s eta 0:00:01
[?25hCollecting scikit-learn>=0.20.0
[?25l  Downloading https://files.pythonhosted.org/packages/e9/ea/44b8c639afe93c0b55d7f0852b663d18623132a6879516afe0380fa743b6/scikit_learn-1.5.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.4MB)
[K     |████████████████████████████████| 13.4MB 2.8MB/s eta 0:00:01     |███████████████████████████████▌| 13.2MB 2.8MB/s eta 0:00:01
[?25hCollecting numpy>=1.14.0
  Using cached https://files.pythonhosted.org/packages/54/30/c2a907b9443cf42b90c17ad10c1e8fa801975f01cb9764f3f8eb8aea638b/numpy-1.26.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
Collecting scipy>=1.0.0
  Using cached https://files.pythonhosted.org/packages/35/f5/d0ad1a96f80962ba65e2ce1de6a1e5

In [1]:
import pandas as pd
import numpy as np
import category_encoders as ce
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import requests
# from refractml import *
# from refractml.constants import MLModelFlavours

ModuleNotFoundError: No module named 'category_encoders'

In [None]:
cars = pd.read_csv("/data/car_new.csv")
cars

In [None]:
Y = cars['class_num']
X = cars.drop (['class_num'], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.18, shuffle=True, random_state=25)
print (X_train.shape, X_test.shape, y_train.shape, y_test.shape)

In [None]:
first_tree = DecisionTreeClassifier()
model=first_tree.fit(X_train, y_train)
model

In [None]:
y_pred = model.predict(X_test)
first_tree.score(X_test, y_test)
y_prob = model.predict_proba(X_test)[:,1]

In [None]:
model.predict(X_train.head(1))

# 