In [None]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from snowflake.snowpark import Session
from snowflake.snowpark import functions as F
from snowflake.snowpark import types as T
from snowflake.ml.utils.connection_params import SnowflakeLoginOptions
from dotenv import load_dotenv
import cachetools

In [None]:
# Read key/value pairs from a local .env file into the process environment
# (python-dotenv). Presumably this supplies connection settings for the
# Snowflake session created later — TODO confirm which variables are required.
load_dotenv()

In [None]:
# Build the Snowpark session from the login options registered under "test_conn".
login_options = SnowflakeLoginOptions("test_conn")
session = Session.builder.configs(login_options).create()

# Display the session object as the cell output.
session

In [None]:
# Clear whatever imports and packages are currently registered on the session
# before the UDF below is defined with its own explicit `imports=` / `packages=`.
session.clear_imports()
session.clear_packages()

@cachetools.cached(cache={})
def load_file(file_name):
    """Load a joblib-serialized artifact from the Snowflake UDF import directory.

    The result is memoized per ``file_name`` by ``cachetools.cached``, so
    repeated calls with the same name deserialize the file only once.

    Args:
        file_name: Base name of the file inside the import directory
            (e.g. ``'housing_fores_reg.joblib'``).

    Returns:
        The object produced by ``joblib.load`` for that file.

    Raises:
        RuntimeError: If ``snowflake_import_directory`` is not set in
            ``sys._xoptions``, i.e. the function is not running where
            Snowflake has provided an import directory.
        OSError: If the file cannot be opened.
    """
    # Imported locally: these modules are not imported at notebook level, and
    # keeping them here makes the function self-contained when it is shipped
    # together with the UDF.
    import sys, os, joblib

    import_dir = sys._xoptions.get("snowflake_import_directory")
    if not import_dir:
        # Previously this path fell through and returned None, which was then
        # cached and only surfaced later as an opaque AttributeError on
        # `.predict`. Fail loudly instead (exceptions are not cached).
        raise RuntimeError(
            "snowflake_import_directory is not set; cannot load "
            f"{file_name!r} outside of a Snowflake UDF runtime"
        )

    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code. Only load artifacts from a trusted stage.
    with open(os.path.join(import_dir, file_name), 'rb') as f:
        return joblib.load(f)

In [None]:
# Staged model artifact attached to the UDF through its `imports=` argument.
artifact_imports = [
    '@int_stage/models/2025-10-13-091807/housing_fores_reg.joblib',
]

# Packages handed to the UDF through its `packages=` argument.
modules_imports = [
    'snowflake-snowpark-python',
    'scikit-learn',
    'pandas',
    'numpy',
    'joblib',
    'cachetools',
]

@F.udf(
    name="predict_house_value",
    is_permanent=True,
    replace=True,
    stage_location='int_stage/udf/',
    imports=artifact_imports,
    packages=modules_imports,
)
def predict_house_value(
    df: T.PandasDataFrame[float, float, float, float, float, float, float, float, str]
) -> T.PandasSeries[float]:
    """Predict house values for a batch of rows with the staged model.

    Registered as the permanent UDF ``predict_house_value``. The function
    receives a batch as a pandas DataFrame with eight float columns followed
    by one string column.

    Args:
        df: Batch of feature rows. Columns arrive positionally and are
            renamed in place to the feature names listed below, so callers
            must pass the features in exactly this order.

    Returns:
        The result of ``predict`` on the loaded model for the batch.
    """
    # Presumably the column names the fitted pipeline was trained with —
    # TODO confirm against the training notebook.
    feature_names = [
        'LONGITUDE',
        'LATITUDE',
        'HOUSING_MEDIAN_AGE',
        'TOTAL_ROOMS',
        'TOTAL_BEDROOMS',
        'POPULATION',
        'HOUSEHOLDS',
        'MEDIAN_INCOME',
        'OCEAN_PROXIMITY',
    ]
    df.columns = feature_names

    # load_file memoizes by file name, so the artifact is not re-read on
    # every batch handled by the same process.
    model = load_file('housing_fores_reg.joblib')
    return model.predict(df)
    

In [None]:
# Score the HOUSING_TEST table with the registered UDF.
df = session.table("HOUSING_TEST")

# Everything except the label column is passed to the UDF as a feature.
# NOTE(review): the UDF renames its inputs positionally, so the table's
# column order has to match the order the UDF expects — confirm.
inputs = df.drop("MEDIAN_HOUSE_VALUE")

# call_function takes the UDF name first, followed by the feature columns
# as positional arguments.
prediction_col = F.call_function("predict_house_value", *inputs).alias('PREDICTION')
actual_label_col = F.col('MEDIAN_HOUSE_VALUE').alias("ACTUAL_LABEL")

# Keep the features alongside the prediction and the true label.
df = df.select(*inputs, prediction_col, actual_label_col)

df.show()