In [1]:
import modin.pandas as pd
import snowflake.snowpark.modin.plugin
import pandas as native_pd
import json
from snowflake.snowpark import Session
# Extend the import search path so the `tests.parameters` import below can resolve.
import sys
from pathlib import Path

# The directory added is three levels above the current working directory.
connection_parameters_path = str(Path.cwd().parent.parent.parent)
sys.path.append(connection_parameters_path)
from tests.parameters import CONNECTION_PARAMETERS

# Build the Snowpark session from the CONNECTION_PARAMETERS imported from tests.parameters.
session = Session.builder.configs(CONNECTION_PARAMETERS).create()

## JSON Operations

In [2]:
# Two-row example frame: an integer id plus a column of dict (JSON-like) values.
df = pd.DataFrame(
    data={
        "id": [1, 2],
        "json_col": [
            {"name": "Alice", "age": 30},
            {"name": "Bob", "age": 25},
        ],
    }
)
df

Unnamed: 0,id,json_col
0,1,"{'age': 30, 'name': 'Alice'}"
1,2,"{'age': 25, 'name': 'Bob'}"


In [3]:
# Extract the values stored under the 'name' and 'age' keys of each JSON object
# and store them in new 'name' and 'age' columns.
from snowflake.snowpark.functions import get, lit
# The Snowpark `get` function is passed to apply(); per the original author's note
# this calls SQL GET. lit(...) supplies the key to look up as a literal argument.
df['name'] = df['json_col'].apply(get, args=[lit('name')])
# NOTE(review): the original comment on this line mentioned `sf.func` as a more
# general function builder, but the line uses `get` exactly like the one above —
# confirm whether an `sf.func`-based example was intended here.
df['age'] = df['json_col'].apply(get, args=[lit('age')])
df

convert_dtype is ignored in Snowflake backend.


Unnamed: 0,id,json_col,name,age
0,1,"{'age': 30, 'name': 'Alice'}",Alice,30
1,2,"{'age': 25, 'name': 'Bob'}",Bob,25


In [4]:
# Apply the Snowpark `replace` function to the "name" column with the extra
# arguments "Bob" and "John", storing the result in a new "rename" column.
from snowflake.snowpark.functions import replace

df["rename"] = df["name"].apply(replace, args=["Bob", "John"])
df

Unnamed: 0,id,json_col,name,age,rename
0,1,"{'age': 30, 'name': 'Alice'}",Alice,30,Alice
1,2,"{'age': 25, 'name': 'Bob'}",Bob,25,John


In [5]:
# Import the Snowpark aggregate under an alias: a bare `import min` would rebind
# the name `min` in the notebook namespace and hide Python's built-in min() from
# every cell executed afterwards.
from snowflake.snowpark.functions import min as sf_min

# Aggregate the "age" column with the Snowpark function; the cell displays the result.
df.age.agg(sf_min)

25

## Cortex 

In [6]:
# Three short English questions used as input for the translation cells.
series = pd.Series(
    data=[
        "how old are you",
        "what is your name",
        "how many days in a week",
    ]
)
series

0            how old are you
1          what is your name
2    how many days in a week
dtype: object

In [7]:
from snowflake.snowpark.functions import cortex_translate
# Apply cortex_translate to every element of `series`, passing "en" and "zh_CN"
# as extra arguments (presumably source and target language codes — confirm
# against the function's signature).
# NOTE(review): verify that "zh_CN" is an accepted language code for this
# function; the recorded output of this cell looks inconsistent.
series.apply(cortex_translate, args=["en", "zh_CN"])

0    多大成年齢你?
1      那你名何?
2     一週何日間?
dtype: object

In [8]:
# Same call as before with "es" as the second extra argument; relies on
# cortex_translate having been imported by an earlier cell.
series.apply(cortex_translate, args=["en", "es"])

0           ¿Cuántos años tienes?
1                ¿Cómo te llamas?
2    ¿Cuántos días en una semana?
dtype: object

In [9]:
# Rebinds `df` to a new single-column frame of prompts; the frame built in the
# JSON section is no longer reachable under this name afterwards.
df = pd.DataFrame(
    data={
        "prompt": [
            "What model am I using right now?",
            "What is Snowpark pandas API?",
            "Can you show me an example pandas dataframe?",
        ]
    }
)
df

Unnamed: 0,prompt
0,What model am I using right now?
1,What is Snowpark pandas API?
2,Can you show me an example pandas dataframe?


In [11]:
# Apply cortex_complete to each prompt with "snowflake-arctic" as the extra
# argument and keep the responses in a new "answer" column.
from snowflake.snowpark.functions import cortex_complete

df["answer"] = df["prompt"].apply(cortex_complete, args=["snowflake-arctic"])

In [12]:
from IPython.display import display, HTML

def pretty_print(df):
    """Display ``df`` as an HTML table with escaped newlines shown as line breaks.

    Args:
        df: Any object exposing ``to_html()`` that returns the table markup as a string.

    Returns:
        Whatever ``display`` returns for the rendered ``HTML`` object.
    """
    table_markup = df.to_html()
    # Swap each literal backslash-n (two characters) in the markup for an HTML line break.
    markup_with_breaks = table_markup.replace("\\n", "<br>")
    rendered = HTML(markup_with_breaks)
    return display(rendered)

# Set the display option so that long cell text is shown in full rather than truncated.
pd.set_option('display.max_colwidth', None)
# Convert the frame with to_pandas() (presumably to a native pandas DataFrame —
# confirm) and render it through pretty_print.
pretty_print(df.to_pandas())

Unnamed: 0,prompt,answer
0,What model am I using right now?,"I don't have access to your current device or software. Please specify the type of device or software you are using, and I'll try my best to identify the model."
1,What is Snowpark pandas API?,"Snowpark Pandas API is an extension of the Snowpark API, which is a library developed by Snowflake to enable data scientists, data engineers, and developers to interact with Snowflake's Data Cloud using familiar programming constructs such as DataFrames and APIs. The Snowpark Pandas API specifically allows users to work with Pandas DataFrames within the Snowflake environment, enabling them to leverage the powerful data manipulation and analysis capabilities of Pandas while working with data stored in Snowflake. This API provides a seamless experience for users who are already familiar with Pandas and want to work with their data stored in Snowflake without having to learn a new library or language. It also allows users to take advantage of the scalability and performance of the Snowflake platform when working with large datasets."
2,Can you show me an example pandas dataframe?,"Sure! Here's an example of a pandas DataFrame: ```python import pandas as pd data = {  'apples': [3, 2, 0, 1], 'oranges': [0, 3, 7, 2] } purchases = pd.DataFrame(data) print(purchases) ``` This will output: ```  apples oranges 0 3 0 1 2 3 2 0 7 3 1 2 ``` This DataFrame has two columns ('apples' and 'oranges') and four rows, each representing a purchase of apples and oranges. The numbers in the table indicate the quantity of each fruit purchased."
