# Hybrid execution for Snowpark pandas

In [None]:
# Standard library
import datetime
from time import perf_counter

# Third-party numerical stack; native pandas gets its own alias so it does not
# clash with the modin `pd` alias below.
import numpy as np
import pandas as native_pd

# modin front end. The Snowflake plugin import stays after `modin.pandas`,
# in the same relative order as the original cell.
import modin.pandas as pd
import snowflake.snowpark.modin.plugin
from modin.config import AutoSwitchBackend
from snowflake.snowpark.session import Session

# Build a Snowpark session; no explicit connection options are passed here.
session = Session.builder.create()

## Example 1: Working with a small dataframe created from in-memory data is faster

### Before...

In [None]:
# (holiday name, ISO date string) pairs for the 2025 US holidays used in this example.
us_holidays = [
    ("New Year's Day", "2025-01-01"),
    ("Martin Luther King Jr. Day", "2025-01-20"),
    ("Presidents' Day", "2025-02-17"),
    ("Memorial Day", "2025-05-26"),
    ("Juneteenth National Independence Day", "2025-06-19"),
    ("Independence Day", "2025-07-04"),
    ("Labor Day", "2025-09-01"),
    ("Columbus Day", "2025-10-13"),
    ("Veterans Day", "2025-11-11"),
    ("Thanksgiving Day", "2025-11-27"),
    ("Christmas Day", "2025-12-25"),
]

# One row per holiday, with the tuple fields mapped to named columns.
holiday_columns = ["Holiday", "Date"]
df_us_holidays = pd.DataFrame(us_holidays, columns=holiday_columns)

# Parse the date strings, then derive the weekday and month names from the parsed column.
df_us_holidays["Date"] = pd.to_datetime(df_us_holidays["Date"])
df_us_holidays["Day_of_Week"] = df_us_holidays["Date"].dt.day_name()
df_us_holidays["Month"] = df_us_holidays["Date"].dt.month_name()

In [None]:
%%time
# Row-by-row pass over the holiday frame, printing one sentence per holiday.
# `index` is unpacked from iterrows() but not used in the loop body.
# This cell sits under the "Before..." heading; the same loop is timed again
# later in the notebook under "With hybrid execution".
for index, row in df_us_holidays.iterrows():
    print(f"{row['Holiday']} falls on {row['Day_of_Week']}, {row['Month']} {row['Date'].day}, {row['Date'].year}.")

### With hybrid execution

In [None]:
# Switch on modin's AutoSwitchBackend setting (imported from modin.config in the
# first cell). Every cell under the "With hybrid execution" heading runs after this call.
AutoSwitchBackend.enable()

In [None]:
# Same 2025 holiday data as the "Before..." cell, rebuilt here so the frame is
# created after the AutoSwitchBackend.enable() call above.
us_holidays = [
    ("New Year's Day", "2025-01-01"),
    ("Martin Luther King Jr. Day", "2025-01-20"),
    ("Presidents' Day", "2025-02-17"),
    ("Memorial Day", "2025-05-26"),
    ("Juneteenth National Independence Day", "2025-06-19"),
    ("Independence Day", "2025-07-04"),
    ("Labor Day", "2025-09-01"),
    ("Columbus Day", "2025-10-13"),
    ("Veterans Day", "2025-11-11"),
    ("Thanksgiving Day", "2025-11-27"),
    ("Christmas Day", "2025-12-25"),
]

holiday_columns = ["Holiday", "Date"]
df_us_holidays = pd.DataFrame(us_holidays, columns=holiday_columns)

# Turn the ISO date strings into datetimes.
df_us_holidays["Date"] = pd.to_datetime(df_us_holidays["Date"])

# Derived calendar columns: weekday name and month name of each holiday.
df_us_holidays["Day_of_Week"] = df_us_holidays["Date"].dt.day_name()
df_us_holidays["Month"] = df_us_holidays["Date"].dt.month_name()

In [None]:
# Report the frame's Python type and the backend it currently reports, one per line.
print(type(df_us_holidays), df_us_holidays.get_backend(), sep="\n")

In [None]:
%%time
# Same row-by-row loop as in the "Before..." section, now run on the frame that
# was rebuilt after AutoSwitchBackend.enable(), so the two timings can be compared.
# `index` is unpacked from iterrows() but not used in the loop body.
for index, row in df_us_holidays.iterrows():
    print(f"{row['Holiday']} falls on {row['Day_of_Week']}, {row['Month']} {row['Date'].day}, {row['Date'].year}.")

## Example 2: When reading data from Snowflake, we immediately transfer it to pandas if the data is below a certain, configurable number of rows (by default 10 million).

In [None]:
# Load REVENUE_TRANSACTIONS by name (not as a SQL query) through read_snowflake.
df_transactions = pd.read_snowflake("REVENUE_TRANSACTIONS")

In [None]:
# Print the row count and the backend that currently holds df_transactions.
print(f"The dataset size is {len(df_transactions)} and the data is located in {df_transactions.get_backend()}.")

In [None]:
# Overwrite the DATE column with its pd.to_datetime conversion; the groupby and
# the merge later in the notebook use this converted column.
df_transactions["DATE"] = pd.to_datetime(df_transactions["DATE"])

In [None]:
%%time
# Total REVENUE per DATE. The column is selected before aggregating, so only
# REVENUE is summed. The result is the same Series that `.sum()["REVENUE"]`
# produces, without aggregating (or failing on) every other column first.
df_transactions.groupby("DATE")["REVENUE"].sum()

In [None]:
# Here read_snowflake receives a SQL query instead of a table name: it keeps the
# rows of revenue_transactions whose Date is on or after current_date minus 7 days.
last_week_transactions = pd.read_snowflake(
    "SELECT * FROM revenue_transactions WHERE Date >= DATEADD('days', -7, current_date )"
)

In [None]:
# Expected outcome for this example: the smaller, filtered dataset is held in
# memory as a pandas dataframe. The print below reports the actual row count and backend.
print(f"The dataset size is {len(last_week_transactions)} and the data is located in {last_week_transactions.get_backend()}.")

## Example 3: Combining small and large datasets in the same workflow

Sometimes you work with multiple dataframes of different sizes and need to join them. What happens in this scenario?
When two dataframes that live on different engines are joined, we automatically determine the best way to move the data so that the cost of data movement is minimized.

We continue with the `df_transactions` and `df_us_holidays` datasets from the previous examples.

In [None]:
# Recap of the two inputs to the merge below: row count and current backend of
# the transactions frame and of the holiday frame.
print("Quick recap:")
print(f"- df_transactions is {len(df_transactions)} rows and the data is located in {df_transactions.get_backend()}.")
print(f"- df_us_holidays is {len(df_us_holidays)} rows and the data is located in {df_us_holidays.get_backend()}.")

In [None]:
# Join holidays to transactions on the date: the key is "Date" on the holiday
# side and "DATE" on the transactions side. No `how` is passed, so pd.merge's
# default join type applies.
holiday_transactions = pd.merge(df_us_holidays, df_transactions, left_on="Date", right_on="DATE")

In [None]:
# Display which backend the merged result ended up on.
holiday_transactions.get_backend()

In [None]:
# Display the merged holiday/transactions frame as the cell output.
holiday_transactions