In [7]:
# Add src directory to Python path
import sys
import os
from datetime import date, timedelta
import pandas as pd
import importlib


sys.path.append(os.path.abspath('../src'))

import SnowflakeConnector
importlib.reload(SnowflakeConnector)



from SnowflakeConnector import create_active_session

from snowflake.snowpark.functions import date_trunc, current_date
from snowflake.snowpark.functions import col, lit, when
from snowflake.snowpark.types import DateType
import snowflake.snowpark.functions as f

# Get Snowflake session
session = create_active_session()


In [8]:
today = date.today()
this_monday = today - timedelta(days=today.weekday())
mondays = [this_monday - timedelta(weeks=i+1) for i in range(9)]

snapshot_week = date(2025, 5, 19)  # or however you're setting it
week_plus_14 = snapshot_week + timedelta(days=14)


mondays[8]


datetime.date(2025, 4, 7)

In [9]:
### Load and filter shop orders data 


bimonthly_churn_features = session.table("analytics.analytics_inference.bimonthly_churn_features").\
    select(
        col('EMAIL').alias('email')
    ).distinct()

shop_orders = session.table("analytics.analytics.shop_orders_with_attribution").\
    where(col('EMAIL').isin(bimonthly_churn_features['EMAIL'])).\
    where(col('ORDER_COMPLETED_DATE')>lit('2024-03-11')).\
    select(
        col('ORDER_COMPLETED_DATE').alias('order_completed_date'),
        col('EMAIL').alias('email'),
        col('ORDER_ID').alias('order_id'),
        col("CHANNEL").alias('channel'),
        col('TOTAL_NET_REVENUE').alias('total_net_revenue'),
        col("CUST_ORDER_SEQ").alias('cust_order_seq'),
        col("CUST_SUB_SEQ").alias('cust_sub_seq'),

    )

# Display sample data
shop_orders.show()


-----------------------------------------------------------------------------------------------------------------------------------------------------
|"ORDER_COMPLETED_DATE"  |"EMAIL"                     |"ORDER_ID"     |"CHANNEL"          |"TOTAL_NET_REVENUE"  |"CUST_ORDER_SEQ"  |"CUST_SUB_SEQ"  |
-----------------------------------------------------------------------------------------------------------------------------------------------------
|2025-04-06              |jeff.joan@comcast.net       |6544401694825  |Paid Social        |58.5                 |1                 |NULL            |
|2025-02-22              |melisa-argoitia@live.com    |6465531969641  |Paid Search        |390.0                |1                 |NULL            |
|2025-03-27              |clmargart@gmail.com         |6525595877481  |Paid Social        |198.0                |1                 |1               |
|2024-09-04              |jcmccarter@gmail.com        |6163922583657  |Organic Search     |92.65    

In [10]:
def build_orders_snapshot (df, snapshot_week) :

    orders_snapshot = (
        df.where(col("ORDER_COMPLETED_DATE")<snapshot_week).\
        group_by(col("EMAIL")).agg(
            f.sum(col("TOTAL_NET_REVENUE")).alias("total_net_revenue"),
            f.count_distinct(col("ORDER_ID")).alias("order_count"),
            f.datediff('day', f.max(col("ORDER_COMPLETED_DATE")), lit(snapshot_week)).alias("days_since_last_order"),
            f.mode(col("CHANNEL")).alias("PREFERRED_CHANNEL"),
      
        )
        .select(
            lit(snapshot_week).alias("snapshot_week"),
            col("EMAIL"),
            f.round(col("total_net_revenue"), 2).alias("total_net_revenue"),
            col("order_count"),
            col("days_since_last_order"),
            col("PREFERRED_CHANNEL"),
            when(col('PREFERRED_CHANNEL') == 'Paid Social', 1).otherwise(0).alias('paid_social'),
            when(col('PREFERRED_CHANNEL') == 'Paid Search', 1).otherwise(0).alias('paid_search'),
            when(col('PREFERRED_CHANNEL') == 'Referral', 1).otherwise(0).alias('referral'),
            when(col('PREFERRED_CHANNEL') == 'Affiliate', 1).otherwise(0).alias('affiliate'),
            when(col('PREFERRED_CHANNEL') == 'Organic Social', 1).otherwise(0).alias('organic_social'),
            when(col('PREFERRED_CHANNEL') == 'Organic Search ', 1).otherwise(0).alias('organic_search'),
            when(col('PREFERRED_CHANNEL') == 'Main Site', 1).otherwise(0).alias('main_site'),
            when(col('PREFERRED_CHANNEL') == 'Content', 1).otherwise(0).alias('content'),

        )

    )
    return orders_snapshot

In [11]:
all_snapshots = None

for week in mondays:
    snapshot = build_orders_snapshot(shop_orders, week)
    all_snapshots = snapshot if all_snapshots is None else all_snapshots.union(snapshot)

    active = snapshot.agg(f.count_distinct(col('EMAIL')).alias('EMAIL')).first()[0]
    print(f"Week Completed : {week}, Active: {active}")

all_snapshots= all_snapshots.sort(['EMAIL', 'SNAPSHOT_WEEK'])

Week Completed : 2025-06-02, Active: 147331
Week Completed : 2025-05-26, Active: 145237
Week Completed : 2025-05-19, Active: 143237
Week Completed : 2025-05-12, Active: 141223
Week Completed : 2025-05-05, Active: 138467
Week Completed : 2025-04-28, Active: 135578
Week Completed : 2025-04-21, Active: 132477
Week Completed : 2025-04-14, Active: 129744
Week Completed : 2025-04-07, Active: 127286


In [12]:
all_snapshots.where(col("order_count")>1).show()

-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"SNAPSHOT_WEEK"  |"EMAIL"                                             |"TOTAL_NET_REVENUE"  |"ORDER_COUNT"  |"DAYS_SINCE_LAST_ORDER"  |"PREFERRED_CHANNEL"  |"PAID_SOCIAL"  |"PAID_SEARCH"  |"REFERRAL"  |"AFFILIATE"  |"ORGANIC_SOCIAL"  |"ORGANIC_SEARCH"  |"MAIN_SITE"  |"CONTENT"  |
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|2025-05-12       |0.minder-detente@icloud.com                         |198.0                |2              |2                        |Content           

In [13]:
all_snapshots.\
        write.mode('overwrite') \
        .save_as_table('analytics.analytics_inference.shop_orders_churn_features')

In [43]:
snapshot_week = mondays[8]

orders_snapshot = build_orders_snapshot (shop_orders, snapshot_week) 

orders_snapshot.show()

------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"SNAPSHOT_WEEK"  |"EMAIL"                    |"TOTAL_NET_REVENUE"  |"ORDER_COUNT"  |"DAYS_SINCE_LAST_ORDER"  |"PREFERRED_CHANNEL"  |"PAID_SOCIAL"  |"REFERRAL"  |"AFFILIATE"  |"ORGANIC_SOCIAL"  |"ORGANIC_SEARCH"  |"MAIN_SITE"  |
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|2025-04-07       |lisabenjamin007@gmail.com  |59.4                 |1              |61                       |Paid Social          |1              |0           |0            |0                 |0                 |0            |
|2025-04-07       |nancycharbonneau@me.com    |270.3                |1              

In [39]:
orders_snapshot.select("PREFERRED_CHANNEL").distinct().show()

-------------------------
|"PREFERRED_CHANNEL"    |
-------------------------
|Main Site              |
|Referral               |
|Organic Social         |
|Paid Social            |
|Fondue                 |
|Organic Search - Bing  |
|Affiliate              |
|Shop Site              |
|Other                  |
|Organic Search         |
-------------------------

