In [81]:
import sys
import os  
from datetime import datetime, timedelta, date
import datetime as dt

import requests
import pandas as pd
import json
import yaml
import importlib


from snowflake.snowpark.functions import date_trunc, current_date, to_date, month
from snowflake.snowpark.functions import col, lit, when
from snowflake.snowpark.types import DateType
import snowflake.snowpark.functions as f


sys.path.append(os.path.abspath('scripts'))

try:
    from snowflake.snowpark.context import get_active_session
    session = get_active_session()
except Exception:
    import SnowflakeConnector  # brings module into scope
    importlib.reload(SnowflakeConnector)
    from SnowflakeConnector import create_active_session
    session = create_active_session()


import utils
importlib.reload(utils)
from utils import get_orders_for_date, backfill_date_generator, get_min_date_from_table, get_max_date_from_table





In [67]:
#Access Token
TOKEN_PATH = "config/shopify_auth.yaml"
QUERY_PATH = "graphql/queries/get_transactions.graphql"
SELLINGPLAN_TABLE = "snowpark_db.shopify.orders_with_selling_plan"
DATE_COLUMN = "PROCESSED_AT"


with open(TOKEN_PATH, "r") as file:
    config = yaml.safe_load(file)
ACCESS_TOKEN = config.get("ACCESS_TOKEN", {})


with open(QUERY_PATH, "r") as file:
    QUERY_STRING = file.read()


SHOP_ENDPOINT = "https://shopmbg.myshopify.com/admin/api/2024-04/graphql.json"

In [75]:
##See  SELLINGPLAN_TABLE daily summary

SELLINGPLAN = session.table(SELLINGPLAN_TABLE)\
    .select(to_date(col(DATE_COLUMN)).alias("PROCESSED_DATE"), 
            col("ORDER_ID"), 
            col("ORDER_NAME"), 
            col("LINEITEM_ID"), 
            col("TAGS"), 
            col("LOAD_TIMESTAMP")).group_by(col("PROCESSED_DATE"))\
    .agg(f.count(col("ORDER_ID")).alias("ORDER_COUNT")).sort(col("PROCESSED_DATE").desc())

SELLINGPLAN.show()

------------------------------------
|"PROCESSED_DATE"  |"ORDER_COUNT"  |
------------------------------------
|2025-06-16        |931            |
|2025-06-15        |917            |
|2025-06-14        |891            |
|2025-06-13        |912            |
|2025-06-12        |904            |
|2025-06-11        |992            |
|2025-06-10        |863            |
|2025-06-09        |976            |
|2025-06-08        |975            |
|2025-06-07        |991            |
------------------------------------



In [79]:
##See  SELLINGPLAN_TABLE monthly summary
SELLINGPLAN = session.table(SELLINGPLAN_TABLE)\
    .select(month(to_date(col(DATE_COLUMN))).alias("PROCESSED_MONTH"), 
            col("ORDER_ID"), 
            col("ORDER_NAME"), 
            col("LINEITEM_ID"), 
            col("TAGS"), 
            col("LOAD_TIMESTAMP")).group_by(col("PROCESSED_MONTH"))\
    .agg(f.count(col("ORDER_ID")).alias("ORDER_COUNT")).sort(col("PROCESSED_MONTH").desc())

SELLINGPLAN.show()

-------------------------------------
|"PROCESSED_MONTH"  |"ORDER_COUNT"  |
-------------------------------------
|6                  |15279          |
|5                  |31957          |
|4                  |30776          |
|3                  |29500          |
|2                  |27982          |
|1                  |30008          |
-------------------------------------



In [None]:

orders = get_orders_for_date("2025-06-11", ACCESS_TOKEN, SHOP_ENDPOINT, QUERY_STRING)




In [51]:
#Function usage example

get_date = backfill_date_generator(start_date=date(2025, 6, 20), stop_date=date(2025, 6, 15))
for _ in range(10):
    print(next(get_date))

2025-06-19
2025-06-18
2025-06-17
2025-06-16
2025-06-15
Reached stop date: 2025-06-15. Generator exhausted.


StopIteration: 

In [None]:
#maxdate in SELLINGPLAN_TABLE

max_date_in_table = get_max_date_from_table(SELLINGPLAN_TABLE, DATE_COLUMN, session)
max_date_in_table.strftime("%Y-%m-%d")

In [None]:

date = '2025-06-16'
all_orders = get_orders_for_date(date)
now = datetime.utcnow()
paginated_df = pd.DataFrame([
        {
            "PROCESSED_AT" :o['node']['processedAt'] ,
            "ORDER_ID": o['node']['id'].split('/')[-1],
            "ORDER_NAME": o['node']['name'].split('#')[-1],
            "LINEITEM_ID":json.dumps(o['node']['lineItems']),
            "TAGS":json.dumps(o['node']['tags']),
            "LOAD_TIMESTAMP": now
        }
        for o in all_orders
    ])

end = datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1)
end = end.strftime('%Y-%m-%dT00:00:00Z')
paginated_df = paginated_df[paginated_df["PROCESSED_AT"]<end]


print(f"end = {end}")




paginated_df.tail(10)

#1769

In [None]:
#final extraction 
#date_list = date_generator(stop_date=date(2025, 1, 1))


date_list = ["2025-06-17", "2025-06-18"]

for next_date in date_list:
    all_orders = get_orders_for_date(next_date)
    now = datetime.utcnow()
    paginated_df = pd.DataFrame([
        {
            "PROCESSED_AT" :o['node']['processedAt'] ,
            "ORDER_ID": o['node']['id'].split('/')[-1],
            "ORDER_NAME": o['node']['name'].split('#')[-1],
            "LINEITEM_ID":json.dumps(o['node']['lineItems']),
            "TAGS":json.dumps(o['node']['tags']),
            "LOAD_TIMESTAMP": now
        }
        for o in all_orders
    ])

    #
    end = datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1)
    end = end.strftime('%Y-%m-%dT00:00:00Z')
    paginated_df = paginated_df[paginated_df["PROCESSED_AT"]<end]
    

    records = paginated_df.to_dict(orient="records")
    transactions = session.create_dataframe(records)
    transactions.write.save_as_table("snowpark_db.shopify.orders_with_selling_plan", mode="append")
 

    print(f"Updated transactions for {next_date}")