In [1]:
# Import python packages
import sys
import os   
import requests
import pandas as pd
import json
import yaml
from datetime import datetime, timedelta
import datetime as dt

from snowflake.snowpark.functions import date_trunc, current_date
from snowflake.snowpark.functions import col, lit, when
from snowflake.snowpark.types import DateType
import snowflake.snowpark.functions as f


# Make the helper modules under ../scripts importable. Guard the append so
# re-running this cell does not keep adding duplicate entries to sys.path.
SCRIPTS_DIR = os.path.abspath('../scripts')
if SCRIPTS_DIR not in sys.path:
    sys.path.append(SCRIPTS_DIR)

# Try Snowpark's already-active session first. If that fails for any reason,
# fall back to `create_active_session` from the SnowflakeConnector module
# (presumably located in ../scripts -- TODO confirm).
try:
    from snowflake.snowpark.context import get_active_session
    session = get_active_session()
except Exception:
    from SnowflakeConnector import create_active_session
    session = create_active_session()



  import pkg_resources


In [3]:
# Keep a handle on the table itself: DataFrame.show() only prints a preview
# and returns None, so chaining it into the assignment would bind None.
table = session.table("snowpark_db.shopify.transactions")
table.show()

-----------------------------------------------------------------------------------------------------------------------------------
|"PROCESSED_AT"  |"ORDER_ID"     |"ORDER_NAME"  |"OrderTransaction"                                  |"LOAD_TIMESTAMP"            |
-----------------------------------------------------------------------------------------------------------------------------------
|2025-04-29      |6585541001321  |815006        |[{"id": "gid://shopify/OrderTransaction/8289039...  |2025-06-16 14:13:56.764112  |
|2025-04-29      |6585544409193  |815007        |[{"id": "gid://shopify/OrderTransaction/8346810...  |2025-06-16 14:13:56.764112  |
|2025-04-29      |6585547096169  |815008        |[{"id": "gid://shopify/OrderTransaction/8346813...  |2025-06-16 14:13:56.764112  |
|2025-04-29      |6585554763881  |815009        |[{"id": "gid://shopify/OrderTransaction/8346823...  |2025-06-16 14:13:56.764112  |
|2025-04-29      |6585560924265  |815010        |[{"id": "gid://shopify/Orde

In [85]:
# Locations of the Shopify credentials file and the GraphQL query text.
TOKEN_PATH = "../config/shopify_auth.yaml"
QUERY_PATH = "../graphql/queries/get_transactions.graphql"

# Load the access token. safe_load() returns None for an empty file, so fall
# back to an empty mapping before looking the key up, and fail loudly when the
# token is absent instead of sending a placeholder value in the auth header.
with open(TOKEN_PATH, "r") as token_file:
    config = yaml.safe_load(token_file) or {}
ACCESS_TOKEN = config.get("ACCESS_TOKEN")
if not ACCESS_TOKEN:
    raise KeyError(f"ACCESS_TOKEN is missing or empty in {TOKEN_PATH}")

# Use a dedicated handle name here: `f` is the notebook's alias for
# snowflake.snowpark.functions and must not be overwritten by a file object.
with open(QUERY_PATH, "r") as query_file:
    QUERY_STRING = query_file.read()

# Shopify Admin GraphQL endpoint used by every request in this notebook.
URL = "https://shopmbg.myshopify.com/admin/api/2024-04/graphql.json"


'   query ($cursor: String, $date: String!) {\n      orders(first: 250, query: $date, after: $cursor) {\n        edges {\n          cursor\n          node {\n            id\n            name\n            transactions {\n              id\n              kind\n              status\n              gateway\n              amountSet {\n                presentmentMoney {\n                  amount\n                  currencyCode\n                }\n              }\n              paymentDetails {\n                ... on CardPaymentDetails {\n                  number\n                }\n              }\n            }\n          }\n        }\n        pageInfo {\n          hasNextPage\n          endCursor\n        }\n      }\n    }'

In [82]:
# GraphQL query text used by get_orders_for_date.
#   $date   - search filter string passed to `orders(query: ...)`
#   $cursor - pagination cursor passed to `orders(after: ...)`; null for page 1
# Requests up to 250 orders per page and, for each order, its id, name and
# transactions (id, kind, status, gateway, presentment amount/currency and the
# card `number` when the payment details are CardPaymentDetails), plus the
# pageInfo needed to continue paging.
# NOTE(review): this looks like a copy of the query stored at QUERY_PATH
# (loaded into QUERY_STRING) -- confirm and keep only one source of truth.
get_transactions = """
    query ($cursor: String, $date: String!) {
      orders(first: 250, query: $date, after: $cursor) {
        edges {
          cursor
          node {
            id
            name
            transactions {
              id
              kind
              status
              gateway
              amountSet {
                presentmentMoney {
                  amount
                  currencyCode
                }
              }
              paymentDetails {
                ... on CardPaymentDetails {
                  number
                }
              }
            }
          }
        }
        pageInfo {
          hasNextPage
          endCursor
        }
      }
    }
    """

In [None]:

def get_orders_for_date(date, timeout=30):
    """Fetch the orders matching ``processed_at:=<date>``, following pagination.

    Parameters
    ----------
    date : str
        Day to query, e.g. ``"2025-06-11"``. It is interpolated into the
        ``processed_at:=<date>`` search filter sent as the ``$date`` variable.
    timeout : float, optional
        Seconds to wait for each HTTP response before ``requests`` raises a
        timeout error. Defaults to 30 so a stalled connection cannot hang the
        notebook indefinitely.

    Returns
    -------
    list of dict
        The ``node`` object of every order edge, in the order received.
        If a request returns a non-200 status or an unexpected payload, the
        problem is printed and the orders collected so far are returned, so
        the result may be partial.

    Raises
    ------
    requests.exceptions.RequestException
        Propagated from ``requests.post`` (connection errors, timeouts).
    """
    headers = {
        "Content-Type": "application/json",
        "X-Shopify-Access-Token": ACCESS_TOKEN
    }

    # Shopify expects a search string like "processed_at:=2025-06-11".
    search_filter = f"processed_at:={date}"

    all_orders = []
    cursor = None  # None requests the first page

    while True:
        variables = {"cursor": cursor, "date": search_filter}
        response = requests.post(
            URL,
            json={"query": get_transactions, "variables": variables},
            headers=headers,
            timeout=timeout,
        )

        if response.status_code != 200:
            print("Request failed:", response.text)
            break

        result = response.json()

        # "data" can be missing *or* null when GraphQL reports errors, and
        # "orders" can be null as well; treat all of these as malformed
        # instead of crashing on a membership test against None.
        orders_data = (result.get("data") or {}).get("orders")
        if not orders_data:
            print("Unexpected response structure:", result)
            break

        all_orders.extend(edge["node"] for edge in orders_data["edges"])

        page_info = orders_data["pageInfo"]
        if not page_info["hasNextPage"]:
            break

        # Resume after the last edge of the page just consumed.
        cursor = page_info["endCursor"]

    print(f"Fetched {len(all_orders)} orders for {date}")
    return all_orders


# Example usage: fetch a single day. `date` is assigned here so the cell works
# on a fresh kernel instead of relying on a value left behind by another cell.
date = "2025-06-11"
all_orders = get_orders_for_date(date)


In [None]:
# Spot-check one fetched order (index 90) to inspect the payload structure.
# Raises IndexError when fewer than 91 orders were fetched.
all_orders[90]

In [None]:

# Flatten the fetched orders into one row per order.
# datetime.utcnow() is deprecated; take an aware UTC "now" and strip the tzinfo
# so LOAD_TIMESTAMP remains a naive UTC value exactly as before.
now = datetime.now(dt.timezone.utc).replace(tzinfo=None)
paginated_df = pd.DataFrame(
    [
        {
            "PROCESSED_AT": date,
            # Keep only the trailing numeric part of the "gid://..." id.
            "ORDER_ID": o['id'].split('/')[-1],
            # Drop everything up to and including '#' from the order name.
            "ORDER_NAME": o['name'].split('#')[-1],
            # Store the transactions list as a JSON string.
            "OrderTransaction": json.dumps(o['transactions']),
            "LOAD_TIMESTAMP": now,
        }
        for o in all_orders
    ],
    # Pin the column set so an empty fetch still yields the expected schema.
    columns=["PROCESSED_AT", "ORDER_ID", "ORDER_NAME", "OrderTransaction", "LOAD_TIMESTAMP"],
)

paginated_df.head()

In [None]:


# Days to backfill, as "YYYY-MM-DD" strings: 7-30 April 2025 (inclusive).
date_list = pd.date_range("2025-04-07", "2025-04-30").strftime("%Y-%m-%d").tolist()

# Smaller range for trial runs: 1-6 April 2025 (inclusive).
date_list_test = pd.date_range("2025-04-01", "2025-04-06").strftime("%Y-%m-%d").tolist()


In [None]:

def _orders_to_frame(orders, processed_at, load_timestamp):
    """Flatten order nodes into one row per order.

    Each row holds the given date, the last path segment of the order id, the
    order name without its leading '#', the transactions serialised as JSON,
    and the given load timestamp.
    """
    return pd.DataFrame(
        [
            {
                "PROCESSED_AT": processed_at,
                "ORDER_ID": o['id'].split('/')[-1],
                "ORDER_NAME": o['name'].split('#')[-1],
                "OrderTransaction": json.dumps(o['transactions']),
                "LOAD_TIMESTAMP": load_timestamp,
            }
            for o in orders
        ],
        columns=["PROCESSED_AT", "ORDER_ID", "ORDER_NAME", "OrderTransaction", "LOAD_TIMESTAMP"],
    )


# Fetch and append one day at a time.
# NOTE(review): mode="append" means re-running this cell for the same dates
# adds the same rows again -- confirm whether a delete/merge step is needed.
for date in date_list:

    all_orders = get_orders_for_date(date)

    # Nothing was fetched (quiet day or failed request): skip the load rather
    # than pushing an empty frame to Snowflake.
    if not all_orders:
        print(f"No orders fetched for {date}; skipping load")
        continue

    # datetime.utcnow() is deprecated; build the same naive UTC timestamp from
    # an aware "now".
    now = datetime.now(dt.timezone.utc).replace(tzinfo=None)
    paginated_df = _orders_to_frame(all_orders, date, now)

    transactions = session.create_dataframe(paginated_df)
    transactions.write.save_as_table("snowpark_db.shopify.transactions", mode="append")
    print(f"Updated transactions for {date}")
    


In [None]:

# Manual one-off load of the frame currently held in `paginated_df`, written
# to the same table the rest of the notebook reads from and appends to.
# NOTE(review): the batch loop also appends a frame per date, so running this
# cell for a date that loop has already loaded will duplicate its rows.
transactions = session.create_dataframe(paginated_df)
transactions.write.save_as_table("snowpark_db.shopify.transactions", mode="append")

In [None]:
# Spot-check the order at index 922; raises IndexError when fewer than 923
# orders are held in `all_orders`.
all_orders[922]

In [None]:
# Spot-check the order at index 900; raises IndexError when fewer than 901
# orders are held in `all_orders`.
all_orders[900]

In [None]:

# Ad-hoc, single-page query (first 250 orders) for one hard-coded day: order
# id and name plus each transaction's id, kind, status, gateway and, for card
# payments, the card `number` field.
query = """
query {
  orders(first: 250, query: "processed_at:=2025-06-11") {
    edges {
      node {
        id
        name
        transactions {
          id
          kind
          status
          gateway
          paymentDetails {
            ... on CardPaymentDetails {
              number
            }
          }
        }
      }
    }
  }
}
"""
# Headers including the private access token loaded from the config file.
headers = {
    "Content-Type": "application/json",
    "X-Shopify-Access-Token": ACCESS_TOKEN
}
# Send the request to the configured endpoint. Fail loudly on HTTP errors so
# `data` is never left undefined (or stale from an earlier run).
response = requests.post(URL, json={"query": query}, headers=headers, timeout=30)
response.raise_for_status()

# Parse the response and make sure it has the shape the next cell relies on.
data = response.json()
if not (data.get("data") or {}).get("orders"):
    raise RuntimeError(f"Unexpected response structure: {data}")

# Print a summary rather than the raw payload, which contains payment details.
print(f"Fetched {len(data['data']['orders']['edges'])} orders")
   

In [None]:
# One row per order edge in `data`: the trailing segment of the order id, the
# order name without its '#' prefix, and the transactions as a JSON string.
trasactions_df = pd.DataFrame([
    {
        "ORDER_ID": node['id'].rsplit('/', 1)[-1],
        "ORDER_NAME": node['name'].rsplit('#', 1)[-1],
        "OrderTransaction": json.dumps(node['transactions']),
    }
    for node in (edge['node'] for edge in data['data']['orders']['edges'])
])

In [None]:
# Preview the first rows of the single-day frame built from `data`.
trasactions_df.head()

In [None]:


# ACCESS_TOKEN is loaded from the YAML file at TOKEN_PATH; never paste a real token into this notebook.

# Query the status of `currentBulkOperation`. Reuse the endpoint configured in
# URL: it is the same shop and API version this cell needs.
url = URL
headers = {
    "Content-Type": "application/json",
    "X-Shopify-Access-Token": ACCESS_TOKEN
}
payload = {
    "query": """
        query {
          currentBulkOperation {
            id
            status
            objectCount
            fileSize
            url
            partialDataUrl
          }
        }
    """
}

# Bound the wait and surface HTTP errors before the body is used downstream.
response = requests.post(url, json=payload, headers=headers, timeout=30)
response.raise_for_status()
print(response.json())


In [None]:
# Display the parsed JSON body of the most recent `response`.
response.json()

In [None]:
# Parse the body once and pull out the bulk operation record.
bulk_operation = response.json()['data']['currentBulkOperation']

# Without a bulk operation, or without a download URL on it, there is nothing
# to fetch; say so explicitly instead of failing later with a TypeError.
if not bulk_operation or not bulk_operation.get('url'):
    raise RuntimeError(f"No downloadable bulk operation result: {bulk_operation}")

bulk_file_url = bulk_operation['url']
fileSize = bulk_operation['fileSize']
# 1048576 = 1024 * 1024, i.e. bytes -> MiB.
file_size_mb = int(fileSize) / 1048576
print(file_size_mb)

In [None]:
# Open a streaming download of the bulk result file; the body is consumed
# chunk by chunk when it is written to disk.
bulk_file_response = requests.get(bulk_file_url, stream=True, timeout=30)
# raise_for_status() returns None on success, so there is nothing to print;
# it raises requests.HTTPError for 4xx/5xx responses.
bulk_file_response.raise_for_status()

In [None]:
# Stream the bulk result to disk in 8 KiB chunks.
output_file = "bulk_results.jsonl"
# Use a dedicated handle name: `f` is the notebook's alias for
# snowflake.snowpark.functions and must not be overwritten by a file object.
with open(output_file, "wb") as out_file:
    for chunk in bulk_file_response.iter_content(chunk_size=8192):
        out_file.write(chunk)

print("Download complete:", output_file)

In [None]:
# Load the downloaded file into a DataFrame; lines=True reads it as one JSON
# object per line. The path is the same literal assigned to `output_file` in
# the download cell.
df = pd.read_json("bulk_results.jsonl", lines=True)
# Preview the first rows.
df.head()