In [16]:
import requests
import json
from concurrent.futures import ThreadPoolExecutor, as_completed
import sys
import argparse
from datetime import datetime

# Export window, given as calendar days in YYYY-MM-DD format.
# Both values are turned into datetime bounds further down in this cell.
start_date = "2025-06-16"
end_date = "2025-06-30"
# Alternative end bound (today's date): datetime.now().strftime('%Y-%m-%d')

def parse_date(date_str):
    """Turn a ``YYYY-MM-DD`` string into a ``datetime`` at midnight.

    Raises:
        argparse.ArgumentTypeError: when ``date_str`` does not match the
            expected format.
    """
    expected_format = "%Y-%m-%d"
    try:
        parsed = datetime.strptime(date_str, expected_format)
    except ValueError:
        raise argparse.ArgumentTypeError(f"Invalid date format: {date_str}. Please use YYYY-MM-DD")
    else:
        return parsed
		
def is_in_date_range(created_at, start_date, end_date):
    """Check whether an ISO-8601 UTC timestamp falls inside a date range.

    Args:
        created_at: Timestamp string such as ``2025-06-16T12:34:56.789Z``;
            the fractional-seconds part is optional.
        start_date: Inclusive lower bound as a naive ``datetime``, or ``None``
            for no lower bound.
        end_date: Inclusive upper bound as a naive ``datetime``, or ``None``
            for no upper bound.

    Returns:
        bool: ``True`` when the timestamp lies within the (possibly
        open-ended) range.

    Raises:
        ValueError: if ``created_at`` matches neither accepted layout.
    """
    try:
        created_date = datetime.strptime(created_at, "%Y-%m-%dT%H:%M:%S.%fZ")
    except ValueError:
        # Same layout without fractional seconds, which "%f" cannot match
        created_date = datetime.strptime(created_at, "%Y-%m-%dT%H:%M:%SZ")

    # A bound of None means "unbounded" on that side rather than a TypeError
    if start_date is not None and created_date < start_date:
        return False
    if end_date is not None and created_date > end_date:
        return False
    return True

# Load env variables from the local "env" file (one KEY=VALUE pair per line)
environment_variables = {}
with open("env") as f:
    for line_number, line in enumerate(f, start=1):
        line = line.strip()
        # Blank lines and "#" comment lines carry no setting
        if not line or line.startswith("#"):
            continue
        # Split on the first "=" only, so a value that itself contains "="
        # (for example a base64-padded key) is kept intact
        key, separator, value = line.partition("=")
        if not separator:
            # The line content is left out of the message: it may hold a secret
            raise ValueError(f"Malformed line {line_number} in env file: expected KEY=VALUE")
        environment_variables[key.strip()] = value.strip()

BASE_URL = environment_variables["BASE_URL"]
HEADERS = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {environment_variables['API_KEY']}"
}

def fetch_page(url, params, timeout=60):
    """Fetch one page of a paginated endpoint and return its ``data`` value.

    Args:
        url: Full endpoint URL.
        params: Query parameters for the request (including pagination).
        timeout: Seconds ``requests`` waits on the server before giving up,
            so a stalled connection cannot hang the notebook indefinitely.

    Returns:
        The ``data`` value of the JSON body when it is non-empty; ``None``
        when ``data`` is empty, missing or null, or when the server answers
        400 (treated as "no more pages").

    Raises:
        requests.HTTPError: for any other 4xx/5xx status.
    """
    resp = requests.get(url, params=params, headers=HEADERS, timeout=timeout)
    if resp.status_code == 200:
        # `or None` folds a missing/null "data" key and an empty list into the
        # same "nothing here" signal instead of failing on len(None)
        return resp.json().get("data") or None
    if resp.status_code == 400:
        # no more pages
        return None
    # unexpected error: let it propagate
    resp.raise_for_status()
    # Non-error statuses other than 200 carry no page for the callers
    return None
		
def fetch_conversations():
    """Collect every conversation by walking the paginated endpoint.

    Pages are requested from 1 upwards (1000 records per page) until
    ``fetch_page`` hands back nothing; a progress line is printed per page.

    Returns:
        list: the records of all downloaded pages, in page order.
    """
    endpoint = f"{BASE_URL}/v1/conversations"
    collected = []
    page_number = 1
    batch = fetch_page(endpoint, {"page": page_number, "pageSize": 1000})
    while batch:
        collected.extend(batch)
        print(f"Downloaded conversations page {page_number}")
        page_number += 1
        batch = fetch_page(endpoint, {"page": page_number, "pageSize": 1000})
    return collected

def fetch_all_messages(convo):
    """Download all messages that belong to one conversation.

    Args:
        convo: Conversation record; its ``"id"`` entry selects the endpoint.

    Returns:
        dict: ``{"conversationMeta": convo, "allMessages": [...]}`` where the
        list holds the records of every page, in page order. Paging stops at
        the first page for which ``fetch_page`` returns ``None``.
    """
    endpoint = f"{BASE_URL}/v1/conversations/{convo['id']}/messages"
    collected = []
    page_number = 1
    while True:
        batch = fetch_page(endpoint, {"page": page_number, "pageSize": 1000})
        if batch is None:
            return {"conversationMeta": convo, "allMessages": collected}
        collected.extend(batch)
        page_number += 1

# Normalize the configured date strings into datetime bounds (a bound left as None is skipped)
if start_date is not None:
    # Lower bound: first instant of the start day
    start_date = parse_date(start_date).replace(hour=0, minute=0, second=0, microsecond=0)

if end_date is not None:
    # Upper bound: last microsecond of the end day, so the whole day is included
    end_date = parse_date(end_date).replace(hour=23, minute=59, second=59, microsecond=999999)

# Human-readable label for the range; it is also embedded in output file names
if start_date is None and end_date is None:
    date_range_string = "all"
else:
    # Only one bound may be set: label the open side instead of calling
    # strftime on None
    range_start_label = start_date.strftime('%B %d, %Y') if start_date is not None else "earliest"
    range_end_label = end_date.strftime('%B %d, %Y') if end_date is not None else "latest"
    date_range_string = f"{range_start_label} - {range_end_label}"
print(f"Downloading conversations metadata for the range {date_range_string}")

# 1. Fetch all conversation metadata synchronously
conversations = fetch_conversations()

print("Finished fetching conversations metadata")
# Filtering happens client-side on each record's createdAt; with no bounds
# configured every conversation is kept
if start_date is not None or end_date is not None:
    conversations = [c for c in conversations if is_in_date_range(c["attributes"]["createdAt"], start_date, end_date)]
# NOTE(review): the file name says "2024" whatever range is selected; it is
# kept unchanged in case other tooling reads this exact path. Confirm before renaming.
with open("conversations-condensed-2024.json", "w") as f:
    json.dump(conversations, f, indent=4)
print("Finished saving conversations metadata")

Downloading conversations metadata for the range June 16, 2025 - June 30, 2025
Downloaded conversations page 1
Downloaded conversations page 2
Downloaded conversations page 3
Downloaded conversations page 4
Downloaded conversations page 5
Downloaded conversations page 6
Downloaded conversations page 7
Downloaded conversations page 8
Downloaded conversations page 9
Downloaded conversations page 10
Downloaded conversations page 11
Downloaded conversations page 12
Downloaded conversations page 13
Downloaded conversations page 14
Downloaded conversations page 15
Downloaded conversations page 16
Downloaded conversations page 17
Downloaded conversations page 18
Downloaded conversations page 19
Downloaded conversations page 20
Downloaded conversations page 21
Downloaded conversations page 22
Downloaded conversations page 23
Downloaded conversations page 24
Downloaded conversations page 25
Downloaded conversations page 26
Downloaded conversations page 27
Downloaded conversations page 28
Downlo

In [95]:
# # normalizing data
# import pandas as pd

# with open("conversations-condensed-2024.json") as f:
#     data = json.load(f)

# kustomer = pd.json_normalize(data)
# kustomer.columns = [col.replace('.','_') for col in kustomer.columns]
# kustomer.drop(columns = ['attributes_firstMessageIn_meta_Wait time',
# 						 'attributes_firstMessageIn_meta_Wait time Last Queue',
# 						 'attributes_lastMessageIn_meta_Wait time',
# 						 'attributes_lastMessageIn_meta_Wait time Last Queue'],
# 			  inplace=True)

# # preview data
# kustomer.head()

In [97]:
# save to csv for uploading to Domo via CSV workbench
# kustomer.to_csv(f"Kustomer {date_range_string}.csv")

In [None]:
# # 2. Fetch messages in parallel threads
# results = []
# percentDone = 0
# max_workers = 10  # tune based on your API rate limits & CPU
# with ThreadPoolExecutor(max_workers=max_workers) as executor:
#     future_to_convo = {executor.submit(fetch_all_messages, c): c for c in conversations}
#     for future in as_completed(future_to_convo):
#         convo = future_to_convo[future]
#         try:
#             full = future.result()
#             results.append(full)
#             percentDone += 1
#             # print(f"{convo['id']} % done: {((percentDone/len(conversations))*100):.2f}%")
#         except Exception as exc:
#             print(f"Error fetching {convo['id']}: {exc}")

In [None]:
# with open(f"conversations-with-messages-{date_range_string}.json", "w") as f:
#     json.dump(results, f, indent=4)
# print("All done!")

# # normalizing data
# import pandas as pd

# with open(f"conversations-with-messages-{date_range_string}.json") as f:
#     data = json.load(f)

# kustomer = pd.json_normalize(data)
# kustomer.columns = [col.replace('.','_') for col in kustomer.columns]
# kustomer.drop(columns = ['conversationMeta_attributes_firstMessageIn_meta_Wait time', 
# 						 'conversationMeta_attributes_lastMessageIn_meta_Wait time', 
# 						 'conversationMeta_attributes_firstMessageIn_meta_Wait time Last Queue', 
# 						 'conversationMeta_attributes_lastMessageIn_meta_Wait time Last Queue'],
# 			  inplace=True)

# # preview data
# print(kustomer.head())
# print(kustomer.info())

# # save to csv for uploading to Domo via CSV Workbench
# kustomer.to_csv(f"conversations-with-messages-{date_range_string}.csv")

In [17]:
from datetime import timedelta

def stay_in_while_loop(current_date):
    """Tell whether ``current_date`` is still strictly before the cutoff.

    The cutoff is the module-level ``end_date`` when one is set, otherwise
    the current local time.
    """
    cutoff = datetime.now() if end_date is None else end_date
    return current_date < cutoff

def fetch_customers():
    """Download the customers created inside the configured date range.

    The range is walked one day at a time, starting at the module-level
    ``start_date`` (which must already be a ``datetime``) and continuing
    while ``stay_in_while_loop(current_date)`` holds. Each step requests the
    half-open window ``[current_date, current_date + 1 day)`` so that
    consecutive windows do not overlap and the last window does not reach
    past the end of the range.

    Returns:
        list: the ``data`` records of every page fetched, in request order.
        The list can be partial: a non-200 answer ends the current window
        early, and a payload with an unexpected shape ends the whole
        download. Both situations are reported on stdout.

    Raises:
        requests.RequestException: on network-level failures (not swallowed).
    """
    timestamp_format = '%Y-%m-%dT%H:%M:%SZ'
    window = timedelta(days=1)
    # Pages beyond this number are never requested for a window.
    # NOTE(review): presumably mirrors an API paging limit; confirm.
    max_pages = 10
    customers_url = f"{BASE_URL}/v1/customers"

    all_customers = []
    current_date = start_date
    while stay_in_while_loop(current_date):
        window_label = current_date.strftime(timestamp_format)
        params = {
            'filter[createdAt][gte]': window_label,
            'filter[createdAt][lt]': (current_date + window).strftime(timestamp_format),
            'page': 1,
            'pageSize': 1000
        }

        try:
            while True:
                customers_resp = requests.get(customers_url, params=params, headers=HEADERS, timeout=60)
                if customers_resp.status_code != 200:
                    print(f"Stopped paging customers for {window_label}: HTTP {customers_resp.status_code}")
                    break

                # Parse the body once per page
                payload = customers_resp.json()
                total_pages = payload["meta"]["totalPages"]
                all_customers.extend(payload.get("data") or [])

                # Stop on the last available page rather than probing one past it
                if params['page'] >= min(total_pages, max_pages):
                    if total_pages > max_pages:
                        print(f"Warning: customers for {window_label} truncated at {max_pages} of {total_pages} pages")
                    break
                params['page'] += 1
        except (AttributeError, KeyError, TypeError, ValueError) as e:
            # Unexpected payload shape (or invalid JSON): report it and return
            # what was collected so far, without re-parsing the response here
            print(f"Debug 0 unexpected customers payload for {window_label}: {e!r}")
            break

        print(f"finished retrieving customers for: {window_label}")
        current_date = current_date + window

    return all_customers

# Run the customer export and write the records to a JSON file named after the range
customers = fetch_customers()
customers_path = f"customers-{date_range_string}.json"
with open(customers_path, "w") as f:
    json.dump(customers, f, indent=4)

finished retrieving customers for: 2025-06-16T00:00:00Z
finished retrieving customers for: 2025-06-17T00:00:00Z
finished retrieving customers for: 2025-06-18T00:00:00Z
finished retrieving customers for: 2025-06-19T00:00:00Z
finished retrieving customers for: 2025-06-20T00:00:00Z
finished retrieving customers for: 2025-06-21T00:00:00Z
finished retrieving customers for: 2025-06-22T00:00:00Z
finished retrieving customers for: 2025-06-23T00:00:00Z
finished retrieving customers for: 2025-06-24T00:00:00Z
finished retrieving customers for: 2025-06-25T00:00:00Z
finished retrieving customers for: 2025-06-26T00:00:00Z
finished retrieving customers for: 2025-06-27T00:00:00Z
finished retrieving customers for: 2025-06-28T00:00:00Z
finished retrieving customers for: 2025-06-29T00:00:00Z
finished retrieving customers for: 2025-06-30T00:00:00Z
