In [2]:
# All imports
import os
import json
import pandas as pd
from pathlib import Path
from psycopg2 import sql
import sys

# Add project root to Python path so that `src.*` modules can be imported.
# NOTE: "../" is resolved against the current working directory, so this
# assumes the kernel was started from the notebooks/ folder.
PROJECT_ROOT = Path("../").resolve()  # parent folder of notebooks/
# Guard the append so re-running this cell does not pile up duplicate entries.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

from src.db_utils import get_db_connection

# Paths
# Input file of support tickets; the loading cell reads it line by line and
# parses each line as JSON. The path is relative to the working directory.
DATA_PATH = Path("../data/enfuce_support_tickets_synthetic.jsonl")

# Print the resolved absolute path so a wrong working directory is easy to spot.
print(f"Loading data from {DATA_PATH.resolve()}")


Loading data from /Users/sasha/enfuce-support-ai/data/enfuce_support_tickets_synthetic.jsonl


In [15]:
# Load tickets as a df
# Every non-empty line of the file is parsed as one JSON object (one ticket dict).
tickets = []
with open(DATA_PATH, "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        # Skip blank or whitespace-only lines (e.g. extra newlines at the end
        # of the file); json.loads("") would otherwise raise JSONDecodeError
        # and abort the whole load.
        if not line:
            continue
        ticket = json.loads(line)
        tickets.append(ticket)

print(f"Loaded {len(tickets)} tickets.")
# Preview only the first five tickets; as the last expression of the cell the
# DataFrame is rendered as the cell output.
pd.DataFrame(tickets[:5])



Loaded 50000 tickets.


Unnamed: 0,ticket_id,created_at,channel,source_system,customer_name,subject,body,internal_comments,status,resolution_time_s
0,TKT-500000,2025-08-21T09:49:57,email,EmailInbox,Sam Heikkinen,URGENT: Hitting rate limits on /transactions (...,"Hello, Hi, I'm Sam Heikkinen. My card keeps ge...",[[Agent-2] 2025-08-21 10:42 - Replied to custo...,in_progress,
1,TKT-500001,2025-10-05T11:06:35,api,Zendesk,Alex Mäkinen,Question about KYC — documents keep getting re...,"Hi team, Hi, I'm Alex Mäkinen. My card keeps g...",[[Agent-1] 2025-10-05 12:47 - No incident on s...,closed,41249.0
2,TKT-500002,2025-08-31T17:06:37,email,Zendesk,Jamie Salonen,Can’t pay online — keeps declining 😕,"Hi team, Hi, I'm Jamie Salonen. Not sure if th...",[[Agent-2] 2025-08-31 17:54 - Awaiting custome...,waiting_on_customer,
3,TKT-500003,2025-06-24T11:44:58,api,EmailInbox,Henrik Svensson,"PRIO: Kort saknas, spärra nu","Hallå, Hi, I'm Henrik Svensson. Jag har proble...",[[L2] 2025-06-24 12:25 - Svarade kunden; begär...,in_progress,
4,TKT-500004,2025-06-23T11:27:43,api,Zendesk,Sam Ahonen,Tokenization failed during wallet setup,"Hey, Hi, I'm Sa Ahonen. Not sure if this is on...",[[Ops] 2025-06-23 13:26 - Escalated to TechOps...,in_progress,


In [16]:
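# Optional reset (destructive): uncomment this cell to delete every row from `tickets` before re-running the ingestion.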

# conn = get_db_connection()
# cur = conn.cursor()

# # Delete all rows
# cur.execute("DELETE FROM tickets;")


# conn.commit()
# cur.close()
# conn.close()

# print("All data deleted from tickets table")


All data deleted from tickets table


In [17]:
# Insert tickets into PostgreSQL
# All rows are written in a single transaction: it is committed only after every
# ticket has been processed and rolled back if any insert fails. The cursor and
# the connection are closed on both the success and the failure path.

# Rows whose ticket_id already exists are skipped (ON CONFLICT ... DO NOTHING),
# so re-running this cell does not create duplicates.
insert_query = """
INSERT INTO tickets (
    ticket_id, created_at, channel, source_system,
    customer_name, subject, body, internal_comments,
    status, resolution_time_s
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
ON CONFLICT (ticket_id) DO NOTHING;
"""

inserted = 0  # rows actually written, as opposed to skipped by ON CONFLICT

conn = get_db_connection()
try:
    cur = conn.cursor()
    try:
        for t in tickets:
            # Pass the internal_comments as a Python list
            # (presumably adapted to an array column by the driver -- confirm
            # against the `tickets` table schema).
            internal_comments_list = t.get("internal_comments", [])

            values = (
                t.get("ticket_id"),
                t.get("created_at"),
                t.get("channel"),
                t.get("source_system"),
                t.get("customer_name"),
                t.get("subject"),
                t.get("body"),
                internal_comments_list,
                t.get("status"),
                t.get("resolution_time_s"),
            )

            cur.execute(insert_query, values)
            # rowcount is 0 for a row skipped by ON CONFLICT and may be -1 when
            # the driver cannot determine it, so only count positive values.
            if cur.rowcount > 0:
                inserted += cur.rowcount

        conn.commit()
    except Exception:
        # Leave the table untouched if any insert fails part-way through.
        conn.rollback()
        raise
    finally:
        cur.close()
finally:
    conn.close()

# Report the number of rows actually inserted, not just the number attempted.
print(f"Inserted {inserted} of {len(tickets)} tickets into PostgreSQL table 'tickets'.")


Inserted 50000 tickets into PostgreSQL table 'tickets'.


In [18]:
# Test ingestion
# Sanity check: print the total row count of `tickets` and a small sample.
# The cursor and connection are closed even if one of the queries fails.
conn = get_db_connection()
try:
    cur = conn.cursor()
    try:
        cur.execute("SELECT COUNT(*) FROM tickets;")
        count = cur.fetchone()[0]
        print(f"Total tickets in DB: {count}")

        # ORDER BY makes the sample deterministic; without it the rows
        # returned by LIMIT are unspecified and can differ between runs.
        cur.execute(
            "SELECT ticket_id, subject, status FROM tickets ORDER BY ticket_id LIMIT 5;"
        )
        for row in cur.fetchall():
            print(row)
    finally:
        cur.close()
finally:
    conn.close()


Total tickets in DB: 50000
('TKT-500000', 'URGENT: Hitting rate limits on /transactions (ERROR:) 🙂', 'in_progress')
('TKT-500001', 'Question about KYC — documents keep getting rejected 😕', 'closed')
('TKT-500002', 'Can’t pay online — keeps declining 😕', 'waiting_on_customer')
('TKT-500003', 'PRIO: Kort saknas, spärra nu', 'in_progress')
('TKT-500004', 'Tokenization failed during wallet setup', 'in_progress')
