In [None]:
# All imports
import os
import json
import pandas as pd
from pathlib import Path
from psycopg2 import sql
import sys

# Add project root to Python path so the local `src` package can be imported.
PROJECT_ROOT = Path("../").resolve()  # parent folder of notebooks/
# Guard the append so re-running this cell does not accumulate duplicate entries.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

from src.db_utils import get_db_connection

# Paths
# Anchored on the already-resolved PROJECT_ROOT so the location does not depend
# on the working directory at the time later cells open the file.
DATA_PATH = PROJECT_ROOT / "data" / "example_support_tickets_synthetic.jsonl"

print(f"Loading data from {DATA_PATH.resolve()}")


In [None]:
# Load tickets from the JSONL file into a list of dicts, then preview the
# first few as a DataFrame.
tickets = []
with open(DATA_PATH, "r", encoding="utf-8") as f:
    for line_no, line in enumerate(f, start=1):
        record = line.strip()
        if not record:
            # Blank lines (e.g. a trailing newline at EOF) are not valid JSON
            # documents; skip them instead of crashing the whole load.
            continue
        try:
            ticket = json.loads(record)
        except json.JSONDecodeError as exc:
            # Each line is parsed on its own, so the decoder always reports
            # "line 1"; re-raise the same exception type with the real file line.
            raise json.JSONDecodeError(
                f"{exc.msg} (line {line_no} of {DATA_PATH})", exc.doc, exc.pos
            ) from exc
        tickets.append(ticket)

print(f"Loaded {len(tickets)} tickets.")
# Last expression of the cell: rendered by the notebook as a rich table preview.
pd.DataFrame(tickets[:5])



In [None]:
# # Optional reset: uncomment this cell to delete all rows from `tickets` before re-testing ingestion

# conn = get_db_connection()
# cur = conn.cursor()

# # Delete all rows
# cur.execute("DELETE FROM tickets;")


# conn.commit()
# cur.close()
# conn.close()

# print("All data deleted from tickets table")


In [None]:
# Insert tickets into PostgreSQL
# Rows whose ticket_id already exists are left untouched (ON CONFLICT DO NOTHING),
# so re-running this cell does not create duplicates.
insert_query = """
INSERT INTO tickets (
    ticket_id, created_at, channel, source_system,
    customer_name, subject, body, internal_comments,
    status, resolution_time_s
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
ON CONFLICT (ticket_id) DO NOTHING;
"""

conn = get_db_connection()
try:
    cur = conn.cursor()
    try:
        inserted = 0
        for t in tickets:
            # Pass the internal_comments as a Python list (empty list when the key is absent).
            # NOTE(review): presumably the column is an array type the driver adapts
            # lists to — confirm against the table schema.
            internal_comments_list = t.get("internal_comments", [])

            values = (
                t.get("ticket_id"),
                t.get("created_at"),
                t.get("channel"),
                t.get("source_system"),
                t.get("customer_name"),
                t.get("subject"),
                t.get("body"),
                internal_comments_list,
                t.get("status"),
                t.get("resolution_time_s"),
            )

            cur.execute(insert_query, values)
            # rowcount is 0 when the conflict clause skipped the row; the max()
            # guards against drivers that report -1 for "unknown".
            inserted += max(cur.rowcount, 0)

        # Single commit: either every ticket in this run is persisted or none is.
        conn.commit()
    except Exception:
        # Leave the database unchanged if any insert fails, then surface the error.
        conn.rollback()
        raise
    finally:
        cur.close()
finally:
    # Always release the connection, even when an insert raised.
    conn.close()

skipped = len(tickets) - inserted
print(
    f"Inserted {inserted} of {len(tickets)} tickets into PostgreSQL table 'tickets' "
    f"({skipped} skipped by ON CONFLICT)."
)


In [None]:
# Test ingestion: report the total row count and print a small sample of rows.
conn = get_db_connection()
try:
    cur = conn.cursor()
    try:
        cur.execute("SELECT COUNT(*) FROM tickets;")
        count = cur.fetchone()[0]
        print(f"Total tickets in DB: {count}")

        # ORDER BY makes the 5-row sample stable across runs; without it the
        # database may return any five rows.
        cur.execute(
            "SELECT ticket_id, subject, status FROM tickets ORDER BY ticket_id LIMIT 5;"
        )
        for row in cur.fetchall():
            print(row)
    finally:
        cur.close()
finally:
    # Release the connection even if one of the queries failed.
    conn.close()
