In [1]:
import sqlite3
import os
from faker import Faker
import random
import asyncio



In [2]:

# Ensure the 'data' folder exists
os.makedirs('data', exist_ok=True)

# Define the database path
db_path = os.path.join('data', 'synthetic_socialcare2.db')

# Connect to the SQLite database (creates the file if it doesn't exist)
conn = sqlite3.connect(db_path)
cursor = conn.cursor()




### Defined SQLite database schema for social care tables: clients, assessments, services, outcomes.

In [3]:

# Connect to database (creates file if not exists)
conn = sqlite3.connect("data/synthetic_socialcare2.db")
cursor = conn.cursor()

# Drop old tables (if running again for testing)
cursor.execute("DROP TABLE IF EXISTS clients")
cursor.execute("DROP TABLE IF EXISTS assessments")
cursor.execute("DROP TABLE IF EXISTS services")
cursor.execute("DROP TABLE IF EXISTS outcomes")

# Create Clients table
cursor.execute("""
CREATE TABLE clients (
    client_id INTEGER PRIMARY KEY,
    name TEXT NOT NULL,
    age INTEGER,
    gender TEXT,
    postcode TEXT
)
""")

# Create Assessments table
cursor.execute("""
CREATE TABLE assessments (
    assessment_id INTEGER PRIMARY KEY AUTOINCREMENT,
    client_id INTEGER,
    assessment_date TEXT,
    assessment_type TEXT,
    assessor TEXT,
    FOREIGN KEY (client_id) REFERENCES clients(client_id)
)
""")

# Create Services table
cursor.execute("""
CREATE TABLE services (
    service_id INTEGER PRIMARY KEY AUTOINCREMENT,
    client_id INTEGER,
    service_name TEXT,
    start_date TEXT,
    end_date TEXT,
    provider TEXT,
    FOREIGN KEY (client_id) REFERENCES clients(client_id)
)
""")

# Create Outcomes table
cursor.execute("""
CREATE TABLE outcomes (
    outcome_id INTEGER PRIMARY KEY AUTOINCREMENT,
    client_id INTEGER,
    outcome_date TEXT,
    outcome_type TEXT,
    outcome_value TEXT,
    FOREIGN KEY (client_id) REFERENCES clients(client_id)
)
""")

# Save changes
conn.commit()
print("Tables created successfully.")

conn.close()


Tables created successfully.


### Populated tables with synthetic data using Faker.

In [4]:

# Setup
fake = Faker("en_GB")  # UK-style data
conn = sqlite3.connect("data/synthetic_socialcare2.db")
cursor = conn.cursor()

# Number of synthetic clients
N_CLIENTS = 50

for client_id in range(1, N_CLIENTS + 1):
    # ---------- CLIENT ----------
    name = fake.name()
    age = random.randint(18, 95)
    gender = random.choice(["Male", "Female"])
    postcode = fake.postcode()
    
    cursor.execute("""
    INSERT INTO clients (client_id, name, age, gender, postcode)
    VALUES (?, ?, ?, ?, ?)
    """, (client_id, name, age, gender, postcode))
    
    # ---------- ASSESSMENTS ----------
    for _ in range(random.randint(1, 3)):  # 1–3 assessments per client
        cursor.execute("""
        INSERT INTO assessments (client_id, assessment_date, assessment_type, assessor)
        VALUES (?, ?, ?, ?)
        """, (
            client_id,
            fake.date_between(start_date="-2y", end_date="today"),
            random.choice(["Care Act", "Financial", "Risk"]),
            fake.name()
        ))
    
    # ---------- SERVICES ----------
    for _ in range(random.randint(1, 2)):  # 1–2 services per client
        start_date = fake.date_between(start_date="-2y", end_date="-1m")
        end_date = fake.date_between(start_date=start_date, end_date="today")
        cursor.execute("""
        INSERT INTO services (client_id, service_name, start_date, end_date, provider)
        VALUES (?, ?, ?, ?, ?)
        """, (
            client_id,
            random.choice(["Home Care", "Residential Care", "Day Centre", "Direct Payment"]),
            start_date,
            end_date,
            random.choice(["Local Authority", "Private Agency", "Charity"])
        ))
    
    # ---------- OUTCOMES ----------
    for _ in range(random.randint(1, 2)):  # 1–2 outcomes per client
        cursor.execute("""
        INSERT INTO outcomes (client_id, outcome_date, outcome_type, outcome_value)
        VALUES (?, ?, ?, ?)
        """, (
            client_id,
            fake.date_between(start_date="-1y", end_date="today"),
            random.choice(["Independence", "Wellbeing", "Safety", "Financial"]),
            random.choice(["Improved", "Maintained", "Declined"])
        ))

# Save changes
conn.commit()
print("Synthetic data inserted with consistent client_id.")

conn.close()


Synthetic data inserted with consistent client_id.


  cursor.execute("""
  cursor.execute("""
  cursor.execute("""


In [5]:

conn = sqlite3.connect("data/synthetic_socialcare2.db")
cursor = conn.cursor()

cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
print(cursor.fetchall())

conn.close()


[('sqlite_sequence',), ('clients',), ('assessments',), ('services',), ('outcomes',)]


In [6]:
# Example schema dictionary
schema = {
    "clients": ["client_id", "name", "age", "gender", "postcode"],
    "assessments": ["assessment_id", "client_id", "assessment_date", "outcome"],
    "services": ["service_id", "client_id", "service_name", "start_date", "end_date"],
    "outcomes": ["client_id", "outcome_date", "outcome_type", "outcome_value"]
}


## Build AI SQL Generator

### Setup Environment and API keys

In [7]:
from openai import OpenAI
from dotenv import load_dotenv
from agents import Agent, Runner, trace, function_tool
import os
import asyncio

In [8]:
load_dotenv(override=True)

True

In [9]:
openaikey = os.getenv("OPENAI_API_KEY")

if openaikey:
    print(f"The key is found and starts with: {openaikey[:8]}")
else:
    print(f"Key not found")

The key is found and starts with: sk-proj-


#### SQL execution function to run read-only queries on SQLite DB.

In [10]:
 @function_tool
 def execute_sql(query: str):
    """Run a read-only SQL query on the SQLite DB."""
    with sqlite3.connect(db_path) as conn:
        cursor = conn.cursor()
        cursor.execute(query)
        rows = cursor.fetchall()
        colnames = [desc[0] for desc in cursor.description]
    return [dict(zip(colnames, row)) for row in rows]

#### Configure Agent instance (QueryExecutor) with gpt-4o-mini model.

In [11]:
instruction1 = f"""
You are a SQL assistant for adult social care data.
Always generate SELECT-only SQL for the SQLite schema provided.
Never use DROP, INSERT, UPDATE, DELETE, ALTER, ATTACH, or PRAGMA.
Return strictly JSON with keys:
- sql: the SQL query string
- explanation: a short (1-2 sentence) explanation of the query logic
Schema reference: {schema}
Make sure the output is a single valid JSON object, nothing else.
"""


In [12]:
Agent1 = Agent(
    name = 'QueryExecutor',
    instructions = instruction1,
    model = "gpt-4o-mini"
)

tool1=Agent1.as_tool(tool_name='instruct', tool_description=instruction1)

In [13]:
tools = [tool1,execute_sql]

In [14]:
instruction2 = "You are to execute the SQL query you generate and return a clean formatted table. Use the tools provided to you"

resultagent = Agent(
    name = "strictinstruct",
    instructions = instruction2,
    tools=tools,
    model = 'gpt-4o-mini'
)

message = "List the adults aged over 20 years with their service outcome"

with trace("resultfinder"):
    result = await Runner.run(resultagent, message)
    print(result.final_output)


Here's the list of adults aged over 20 years along with their service outcomes:

| Name                         | Age | Outcome Type    | Outcome Value |
|------------------------------|-----|------------------|---------------|
| Mr Damian Ball               | 81  | Financial        | Declined      |
| Mr Damian Ball               | 81  | Safety           | Improved      |
| Jake Simpson                 | 31  | Independence     | Maintained    |
| Jessica Anderson-Smith       | 72  | Financial        | Declined      |
| Jessica Anderson-Smith       | 72  | Independence     | Maintained    |
| Ms Rita Cross                | 70  | Financial        | Improved      |
| Lindsey Cunningham            | 27  | Independence     | Maintained    |
| Lindsey Cunningham            | 27  | Financial        | Improved      |
| Anne Bray                    | 49  | Safety           | Improved      |
| Anne Bray                    | 49  | Financial        | Maintained    |
| Jordan Smith                

In [None]:
pip install streamlit


In [17]:
import streamlit as st
import asyncio

In [19]:
st.title("Social Care Query Agenr")

#To keep conversation history
messages = st.session_state.get("messages")
if messages is None:
    st.session_state["messages"] = []


#dipslay conversation
for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])

#chat input
# Chat input
if prompt := st.chat_input("Ask me about the social care database..."):
    # Show user msg
    st.session_state["messages"].append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Placeholder for assistant reply
    with st.chat_message("assistant"):
        placeholder = st.empty()
        placeholder.markdown("⏳ Querying database...")

        # Run the agent
        async def run_agent():
            result = await Runner.run(resultagent, prompt)
            return result.final_output

        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        output = loop.run_until_complete(run_agent())
        loop.close()

        # Replace with agent output
        placeholder.markdown(output)

    # Save assistant response
    st.session_state["messages"].append({"role": "assistant", "content": output})



#### LLM-generated SQL queries, executions and results

In [40]:
with sqlite3.connect(db_path) as conn:
    cursor = conn.cursor()
    tables = ["clients", "services", "outcomes"]
    for t in tables:
        cursor.execute(f"SELECT COUNT(*) FROM {t}")
        print(f"{t} rows:", cursor.fetchone()[0])


clients rows: 50
services rows: 76
outcomes rows: 73
