# RAG demo level 3

In [11]:
import subprocess
import os
import json

def _terraform_output(name):
    """Return the raw value of one Terraform output variable.

    Runs `terraform output -raw <name>` in the current working directory.

    Raises:
        RuntimeError: if terraform exits with a non-zero status. Without this
            check a failed call would silently produce an empty string and
            only surface later as a confusing JSON or ODBC error.
    """
    completed = subprocess.run(
        ['terraform', 'output', '-raw', name],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    if completed.returncode != 0:
        details = completed.stderr.decode('utf-8', errors='replace').strip()
        raise RuntimeError(f"terraform output -raw {name} failed: {details}")
    return completed.stdout.decode('utf-8')


original_dir = os.getcwd()
try:
    # Jump into the terraform directory
    os.chdir('terraform')

    # Get the database connection string
    SQL_DATABASE = _terraform_output('SQL_DATABASE')
    SQL_USERNAME = _terraform_output('SQL_USERNAME')
    SQL_PASSWORD = _terraform_output('SQL_PASSWORD')
    SQL_SERVER_FQDN = _terraform_output('SQL_SERVER_FQDN')

    # Get the model endpoints and keys (a single JSON document keyed by model name)
    model_configurations = _terraform_output('model_configurations')
    model_config = json.loads(model_configurations)
    embedding_model = model_config["models"]["text-embedding-3-large"]
    EMBEDDINGS_ENDPOINT = embedding_model["endpoint"]
    EMBEDDINGS_KEY = embedding_model["key"]
    gpt_4o_mini_model = model_config["models"]["gpt-4o-mini"]
    GPT_4O_MINI_ENDPOINT = gpt_4o_mini_model["endpoint"]
    GPT_4O_MINI_KEY = gpt_4o_mini_model["key"]
    gpt_4o_model = model_config["models"]["gpt-4o"]
    GPT_4O_ENDPOINT = gpt_4o_model["endpoint"]
    GPT_4O_KEY = gpt_4o_model["key"]

    # Only endpoints are echoed; keys and the SQL password are never printed.
    print(f"Using {SQL_SERVER_FQDN} as the database server")
    print(f"Using {EMBEDDINGS_ENDPOINT} as the embedding model endpoint")
    print(f"Using {GPT_4O_MINI_ENDPOINT} as the gpt-4o-mini model endpoint")
    print(f"Using {GPT_4O_ENDPOINT} as the gpt-4o model endpoint")

finally:
    # Always restore the notebook's working directory, even on failure.
    os.chdir(original_dir)

Using sql-graphrag-psbv.database.windows.net as the database server
Using https://graphrag-psbv.openai.azure.com/ as the embedding model endpoint
Using https://graphrag-psbv.openai.azure.com/ as the gpt-4o-mini model endpoint
Using https://graphrag-psbv.openai.azure.com/ as the gpt-4o model endpoint


In [35]:
from openai import AzureOpenAI 
import pandas as pd
import pyodbc

# Azure SQL Entra login option (swap this in for the password-based string below)
# conn_str = f"DRIVER={{ODBC Driver 18 for SQL Server}};SERVER={SQL_SERVER_FQDN};DATABASE={SQL_DATABASE};UID=<entra-user>@<tenant-domain>;Authentication=ActiveDirectoryInteractive;Encrypt=yes;"

# Azure SQL password login option: the ODBC connection string is assembled
# from one "KEY=value;" segment per attribute.
conn_str = (
    "DRIVER={ODBC Driver 18 for SQL Server};"
    f"SERVER={SQL_SERVER_FQDN};"
    f"DATABASE={SQL_DATABASE};"
    f"UID={SQL_USERNAME};"
    f"PWD={SQL_PASSWORD};"
    "Encrypt=yes;"
    "Charset=Latin1;"
)

# Shared connection used by the schema-creation and data-loading cells.
conn = pyodbc.connect(conn_str)

# One Azure OpenAI client per deployed model; each reads its own endpoint/key.
gpt_embedding_client = AzureOpenAI(
    api_version="2025-02-01-preview",
    azure_endpoint=EMBEDDINGS_ENDPOINT,
    api_key=EMBEDDINGS_KEY,
)

gpt_4o_client = AzureOpenAI(
    api_version="2024-05-01-preview",
    azure_endpoint=GPT_4O_ENDPOINT,
    api_key=GPT_4O_KEY,
)

gpt_4o_mini_client = AzureOpenAI(
    api_version="2024-05-01-preview",
    azure_endpoint=GPT_4O_MINI_ENDPOINT,
    api_key=GPT_4O_MINI_KEY,
)

### Create node and edge tables and indexes

In [37]:
# Every node table has the same columns, and every edge table is a bare
# `AS EDGE` table, so the DDL is generated from these two lists instead of
# being copy-pasted per table.
NODE_TABLES = ["Movie", "Character", "Theme", "Setting", "Series", "Genre"]
EDGE_TABLES = ["IN_GENRE", "FEATURES_CHARACTER", "INCLUDES_THEME", "SET_IN", "PART_OF_SERIES"]


def _node_table_sql(table):
    """DDL that creates the graph node table [dbo].[<table>] if it is missing."""
    return f"""
IF NOT EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[{table}]') AND type in (N'U'))
BEGIN
    CREATE TABLE [dbo].[{table}]
    (
        [Id] INT PRIMARY KEY,
        [Name] VARCHAR(200) NOT NULL,
        [Content] VARCHAR(MAX) NULL,
        [Embedding] VECTOR(1998) NULL
    ) AS NODE;
END
"""


def _edge_table_sql(table):
    """DDL that creates the graph edge table [dbo].[<table>] if it is missing."""
    return f"""
IF NOT EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[{table}]') AND type in (N'U'))
BEGIN
    CREATE TABLE [dbo].[{table}] AS EDGE;
END
"""


def _name_index_sql(table):
    """DDL that creates the nonclustered index on [Name] if it is missing.

    The existence check is scoped to the table's object_id (index names are
    only unique per table), matching the UIX_* checks below.
    """
    return f"""
IF NOT EXISTS (SELECT * FROM sys.indexes
               WHERE object_id = OBJECT_ID(N'[dbo].[{table}]')
               AND name = 'IX_{table}_Name')
BEGIN
    CREATE NONCLUSTERED INDEX IX_{table}_Name ON [dbo].[{table}]([Name]);
END
"""


def _fulltext_index_sql(table):
    """DDL for the named unique key index on [Id] plus the full-text index.

    The full-text index covers [Content] and [Name] (LANGUAGE 1033) and uses
    the named unique index as its KEY INDEX.
    """
    return f"""
IF NOT EXISTS (SELECT * FROM sys.indexes
               WHERE object_id = OBJECT_ID(N'[dbo].[{table}]')
               AND name = 'UIX_{table}_Id')
BEGIN
    CREATE UNIQUE INDEX UIX_{table}_Id ON [dbo].[{table}] ([Id]);
END

IF NOT EXISTS (SELECT * FROM sys.fulltext_indexes
               WHERE object_id = OBJECT_ID(N'[dbo].[{table}]'))
BEGIN
    CREATE FULLTEXT INDEX ON [dbo].[{table}] ([Content] LANGUAGE 1033, [Name] LANGUAGE 1033)
    KEY INDEX UIX_{table}_Id ON FTCatalog;
END
"""


_FULLTEXT_CATALOG_SQL = """
IF NOT EXISTS (SELECT * FROM sys.fulltext_catalogs WHERE name = 'FTCatalog')
BEGIN
    CREATE FULLTEXT CATALOG FTCatalog;
END
"""

# Statement order: node tables, edge tables, name indexes, full-text catalog,
# then per-table key index + full-text index.
command = "".join(
    ["\n-- Node Creation: Creating node tables"]
    + [_node_table_sql(table) for table in NODE_TABLES]
    + ["\n-- Edge Creation: Creating edge tables"]
    + [_edge_table_sql(table) for table in EDGE_TABLES]
    + ["\n-- Index Creation: Creating nonclustered indexes on [Name] for node tables"]
    + [_name_index_sql(table) for table in NODE_TABLES]
    + ["\n-- Fulltext Catalog and Fulltext Indexes Creation", _FULLTEXT_CATALOG_SQL]
    + [_fulltext_index_sql(table) for table in NODE_TABLES]
)

# Autocommit stays on for the rest of the notebook, so later conn.commit()
# calls are harmless no-ops and no explicit commit is needed here.
# NOTE(review): presumably required because full-text DDL cannot run inside a
# user transaction - confirm.
conn.autocommit = True
cursor = conn.cursor()
cursor.execute(command)
# execute() only reports the outcome of the first statement in the batch;
# walking the remaining result sets makes an error raised by any later
# statement surface here instead of being silently dropped.
while cursor.nextset():
    pass

### Insert movies with embeddings

Load from file

In [5]:
# Read the movie records from the JSON file (a list of objects, one per movie).
movies_df = pd.read_json("data/movies_graph.json", orient="records")
movie_count = len(movies_df)
print(f"Loaded {movie_count} movies")

Loaded 8551 movies


Embedding function

In [13]:
import time

max_retries = 20   # attempts per batch before giving up on HTTP 429
retry_delay = 10   # seconds to wait between rate-limited attempts

def get_embeddings(texts):
    """Embed a batch of texts with text-embedding-3-large (1998 dimensions).

    Args:
        texts: list of strings to embed in a single API call.

    Returns:
        A list with one JSON-encoded vector string (e.g. "[0.1, 0.2, ...]")
        per item returned by the service.

    Raises:
        Exception: any non-429 error from the client is printed and re-raised
            immediately; a 429 error is re-raised once `max_retries` attempts
            have all been rate limited.
        RuntimeError: if `max_retries` is less than 1, so no call was made.
    """
    for attempt in range(1, max_retries + 1):
        try:
            result = gpt_embedding_client.embeddings.create(
                input=texts,
                model="text-embedding-3-large",
                dimensions=1998,
            )
        except Exception as e:
            if getattr(e, "status_code", None) != 429:
                print(f"Embedding generation error: {e}")
                raise
            if attempt == max_retries:
                # Out of attempts: surface the rate-limit error instead of
                # falling through to an unbound `result`.
                raise
            time.sleep(retry_delay)
        else:
            return [json.dumps(item.embedding) for item in result.data]
    raise RuntimeError("max_retries must be at least 1")

Insert movies

In [14]:
batch_size = 100
total_movies = len(movies_df)


def _text_or_empty(value):
    """Return str(value), or "" when the field is missing (None or NaN).

    pandas represents a key that is absent from a JSON record as NaN; a plain
    `is not None` check would let it through and embed the literal text "nan".
    """
    if value is None or (isinstance(value, float) and pd.isna(value)):
        return ""
    return str(value)


for start_idx in range(0, total_movies, batch_size):
    batch = movies_df.iloc[start_idx:start_idx+batch_size]

    # Build list of texts and parameters for the batch
    texts = []
    insert_params = []  # holds tuples of (movie_id, movie_name, movie_content)
    for idx, row in batch.iterrows():
        movie_id = str(row['id'])
        movie_name = _text_or_empty(row['title'])
        movie_overview = _text_or_empty(row['overview'])
        movie_content = f"TITLE: {movie_name} OVERVIEW: {movie_overview}"
        texts.append(movie_content)
        insert_params.append((movie_id, movie_name, movie_content))

    # Get embeddings for the entire batch (one API call per batch)
    embeddings = get_embeddings(texts)

    # Insert each movie row with its corresponding embedding.
    # The embedding is a JSON array of floats produced by get_embeddings and is
    # inlined as a string literal; id/name/content go through bound parameters.
    for (movie_id, movie_name, movie_content), embedding in zip(insert_params, embeddings):
        command = f"""
        INSERT INTO [dbo].[Movie] ([Id], [Name], [Content], [Embedding])
        VALUES (?, ?, ?, '{embedding}')
        """
        cursor.execute(command, movie_id, movie_name, movie_content)

    conn.commit()

    # Report every 500 movies, and once more for the final (partial) batch so
    # the log ends at total/total.
    movies_inserted = min(start_idx + batch_size, total_movies)
    if movies_inserted % 500 == 0 or movies_inserted == total_movies:
        print(f"Inserted {movies_inserted}/{total_movies} movies")

Inserted 500/8551 movies
Inserted 1000/8551 movies
Inserted 1500/8551 movies
Inserted 2000/8551 movies
Inserted 2500/8551 movies
Inserted 3000/8551 movies
Inserted 3500/8551 movies
Inserted 4000/8551 movies
Inserted 4500/8551 movies
Inserted 5000/8551 movies
Inserted 5500/8551 movies
Inserted 6000/8551 movies
Inserted 6500/8551 movies
Inserted 7000/8551 movies
Inserted 7500/8551 movies
Inserted 8000/8551 movies
Inserted 8500/8551 movies


### Insert traits and create graph connections

In [None]:
# Maps a movies_df column to (node table, edge table) for that trait.
traits = {
    "genres": ("Genre", "IN_GENRE"),
    "characters": ("Character", "FEATURES_CHARACTER"),
    "themes": ("Theme", "INCLUDES_THEME"),
    "setting": ("Setting", "SET_IN"),
    "series": ("Series", "PART_OF_SERIES"),
}

batch_size = 200
resume_index = 0  # set to the first unprocessed row to resume an interrupted run
total_rows = len(movies_df)


def _is_missing(value):
    """True for None and for float NaN (how pandas marks an absent JSON key)."""
    return value is None or (isinstance(value, float) and pd.isna(value))


def _clean_trait_values(raw):
    """Normalise one movie's raw trait field into a list of usable trait values.

    Accepts a list or a single scalar. Missing (None/NaN) and empty values are
    dropped, and repeated values are collapsed so one movie never gets two
    identical edges to the same trait node. Order of first occurrence is kept.
    """
    if isinstance(raw, list):
        candidates = raw
    elif _is_missing(raw):
        return []
    else:
        candidates = [raw]

    cleaned = []
    for value in candidates:
        if _is_missing(value) or not value:
            continue
        if value in cleaned:
            continue
        cleaned.append(value)
    return cleaned


def _trait_statements(node_table, edge_table):
    """Return (insert-node-if-missing SQL, insert-edge SQL) for one trait type."""
    command_node = f"""
    IF NOT EXISTS (SELECT * FROM [dbo].[{node_table}] WHERE [Name] = ?)
    BEGIN
        INSERT INTO [dbo].[{node_table}] ([Id], [Name])
        VALUES (ABS(CHECKSUM(NEWID())), ?)
    END
    """
    command_edge = f"""
    INSERT INTO [dbo].[{edge_table}] ($from_id, $to_id)
    SELECT m.$node_id, t.$node_id
    FROM [dbo].[Movie] m, [dbo].[{node_table}] t
    WHERE m.Id = ? AND t.Name = ?
    """
    return command_node, command_edge


# The SQL text depends only on the trait type, so build it once up front.
trait_sql = {
    trait_attr: _trait_statements(node_table, edge_table)
    for trait_attr, (node_table, edge_table) in traits.items()
}

for batch_start in range(resume_index, total_rows, batch_size):
    batch_end   = min(batch_start + batch_size, total_rows)
    batch_df    = movies_df.iloc[batch_start:batch_end]
    print(f"Processing movies {batch_start + 1} to {batch_end} out of {total_rows}")

    for idx, row in batch_df.iterrows():
        movie_id = row['id']
        for trait_attr, (command_node, command_edge) in trait_sql.items():
            for trait in _clean_trait_values(row.get(trait_attr)):
                # Insert trait node if not exists
                cursor.execute(command_node, trait, trait)

                # Insert edge between movie and trait
                cursor.execute(command_edge, movie_id, trait)

    conn.commit()
    print(f"Finished processing movies {batch_start + 1} to {batch_end}")

In [21]:
import jinja2
import json
import tiktoken
from concurrent.futures import ThreadPoolExecutor, as_completed

def get_distinct_trait(conn, trait):
    """List every distinct [Name] stored in the [dbo].[<trait>] table.

    Args:
        conn: open DB-API connection (a pyodbc connection in this notebook).
        trait: table name; it is interpolated into the SQL as an identifier.

    Returns:
        A list holding the first column of each returned row.
    """
    query = f"""
        SELECT DISTINCT [Name] 
        FROM [dbo].[{trait}]
    """
    names = []
    cur = conn.cursor()
    cur.execute(query)
    for record in cur.fetchall():
        names.append(record[0])
    return names

def get_distinct_unprocessed_trait(conn, trait):
    """List distinct [Name] values in [dbo].[<trait>] whose [Content] is NULL.

    Args:
        conn: open DB-API connection (a pyodbc connection in this notebook).
        trait: table name; it is interpolated into the SQL as an identifier.

    Returns:
        A list holding the first column of each returned row.
    """
    query = f"""
        SELECT DISTINCT [Name]
        FROM [dbo].[{trait}]
        WHERE [Content] IS NULL
    """
    pending = []
    cur = conn.cursor()
    cur.execute(query)
    for record in cur.fetchall():
        pending.append(record[0])
    return pending

def get_movies_by_trait(conn, trait, trait_name, edge_name):
    """Fetch the [Content] of up to 200 movies linked to one trait node.

    Args:
        conn: open DB-API connection (a pyodbc connection in this notebook).
        trait: node table name of the trait (interpolated as an identifier).
        trait_name: [Name] of the trait node; passed as a bound parameter.
        edge_name: edge table linking Movie to the trait table (interpolated).

    Returns:
        A list of the non-empty [Content] values; NULL/empty ones are dropped.
    """
    query = f"""
        SELECT TOP 200 m.[Content]
        FROM [dbo].[Movie] m, [dbo].[{edge_name}] e, [dbo].[{trait}] t
        WHERE MATCH(m-(e)->t)
        AND t.[Name] = ?
    """
    cur = conn.cursor()
    cur.execute(query, trait_name)
    contents = []
    for record in cur.fetchall():
        text = record[0]
        if text:
            contents.append(text)
    return contents

def cap_tokens(input_text, max_tokens=128000, encoding='cl100k_base'):
    """Truncate text so it encodes to at most `max_tokens` tokens.

    Args:
        input_text: text to cap.
        max_tokens: maximum number of tokens to keep.
        encoding: name of the tiktoken encoding used for counting.

    Returns:
        `input_text` unchanged when it already fits; otherwise the decoded
        text of its first `max_tokens` tokens.
    """
    tokenizer = tiktoken.get_encoding(encoding)
    # disallowed_special=() makes strings such as "<|endoftext|>" encode as
    # ordinary text; by default tiktoken raises ValueError when it sees them.
    tokens = tokenizer.encode(input_text, disallowed_special=())
    if len(tokens) <= max_tokens:
        # Already within budget: skip the encode/decode round trip.
        return input_text
    return tokenizer.decode(tokens[:max_tokens])

def get_summary(system_prompt_template, user_prompt_template, trait_name, combined_texts, llm_client, llm_model="gpt-4o"):
    """Ask the chat model to summarise the movies attached to one trait.

    Args:
        system_prompt_template: template rendered with `name=trait_name`.
        user_prompt_template: template rendered with `combined_texts=...`.
        trait_name: name of the trait being summarised.
        combined_texts: movie contents handed to the user template.
        llm_client: client exposing `beta.chat.completions.parse`.
        llm_model: model/deployment name sent with the request.

    Returns:
        The content of the first choice, or None when all 100 attempts were
        treated as rate limited.

    Raises:
        Exception: any error that is not recognised as HTTP 429.
    """
    def _looks_rate_limited(exc):
        # Either the error text mentions 429 or the attached response says so.
        if "429" in str(exc):
            return True
        response = getattr(exc, "response", None)
        return getattr(response, "status_code", None) == 429

    rendered_user = user_prompt_template.render(combined_texts=combined_texts)
    rendered_system = system_prompt_template.render(name=trait_name)
    # Keep the user prompt to 120000 tokens before sending it.
    messages = [
        {"role": "system", "content": rendered_system},
        {"role": "user", "content": cap_tokens(rendered_user, max_tokens=120000)},
    ]

    attempts_left = 100
    while attempts_left > 0:
        attempts_left -= 1
        try:
            completion = llm_client.beta.chat.completions.parse(
                model=llm_model,
                messages=messages,
                max_tokens=4000,
                temperature=0.7,
            )
            return completion.choices[0].message.content
        except Exception as exc:
            if not _looks_rate_limited(exc):
                raise exc
            time.sleep(10)
    return None

def store(conn, trait, trait_name, summary, embedding):
    """Write a trait's summary and embedding back to its row, then commit.

    Args:
        conn: open DB-API connection (a pyodbc connection in this notebook).
        trait: table name (interpolated into the SQL as an identifier).
        trait_name: [Name] of the row to update; bound parameter.
        summary: text stored in [Content]; bound parameter.
        embedding: vector text inlined into the statement as a string literal.
    """
    command = f"""
        UPDATE [dbo].[{trait}]
        SET [Content] = ?, [Embedding] = '{embedding}'
        WHERE [Name] = ?
    """
    bound_values = (summary, trait_name)
    cur = conn.cursor()
    cur.execute(command, *bound_values)
    conn.commit()

def process_single_trait(trait, edge_name, trait_name, system_prompt_template, user_prompt_template, llm_client, conn_str, llm_model):
    """Summarise and embed one trait node, then store the result.

    Opens its own database connection (the function runs on worker threads),
    loads the linked movie contents, asks the LLM for a summary and, when one
    is produced, embeds it and writes both back to the trait row.

    Args:
        trait: node table name of the trait.
        edge_name: edge table linking Movie to the trait table.
        trait_name: [Name] of the trait node to process.
        system_prompt_template: template for the system prompt.
        user_prompt_template: template for the user prompt.
        llm_client: chat client passed through to get_summary.
        conn_str: ODBC connection string used to open the connection.
        llm_model: model/deployment name passed through to get_summary.

    Returns:
        `trait_name`, whether or not a summary was stored.
    """
    conn = pyodbc.connect(conn_str)
    try:
        movies = get_movies_by_trait(conn=conn, trait=trait, trait_name=trait_name, edge_name=edge_name)
        summary = get_summary(system_prompt_template, user_prompt_template, trait_name, movies, llm_client, llm_model)
        if summary is not None:
            embedding = get_embeddings([summary])[0]
            store(conn=conn, trait=trait, trait_name=trait_name, summary=summary, embedding=embedding)
    finally:
        # Release the connection on success and on failure; with hundreds of
        # concurrent tasks, leaked connections pile up on the server.
        conn.close()
    return trait_name

def process_trait_parallel(conn_str, trait, edge_name, system_prompt_template_path, user_prompt_template_path, llm_client, llm_model="gpt-4o", max_workers=400):
    """Summarise every not-yet-processed node of one trait type in parallel.

    Loads the two prompt templates, lists the trait names returned by
    get_distinct_unprocessed_trait, and runs process_single_trait for each on
    a thread pool. A failure for one trait is printed and does not stop the
    others.

    Args:
        conn_str: ODBC connection string (each worker opens its own connection).
        trait: node table name of the trait.
        edge_name: edge table linking Movie to the trait table.
        system_prompt_template_path: path to the system prompt template file.
        user_prompt_template_path: path to the user prompt template file.
        llm_client: chat client passed through to the workers.
        llm_model: model/deployment name passed through to the workers.
        max_workers: thread pool size. Every worker opens a database
            connection, so lower this if the server starts refusing or
            dropping connections.
    """
    with open(system_prompt_template_path, 'r') as f:
        system_prompt_template = jinja2.Template(f.read())
    with open(user_prompt_template_path, 'r') as f:
        user_prompt_template = jinja2.Template(f.read())

    # The listing connection is only needed for this one query; close it
    # before fanning out so it is not held for the whole run.
    conn = pyodbc.connect(conn_str)
    try:
        traits_list = get_distinct_unprocessed_trait(conn, trait)
    finally:
        conn.close()

    total = len(traits_list)
    print(f"Processing {total} unprocessed {trait}")

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Map each future back to its trait name for error reporting.
        futures = {
            executor.submit(
                process_single_trait,
                trait,
                edge_name,
                trait_name,
                system_prompt_template,
                user_prompt_template,
                llm_client,
                conn_str,
                llm_model
            ): trait_name for trait_name in traits_list
        }
        for idx, future in enumerate(as_completed(futures)):
            trait_name = futures[future]
            try:
                future.result()
            except Exception as e:
                # Best effort: report and continue; the row keeps a NULL
                # [Content] and is picked up again on the next run.
                print(f"Error processing {trait_name}: {e}")
            if (idx + 1) % 1000 == 0:
                print(f"Processed {idx + 1} out of {total} of {trait}")


In [22]:
# Genre: summarise and embed every Genre node that has no content yet
process_trait_parallel(
    conn_str=conn_str,
    trait="Genre",
    edge_name="IN_GENRE",
    system_prompt_template_path="prompts/summarize_genre.jinja2",
    user_prompt_template_path="prompts/summarize_user.jinja2",
    llm_client=gpt_4o_mini_client,
    llm_model="gpt-4o-mini",
)

Processing 0 unprocessed Genre


In [24]:
# Series: summarise and embed every Series node that has no content yet
process_trait_parallel(
    conn_str=conn_str,
    trait="Series",
    edge_name="PART_OF_SERIES",
    system_prompt_template_path="prompts/summarize_series.jinja2",
    user_prompt_template_path="prompts/summarize_user.jinja2",
    llm_client=gpt_4o_mini_client,
    llm_model="gpt-4o-mini",
)

Processing 321 unprocessed Series
Error processing Emmanuelle: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': True, 'severity': 'medium'}, 'violence': {'filtered': False, 'severity': 'safe'}}}}}
Error processing I Spit on Your Grave: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retr

In [29]:
# Character: summarise and embed every Character node that has no content yet
process_trait_parallel(
    conn_str=conn_str,
    trait="Character",
    edge_name="FEATURES_CHARACTER",
    system_prompt_template_path="prompts/summarize_character.jinja2",
    user_prompt_template_path="prompts/summarize_user.jinja2",
    llm_client=gpt_4o_mini_client,
    llm_model="gpt-4o-mini",
)

Processing 580 unprocessed Character
Error processing Johannes Borgen: ('08001', '[08001] [Microsoft][ODBC Driver 18 for SQL Server]SSL Provider: An existing connection was forcibly closed by the remote host.\r\n (10054) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 18 for SQL Server]Invalid connection string attribute (0); [08001] [Microsoft][ODBC Driver 18 for SQL Server]Client unable to establish connection. For solutions related to encryption errors, see https://go.microsoft.com/fwlink/?linkid=2226722 (10054)')
Error processing Joe Lamb: ('08001', '[08001] [Microsoft][ODBC Driver 18 for SQL Server]SSL Provider: An existing connection was forcibly closed by the remote host.\r\n (10054) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 18 for SQL Server]Invalid connection string attribute (0); [08001] [Microsoft][ODBC Driver 18 for SQL Server]Client unable to establish connection. For solutions related to encryption errors, see https://go.microsoft.com/fwlink/?linkid=2226722 

In [30]:
# Theme: summarise and embed every Theme node that has no content yet
process_trait_parallel(
    conn_str=conn_str,
    trait="Theme",
    edge_name="INCLUDES_THEME",
    system_prompt_template_path="prompts/summarize_theme.jinja2",
    user_prompt_template_path="prompts/summarize_user.jinja2",
    llm_client=gpt_4o_mini_client,
    llm_model="gpt-4o-mini",
)

Processing 2841 unprocessed Theme
Error processing Crime and Rehabilitation: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': True, 'severity': 'high'}, 'violence': {'filtered': False, 'severity': 'low'}}}}}
Error processing Dehumanization: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering pol

In [31]:
# Setting: summarise and embed every Setting node that has no content yet
process_trait_parallel(
    conn_str=conn_str,
    trait="Setting",
    edge_name="SET_IN",
    system_prompt_template_path="prompts/summarize_setting.jinja2",
    user_prompt_template_path="prompts/summarize_user.jinja2",
    llm_client=gpt_4o_mini_client,
    llm_model="gpt-4o-mini",
)

Processing 3456 unprocessed Setting
Error processing A dystopian future: ('08001', '[08001] [Microsoft][ODBC Driver 18 for SQL Server]SSL Provider: An existing connection was forcibly closed by the remote host.\r\n (10054) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 18 for SQL Server]Invalid connection string attribute (0); [08001] [Microsoft][ODBC Driver 18 for SQL Server]Client unable to establish connection. For solutions related to encryption errors, see https://go.microsoft.com/fwlink/?linkid=2226722 (10054)')
Error processing A New York City restaurant: ('08001', '[08001] [Microsoft][ODBC Driver 18 for SQL Server]SSL Provider: An existing connection was forcibly closed by the remote host.\r\n (10054) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 18 for SQL Server]Invalid connection string attribute (0); [08001] [Microsoft][ODBC Driver 18 for SQL Server]Client unable to establish connection. For solutions related to encryption errors, see https://go.microsoft.com/fwl