In [None]:
sonar-pro

In [None]:
import json
import re
import requests
import yaml
import time

# Load configuration (ensure config.yaml has your Perplexity key under "Perplexity")
with open("config.yaml", "r") as file:
    config = yaml.safe_load(file)

PERPLEXITY_API_KEY = config["Perplexity"]

# Helper function to extract JSON from a mixed response
def robust_extract_json(response_text):

    # Attempt extraction from triple backticks:
    code_block_match = re.search(r"```(?:json)?\s*([\s\S]+?)\s*```", response_text)
    if code_block_match:
        candidate = code_block_match.group(1)
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            pass

    # Fallback: search from the first '{' to the last '}'
    start = response_text.find("{")
    end = response_text.rfind("}")
    if start != -1 and end != -1 and end > start:
        candidate = response_text[start:end+1]
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            return None

    return None

# Function to send a simple request to Perplexity's API
def update_row(row_dict):
    # Use a simple prompt for the current request
    prompt = "how is the weather in solothurn today?"
    
    payload = {
        "model": "sonar-reasoning-pro",
        "messages": [
            {
                "role": "system",
                "content": "you are very ironic and sarcastic, but you are also very helpful."
            },
            {
                "role": "user",
                "content": prompt
            }
        ],
        "max_tokens": 1000
    }

    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json"
    }

    try:
        response = requests.post(
            "https://api.perplexity.ai/chat/completions",
            json=payload,
            headers=headers
        )
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Request error: {e}")
        return row_dict

    try:
        response_data = response.json()
    except json.JSONDecodeError as e:
        print(f"JSON decode error in response: {e}")
        return row_dict

    if "choices" not in response_data or not response_data["choices"]:
        print("No choices found in response.")
        return row_dict

    raw_reply = response_data["choices"][0]["message"]["content"]
    print("Raw reply:", raw_reply)  # For debugging

    updated_data = robust_extract_json(raw_reply)
    if updated_data is None:
        print("Could not extract valid JSON; returning original data.")
        return row_dict

    return updated_data

# Execute a simple request (no Excel file used)
result = update_row({})
print("Result:", result)


KeyboardInterrupt: 

In [52]:
import json
import re
import requests
import pandas as pd
import time
import yaml
import os

with open("config.yaml", "r") as file:
    config = yaml.safe_load(file)

PERPLEXITY_API_KEY = config["Perplexity"]
def robust_extract_json(response_text):
    code_block_match = re.search(r"```(?:json)?\s*([\s\S]+?)\s*```", response_text)
    if code_block_match:
        candidate = code_block_match.group(1)
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            pass

    start = response_text.find("{")
    end = response_text.rfind("}")
    if start != -1 and end != -1 and end > start:
        candidate = response_text[start:end+1]
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            return None

    return None

def update_row(row_dict):
    prompt = ( "You are an expert in verifying pharmaceutical data, especially for the German market. "
        "Your task is to verify and update each field of the given row using the most current online information from reliable sources. "
        "Please check every column (e.g., Therapiegebiet, Indikation, Verabreichung, Firma, Wirkstoff, Markennamen, "
        "EMEA / FDA Registered, Registrierung CH erwartet im, etc.) using authoritative websites such as compendium.ch, Swissmedic, "
        "and other trusted sources. \n\n"
        "Hinweis: Die Excel-Daten sind in deutscher Sprache. Bitte nutzen Sie auch deutschsprachige Quellen, wo angebracht. \n\n"
        "IMPORTANT: If you use any online source to verify or update a field, include the URL (source link) in the 'Website' column and make sure these URLs actually exist and not outdated. "
        "If multiple sources are used, separate them with a semicolon and two blank spaces for its readablility. If no source was used or no update was made, leave the field unchanged. \n\n"
        "Only update a field if you have confirmed new, reliable information. Otherwise, return the original value. "
        "Return the updated row as a JSON object with the same keys and no additional commentary or text. \n\n"
        "Row data:\n" + json.dumps(row_dict, indent=2, default=str))
    payload = {
        "model": "sonar-pro", # cost for 10 rows: 0.5 USD
        "web_search_options": {"search_context_size": "high"},
        "temperature": 0.0,
        
        "messages": [ { "role": "system","content": (
                    "You are a highly knowledgeable and diligent expert in pharmaceutical data verification. "
                    "Your role is to verify each field of the given German data using up-to-date online sources. "
                    "Double-check every column, and only update fields when you are certain about the new data via your extensive research. "
                    "If you cannot verify a field, return the original value. Do not include any commentary in your reply." )},
            {"role": "user", "content": prompt }],
        "max_tokens": 8000}

    headers = {"Authorization": f"Bearer {PERPLEXITY_API_KEY}","Content-Type": "application/json"}

    try:
        response = requests.post(
            "https://api.perplexity.ai/chat/completions",
            json=payload,
            headers=headers
        )
        response.raise_for_status()  # Raises an HTTPError if the status is 4xx, 5xx
    except requests.RequestException as e:
        print(f"Request error: {e}")
        return row_dict

    try:
        response_data = response.json()
    except json.JSONDecodeError as e:
        print(f"JSON decode error in response: {e}")
        return row_dict

    if "choices" not in response_data or not response_data["choices"]:
        print("No choices found in response.")
        return row_dict

    raw_reply = response_data["choices"][0]["message"]["content"]
    #print("Raw reply:", raw_reply)

    updated_data = robust_extract_json(raw_reply)
    if updated_data is None:
        print("Could not extract valid JSON; returning original row.")
        return row_dict

    return updated_data

input_filename = "Pipeline.xlsx"
output_filename = "updated_Pipeline_tabelle_LE2_sonar_pro_highsearch.xlsx"
sheet_name = "Tabelle für LE2 2022"

df = pd.read_excel(input_filename, sheet_name=sheet_name)
updated_rows = []

for index, row in df.head(10).iterrows():
    row_dict = row.to_dict()
    print(f"Processing row {index+1}/{len(df.head(1))}...")
    updated_data = update_row(row_dict)
    updated_rows.append(updated_data)
    time.sleep(1)

updated_df = pd.DataFrame(updated_rows)
updated_df.to_excel(output_filename, index=False)
print(f"Updated data saved to {output_filename}")


Processing row 1/1...
Processing row 2/1...
Processing row 3/1...
Processing row 4/1...
Processing row 5/1...
Processing row 6/1...
Processing row 7/1...
Processing row 8/1...
Processing row 9/1...
Processing row 10/1...
Updated data saved to updated_Pipeline_tabelle_LE2_sonar_pro_highsearch.xlsx


In [39]:
import json
import re
import pandas as pd
import openai
import time
import os
from openai import OpenAI
import yaml

with open("config.yaml", "r") as file:
    config = yaml.safe_load(file)

client = OpenAI(api_key=config["OPENAI_KEY_TEST"],)

def extract_json(reply):
    match = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", reply)
    if match:
        return match.group(1)
    return reply

def update_row(row_dict):
    prompt = (
        "You are an expert in verifying and analyzing pharmaceutical data, specializing in identifying active pharmaceutical ingredients (Wirkstoffe) in Phase 3 clinical trials and updating pipeline products. "
        "Your task is to perform a comprehensive market analysis by processing the provided Excel row and verifying its information using only trusted, reliable sources. "
        "These sources include, but are not limited to, ClinicalTrials.gov, EMA, Swissmedic, FirstWord Pharma, Evaluate Pharma, Fierce Pharma, DAZ, and official manufacturer websites. "
        "Focus on checking for any updates in clinical trial phases, approvals, or other key changes. "
        "Add additional information if available, but do not remove any existing data. "
        " Please ensure that the format of the data remains consistent, especially for dates (dd/mm/yyyy). "
        "Return ONLY the updated row as a JSON object with the same keys, with no additional text or explanation. "
        "For every online resource/link used, include the URL in the 'Website' column (if multiple sources are used, separate them with a '&') - moreover please in brackets give the date of the webpage like for example: wwww.pharmazie/news/acti.com (2025). "
        "If no update is needed for a field, retain the original value.\n\n"
        "Row data:\n" + json.dumps(row_dict, indent=2, default=str))
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini-search-preview",
            messages=[
                {"role": "system", "content": "You are an expert in pharmaceutical market analysis specializing in active pharmaceutical ingredients in Phase 3 clinical trials. You verify and update pipeline product data by cross-checking reliable and trusted online sources such as ClinicalTrials.gov, EMA, Swissmedic, FirstWord Pharma, Evaluate Pharma, Fierce Pharma, DAZ, and manufacturer websites. Your analysis should be thorough, focused on current approvals and phase updates, and must rely solely on verified sources."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=5000
        )
        reply = response.choices[0].message.content.strip()
        #print("Raw reply:", reply)

        json_content = extract_json(reply)
        try:
            updated_row = json.loads(json_content)
        except json.JSONDecodeError as e:
            print("JSON decode error:", e)
            return row_dict 
        return updated_row

    except Exception as e:
        print("Error updating row:", e)
        return row_dict  

input_filename = "Pipeline.xlsx"
output_filename = "updated_data.xlsx"
sheet_name = "Tabelle für LE2 2022"  
df = pd.read_excel(input_filename, sheet_name=sheet_name)
updated_rows = []

for index, row in df.head(3).iterrows():
    row_dict = row.to_dict()
    print(f"Processing row {index+1}/{len(df)}...")
    updated_data = update_row(row_dict)
    updated_rows.append(updated_data)
    time.sleep(1)  # Adjust sleep time if necessary

updated_df = pd.DataFrame(updated_rows)
updated_df.to_excel(output_filename, index=False)
print(f"Updated data saved to {output_filename}")


Processing row 1/264...
Processing row 2/264...
Processing row 3/264...
Updated data saved to updated_data.xlsx


In [7]:
from openai import OpenAI
import pandas as pd
import json
from dotenv import load_dotenv
import os
import yaml

load_dotenv()

with open("config.yaml", "r") as file:
     config = yaml.safe_load(file)


client = OpenAI(api_key=config["OPENAI_KEY_TEST"],)



response = client.responses.create(
  model="gtp-4o",
  input = [
    {
        "role": "system",
        "content":  [
        {"type": "input_text",
         "text":
            "Du bist ein Assistent, der aus einem gegebenen Prompt eine Excel-Tabelle generiert. "
            "Erstelle eine vollständige Liste aller Moleküle von allen Indikationen, für die im Jahr 2025 "
            "eine FDA-Entscheidung (inklusive FDA Priority Review) erwartet wird. Die Liste soll nach Indikation "
            "und erwarteten Zulassungsdatum sortiert sein und die folgenden Spalten enthalten: Brandname, Wirkstoff, "
            "Produkteigenschaften, Applikationsformen (inkl. Darreichungsformen wie Pen, Fertigspritze, Filmtablette, Kapsel usw.), "
            "Lagerungsbedingungen (insbesondere ob Kühlung erforderlich ist), spezielle Patientengruppen, Informationen für "
            "Ärzte/Apotheken/Patienten, Wirkmechanismus, Kontraindikationen, Nebenwirkungen, Interaktionen, Schulungshinweise, "
            "und zugelassene Konkurrenzprodukte. Bitte liefere als Antwort ausschließlich einer geordneten Tabelle"}]
    },
    {
        "role": "user",
        "content":[
        {
          "type": "input_text",
          "text": 
            "Erstelle vollständige Liste aller Moleküle von allen Indikationen für die im Jahr 2025 eine Entscheidung der US-amerikanischen "
            "Food and Drug Administration (FDA) erwartet wird inklusive FDA Priority Review. Die Liste ist nach Indikation und erwartetem Zulassungsdatum sortiert. "
            "Die Liste umfasst im Weiteren: Brandname, Wirkstoff, Produkteigenschaften, Applikationsformen (einschließlich Darreichungsformen wie Pen, Fertigspritze, "
            "Filmtablette, Kapsel usw.), Lagerungsbedingungen (insbesondere ob eine Kühlung erforderlich ist), spezielle Patientengruppen, Informationen für Ärzte, "
            "Apotheken und Patienten, inklusive Wirkmechanismus, Kontraindikationen, Nebenwirkungen, Interaktionen und Schulungshinweisen, zugelassene Konkurrenzprodukte."}]
    }
],
text={
"format": {
    "type": "text"
}
},
reasoning={"effort": "medium"},
  tools=[{
      "type": "computer-use-preview-2025-03-11",
      "search_context_size": "medium"}
  ],
  temperature=0.97,
  max_output_tokens=8432,
  top_p=1,
  store=False
)

BadRequestError: Error code: 400 - {'error': {'message': "Invalid value: 'com...-11'. Supported values are: 'code_interpreter', 'function', 'file_search', 'web_search', 'web_search_preview', 'web_search_preview_2025_03_11', 'computer-preview', and 'computer_use_preview'.", 'type': 'invalid_request_error', 'param': 'tools[0].type', 'code': 'invalid_value'}}

In [2]:
import json
print(response.output_text)
print(json.dumps(response.output, default=lambda o: o.__dict__, indent=2))

Hier ist eine vollständige Liste der Moleküle für alle Indikationen, für die im Jahr 2025 eine Entscheidung der US-amerikanischen Food and Drug Administration (FDA) erwartet wird, einschließlich FDA Priority Review. Die Liste ist nach Indikation und erwartetem Zulassungsdatum sortiert und enthält die folgenden Spalten:

| Indikation | Erwartetes Zulassungsdatum | Brandname | Wirkstoff | Produkteigenschaften | Applikationsformen | Lagerungsbedingungen | Spezielle Patientengruppen | Informationen für Ärzte, Apotheken und Patienten | Wirkmechanismus | Kontraindikationen | Nebenwirkungen | Interaktionen | Schulungshinweise | Zugelassene Konkurrenzprodukte |
|------------|----------------------------|-----------|-----------|----------------------|--------------------|----------------------|---------------------------|-------------------------------------------------|----------------|-------------------|----------------|---------------|------------------|-------------------------------|
| Ak

In [39]:
from openai import OpenAI
import pandas as pd
import json
from dotenv import load_dotenv
import os
import yaml

load_dotenv()

with open("config.yaml", "r") as file:
     config = yaml.safe_load(file)


client = OpenAI(api_key=config["OPENAI_KEY_TEST"],)

response = client.chat.completions.create(
  model="gpt-4o-search-preview",
  messages=[
    {
      "role": "system",
      "content": [
        {
          "type": "text",
          "text": "Erstelle eine umfassende Liste aller Moleküle, für die im Jahr 2025 eine Entscheidung von der US-amerikanischen Food and Drug Administration (FDA) erwartet wird, einschließlich solcher mit FDA Priority Review.\n\n- **Sortierung:** Die Liste sollte nach Indikation und erwartetem Zulassungsdatum geordnet sein.\n\n- **Informationen pro Molekül:** Für jedes aufgeführte Molekül müssen die folgenden Informationen bereitgestellt werden:\n  - Brandname und Wirkstoff\n  - Produkteigenschaften\n  - Applikationsformen (einschließlich Darreichungsformen wie Pen, Fertigspritze, Filmtablette, Kapsel usw.)\n  - Lagerungsbedingungen (mit besonderem Augenmerk auf Kühlanforderungen)\n  - Spezielle Patientengruppen\n  - Informationen für Ärzte, Apotheken, und Patienten\n  - Wirkmechanismus, Kontraindikationen, Nebenwirkungen, Interaktionen und Schulungshinweise\n  - Zugelassene Konkurrenzprodukte\n\n# Steps\n\n1. **Ermittlung der Moleküle:** Sammle Daten zu allen Arzneimitteln, für die 2025 eine FDA-Entscheidung erwartet wird, inklusive derjenigen mit Prioritätsprüfung.\n2. **Datenorganisation:** Ordne die Daten nach Indikation und erwartetem Zulassungsdatum.\n3. **Datenaufbereitung:** Stelle die detaillierten Informationen pro Molekül zusammen, wie in der Liste der benötigten Informationen angegeben.\n4. **Zusammstellung der Übersicht:** Gestalte die Übersicht klar und übersichtlich, damit alle relevanten Informationen leicht zu finden sind.\n\n# Output Format\n\nDie Ausgabe sollte in ausführlichen Absätzen sein. Jeder Absatz enthält die vollständigen Informationen zu einem Molekül, sortiert nach Indikation und Zulassungsdatum.\n\n# Beispiel\n\n**Eintrag Beispiel:**\n\n- **Indikation:** [Indikation]\n- **Zulassungsdatum:** [Datum]\n- **Brandname:** [Brandname]\n- **Wirkstoff:** [Wirkstoff]\n- **Produkteigenschaften:** [Beschreibung der Produkteigenschaften]\n- **Applikationsformen:** [Applikationsform 1, Applikationsform 2, ...]\n- **Lagerungsbedingungen:** [Kühlung erforderlich/nicht erforderlich]\n- **Spezielle Patientengruppen:** [Beschreibung der Gruppen]\n- **Informationen für Ärzte/Apotheken/Patienten:** [Detaillierte Informationen]\n- **Wirkmechanismus:** [Beschreibung des Mechanismus]\n- **Kontraindikationen:** [Liste der Kontraindikationen]\n- **Nebenwirkungen:** [Liste der Nebenwirkungen]\n- **Interaktionen:** [Liste der Interaktionen]\n- **Schulungshinweise:** [Schulungshinweise]\n- **Zugelassene Konkurrenzprodukte:** [Produkt 1, Produkt 2, ...]\n\n# Notes\n\n- Achte darauf, dass alle Informationen auf dem neuesten Stand und korrekt sind.\n- Verwende eine einheitliche Struktur für die Darstellungen, um die Übersichtlichkeit zu gewährleisten."
        }
      ]
    }
  ],
  response_format={
    "type": "text"
  },
  web_search_options={
    "user_location": {
      "type": "approximate",
      "approximate": {
        "country": "CH",
        "city": "Solothurn"
      }
    }
  },
  store=False
)

print(response)

ChatCompletion(id='chatcmpl-6d2743dc-fec1-4a51-a25a-708e4fda9239', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Hier ist eine umfassende Liste der Moleküle, für die im Jahr 2025 eine Entscheidung der US-amerikanischen Food and Drug Administration (FDA) erwartet wird, einschließlich solcher mit FDA Priority Review. Die Liste ist nach Indikation und erwartetem Zulassungsdatum geordnet.\n\n**Indikation:** Obstruktive hypertrophe Kardiomyopathie\n\n- **Zulassungsdatum:** September 2025\n- **Brandname:** Aficamten\n- **Wirkstoff:** Aficamten\n- **Produkteigenschaften:** Oraler, selektiver Myosin-Inhibitor zur Normalisierung der erhöhten Kontraktilität des Herzmuskels bei obstruktiver hypertropher Kardiomyopathie.\n- **Applikationsformen:** Filmtablette\n- **Lagerungsbedingungen:** Raumtemperatur; keine speziellen Kühlanforderungen.\n- **Spezielle Patientengruppen:** Erwachsene Patienten mit obstruktiver hypertropher Kardiomyopathie.\n-

In [54]:
from openai import OpenAI

response = client.chat.completions.create(
    model="gpt-4o-search-preview",
    messages=[
        {
            "role": "system",
            "content": [
                {
                    "type": "text",
                    "text": (
                        "You are a pharmaceutical data extraction and analysis expert. Your task is to compile a "
                        "comprehensive list of all molecules (drugs) for which the US FDA is expected to render a decision in "
                        "2025, including those undergoing Priority Review. The output must be sorted by indication and expected "
                        "approval date.\n\n"
                        "For each molecule, provide the following details:\n"
                        "• Indication\n"
                        "• Expected Approval Date\n"
                        "• Brand Name and Active Ingredient\n"
                        "• Product Attributes\n"
                        "• Application Forms (e.g., pen, prefilled syringe, film-coated tablet, capsule, etc.)\n"
                        "• Storage Conditions (highlight if cooling is required)\n"
                        "• Relevant Patient Groups\n"
                        "• Information for Doctors, Pharmacies, and Patients\n"
                        "• Mechanism of Action, Contraindications, Side Effects, Interactions, and Training Notes\n"
                        "• Approved Competitive Products\n"
                        "• Sources\n\n"
                        "Ensure that all data is cross-checked against reputable sources and clearly note any uncertainties. "
                        "Output the results in two parts:\n"
                        "1. A detailed narrative description for each molecule.\n"
                        "2. A CSV-formatted table where each row corresponds to a molecule and columns match the above fields.\n"
                        "Additionally, include at least 30 active ingredients (Wirkstoffe) in your final table."
                    )
                }
            ]
        },
        {
            "role": "assistant",
            "content": [
                {
                    "type": "text",
                    "text": (
                        "I will generate a detailed table and narrative description of all molecules (drugs) for which the "
                        "FDA decision is expected in 2025, including those with Priority Review. The output will include at "
                        "least 30 distinct active ingredients (Wirkstoffe) and be sorted by indication and expected approval date."
                    )
                }
            ]
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": (
                        "Erstelle vollständige Liste aller Moleküle von allen Indikationen, für die im Jahr 2025 eine Entscheidung der "
                        "US-amerikanischen Food and Drug Administration (FDA) erwartet wird – inklusive FDA Priority Review. "
                        "Die Liste ist nach Indikation und erwartetem Zulassungsdatum sortiert. Folgende Informationen sollen enthalten sein:\n"
                        "• Brandname und Wirkstoff\n"
                        "• Produkteigenschaften\n"
                        "• Applikationsformen (einschließlich Darreichungsformen wie Pen, Fertigspritze, Filmtablette, Kapsel usw.)\n"
                        "• Lagerungsbedingungen (insbesondere ob eine Kühlung erforderlich ist)\n"
                        "• Spezielle Patientengruppen\n"
                        "• Informationen für Ärzte, Apotheken und Patienten\n"
                        "• Wirkmechanismus, Kontraindikationen, Nebenwirkungen, Interaktionen und Schulungshinweise\n"
                        "• Zugelassene Konkurrenzprodukte\n"
                        "• Quellen\n\n"
                        "Bitte stelle zunächst eine ausführliche narrative Beschreibung bereit und generiere anschließend eine "
                        "CSV-Datei mit den entsprechenden Spalten. Ich melde mich, sobald du fertig bist."
                    )
                }
            ]
        }
    ],
    response_format={"type": "text"},
    web_search_options={
        "search_context_size": "high",
        "user_location": {
            "type": "approximate",
            "approximate": {
                "country": "CH",
                "region": "Solothurn",
                "city": "Solothurn"
            }
        }
    },
    store=False
)

print(response)


ChatCompletion(id='chatcmpl-7c7bbc19-d702-48d9-9afe-e12bb7c6422c', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Gerne stelle ich Ihnen eine umfassende Liste der Moleküle bereit, für die im Jahr 2025 eine Entscheidung der US-amerikanischen Food and Drug Administration (FDA) erwartet wird, einschließlich solcher mit Priority Review. Die Liste ist nach Indikation und erwartetem Zulassungsdatum sortiert. Für jedes Molekül werden die folgenden Informationen bereitgestellt:\n\n- **Indikation**\n- **Erwartetes Zulassungsdatum**\n- **Brandname und Wirkstoff**\n- **Produkteigenschaften**\n- **Applikationsformen**\n- **Lagerungsbedingungen**\n- **Spezielle Patientengruppen**\n- **Informationen für Ärzte, Apotheken und Patienten**\n- **Wirkmechanismus, Kontraindikationen, Nebenwirkungen, Interaktionen und Schulungshinweise**\n- **Zugelassene Konkurrenzprodukte**\n- **Quellen**\n\n**Hinweis:** Die bereitgestellten Informationen basieren auf d

In [79]:

client = OpenAI(api_key=config["DEEPSEEK"],base_url="https://api.deepseek.com")

response = client.chat.completions.create(
  model="deepseek-reasoner",
  max_tokens=8000,
  messages=[{ "role": "system", "content": "You are a pharmaceutical data expert and data extraction specialist, your task is to compile a comprehensive list of all molecules (drugs) for all indications for which an FDA decision is expected in 2025, including those with FDA Priority Review. The output must be sorted by indication and expected approval date.\n\nFor each molecule, provide the following data:\n\n- Indication\n- Expected Approval Date\n- Brand Name and Active Ingredient\n- Product Attributes\n- Application Forms (e.g., pen, prefilled syringe, film-coated tablet, capsule, etc.)\n- Storage Conditions (especially whether cooling is required)\n- Relevant Patient Groups\n- Information for Doctors, Pharmacies, and Patients\n- Mechanism of Action, Contraindications, Side Effects, Interactions, and Training Notes\n- Approved Competitive Products\n- Sources\n\nCross-check data from reputable sources to ensure accuracy and note any uncertainties.\n\n# Steps\n\n1. Extract molecules for which an FDA decision is expected in 2025.\n2. Sort the information by indication and expected approval date.\n3. Provide detailed information as listed for each molecule.\n4. Use placeholders where necessary for information that is specific to individual drugs.\n5. Verify data accuracy against credible sources.\n\n# Output Format\n\nOutput the results in a detailed table format where each row represents a molecule and columns correspond to the aforementioned fields.\n\n# Examples\n\n- Indication: [Example Indication]\n- Expected Approval Date: [Date]\n- Brand Name and Active Ingredient: [Brand Name](Active Ingredient)\n- Product Attributes: [Attributes]\n- Application Forms: [Forms]\n- Storage Conditions: [Conditions]\n- Relevant Patient Groups: [Groups]\n- Information for Doctors, Pharmacies, Patients: [Information]\n- Mechanism of Action, Contraindications, Side Effects, Interactions, and Training Notes: [Details]\n- Approved Competitive Products: [Products]\n- Sources: [Source Information]\n\n(Note: These are placeholders. Real examples should be completed with accurate and specific data.)"},
    {"role": "user","content":"Erstelle vollständige Tabelle aller Moleküle von allen Indikationen für die im Jahr 2025 eine Entscheidung der US-amerikanischen Food and Drug Administration (FDA) erwartet wird inklusive FDA Priority Review. Die Liste ist nach Indikation und erwartetem Zulassungsdatum sortiert. Die Liste umfasst im Weiteren: Brandname, Wirkstoff, Produkteigenschaften, Applikationsformen (einschließlich Darreichungsformen wie Pen, Fertigspritze, Filmtablette, Kapsel usw.), Lagerungsbedingungen (insbesondere ob eine Kühlung erforderlich ist), spezielle Patientengruppen, Informationen für Ärzte, Apotheken und Patienten, inklusive Wirkmechanismus, Kontraindikationen, Nebenwirkungen, Interaktionen und Schulungshinweisen, zugelassene Konkurrenzprodukte\nAlles klar. Ich stelle eine vollständige Liste aller Arzneimittel (Moleküle) zusammen, für die im Jahr 2025 eine FDA-Entscheidung erwartet wird – einschließlich aller Indikationen und sowohl regulärer als auch Priority Reviews.\nDie Übersicht wird nach Indikation und erwartetem Zulassungsdatum sortiert. Zu jedem Produkt werde ich die folgenden Informationen angeben:\n·         Brandname und Wirkstoff\n·         Produkteigenschaften\n·         Applikationsformen (inkl. Darreichungsformen wie Pen, Fertigspritze, Filmtablette usw.)\n·         Lagerungsbedingungen (v. a. ob Kühlung erforderlich ist)\n·         Relevante Patientengruppen\n·         Informationen für Ärzte, Apotheken und Patienten\n·         Wirkmechanismus, Kontraindikationen, Nebenwirkungen, Interaktionen und Schulungshinweise\n·         Zugelassene Konkurrenzprodukte\nIch melde mich, sobald ich die Ergebnisse für dich zusammengestellt habe."
        }
      ],
  stream =False) 
print(response.choices[0].message.content)

**Hinweis:** Die folgende Tabelle ist ein **Beispiel** basierend auf öffentlich zugänglichen Daten bis Oktober 2023. Da viele FDA-Entscheidungen für 2025 noch nicht finalisiert sind, werden **Platzhalter** für unbestätigte Informationen verwendet. Aktualisierungen erfordern eine Überprüfung durch aktuelle Quellen.

---

### **FDA-Entscheidungen 2025: Arzneimittelliste (nach Indikation und Datum sortiert)**  
| **Indikation**                     | **Erwartetes Zulassungsdatum** | **Brandname (Wirkstoff)**             | **Produkteigenschaften**                                                                 | **Applikationsformen**        | **Lagerungsbedingungen**           | **Relevante Patientengruppen**                | **Informationen für Ärzte/Apotheken/Patienten**                                                                 | **Wirkmechanismus, Kontraindikationen, Nebenwirkungen, Interaktionen, Schulung**                                                                          

In [None]:
from openai import OpenAI
from openai import OpenAI
import pandas as pd
import json
from dotenv import load_dotenv
import os
import yaml

load_dotenv()

with open("config.yaml", "r") as file:
     config = yaml.safe_load(file)


client = OpenAI(api_key=config["OPENAI_KEY_TEST"],)




response = client.chat.completions.create(
  model="gpt-4o",
  max_tokens=16384,
  messages=[
    {
      "role": "system",
      "content": [
        {
          "text": "You are a pharmaceutical data expert and data extraction specialist. Your task is to compile a comprehensive list of all drug molecules for which an FDA decision is expected in 2025, including those with FDA Priority Review. This list must be detailed and organized by indication and expected approval date.\n\nGather the following details for each molecule:\n\n- **Indication**\n- **Expected Approval Date**\n- **Brand Name and Active Ingredient**\n- **Product Attributes**\n- **Application Forms** (e.g., pen, prefilled syringe, film-coated tablet, capsule, etc.)\n- **Storage Conditions** (especially whether cooling is required)\n- **Relevant Patient Groups**\n- **Information for Doctors, Pharmacies, and Patients**\n- **Mechanism of Action, Contraindications, Side Effects, Interactions, and Training Notes**\n- **Approved Competitive Products**\n- **Sources**\n\nEnsure accuracy by cross-checking data from reputable sources and note any uncertainties encountered during the information collection process.\n\n# Steps\n\n1. Extract a list of molecules with an FDA decision expected in 2025.\n2. Organize the data by indication and expected approval date.\n3. Collect and compile detailed information for each molecule as listed above.\n4. Use placeholders for data specific to individual drugs where necessary.\n5. Validate the collected data against credible sources to ensure accuracy.\n\n# Output Format\n\nPresent the results in a detailed table format, where each row represents a molecule, and the columns align with the aforementioned data fields.\n\n# Examples\n\n- **Indication:** [Example Indication]\n- **Expected Approval Date:** [Date]\n- **Brand Name and Active Ingredient:** [Brand Name] (Active Ingredient)\n- **Product Attributes:** [Attributes]\n- **Application Forms:** [Forms]\n- **Storage Conditions:** [Conditions]\n- **Relevant Patient Groups:** [Groups]\n- **Information for Doctors, Pharmacies, Patients:** [Information]\n- **Mechanism of Action, Contraindications, Side Effects, Interactions, and Training Notes:** [Details]\n- **Approved Competitive Products:** [Products]\n- **Sources:** [Source Information]\n\n(Note: These are placeholders. Real examples should be completed with accurate and specific data.)\n\n# Notes\n\n- Pay particular attention to any molecules marked for FDA Priority Review, as these often have implications for earlier approval.\n- If data discrepancies arise, ensure to document sources and note the details of any uncertainties.\n- Ensure that the information is up-to-date and reflects the most recent submissions and reviews from the FDA.",
          "type": "text"
        }
      ]
    },
    {
      "role": "user",
      "content": [
        {
          "text": "Create a complete table of all molecules for all indications for which a decision by the US Food and Drug Administration (FDA) is expected in 2025, including FDA Priority Review. The list is sorted by indication and expected approval date. Furthermore, the list includes: brand name, active ingredient, product properties, forms of application (including dosage forms such as pen, prefilled syringe, film-coated tablet, capsule, etc.), storage conditions (especially whether cooling is required), special patient groups, and information for physicians, pharmacies, and patients, including mechanism of action, contraindications, side effects, interactions and training instructions, and approved competitor products.\nAll right. I will compile a complete list of all medications (molecules) for which an FDA decision is expected in 2025 – including all indications and both regular and Priority Reviews.\nThe overview will be sorted by indication and expected approval date. For each product, I will provide the following information: · Brand name and active ingredient · Product properties · Forms of application (including dosage forms such as pen, prefilled syringe, film-coated tablet, etc.) · Storage conditions (especially whether cooling is required) · Relevant patient groups · Information for physicians, pharmacies, and patients · Mechanism of action, contraindications, side effects, interactions, and training instructions · Approved competitor products\nI will get back to you as soon as I have compiled the results for you.",
          "type": "text"
        }
      ]
    },
    {
      "role": "assistant",
      "content": [
        {
          "type": "text",
          "text": "For example :  As of March 31, 2025, the U.S. Food and Drug Administration (FDA) has approved several novel drugs and vaccines, with additional decisions anticipated throughout the year. Below is a comprehensive table of these medications, organized by indication and expected approval date. Each entry includes detailed information on the brand name, active ingredient, product properties, forms of application, storage conditions, special patient groups, and pertinent information for healthcare providers and patients.\n\n| Indication | Expected Approval Date | Brand Name | Active Ingredient | Product Properties | Forms of Application | Storage Conditions | Special Patient Groups | Mechanism of Action | Contraindications | Side Effects | Interactions | Training Instructions | Approved Competitor Products |\n|------------|------------------------|------------|-------------------|--------------------|----------------------|--------------------|------------------------|---------------------|-------------------|--------------|-------------|----------------------|------------------------------|\n| **Tenosynovial Giant Cell Tumor** | February 14, 2025 | Romvimza | Vimseltinib | Switch-control kinase inhibitor targeting CSF1R | Oral capsules | Store at room temperature | Adults with symptomatic tenosynovial giant cell tumor | Inhibits CSF1R to reduce tumor growth | Hypersensitivity to vimseltinib | Fatigue, nausea, liver enzyme elevation | Potential interactions with CYP3A4 inhibitors | Patients should be monitored for liver function | None approved |\n| **Neurofibromatosis Type 1 with Symptomatic Plexiform Neurofibromas** | February 11, 2025 | Gomekli | Mirdametinib | MEK inhibitor | Oral capsules | Store at room temperature | Pediatric and adult patients with NF1 and inoperable tumors | Inhibits MEK1/2 to reduce tumor size | Hypersensitivity to mirdametinib | Rash, diarrhea, elevated creatine phosphokinase | Avoid concomitant use with strong CYP3A4 inhibitors | Regular monitoring of cardiac function recommended | None approved |\n| **Moderate to Severe Acute Pain** | January 30, 2025 | Journavx | Suzetrigine | Non-opioid analgesic, sodium channel blocker | Oral tablets (50 mg) | Store at room temperature | Adults requiring short-term pain management | Selectively inhibits NaV1.8 sodium channels to block pain signals | Hypersensitivity to suzetrigine | Dizziness, headache, gastrointestinal discomfort | Caution with other CNS depressants | Patients should be advised on potential CNS effects | Opioid analgesics (e.g., hydrocodone) |\n| **Acute Myeloid Leukemia and Myelodysplastic Syndrome** | January 21, 2025 | Grafapex | Treosulfan | Alkylating agent used in conditioning regimens | Intravenous infusion | Refrigerate; protect from light | Patients undergoing allogeneic hematopoietic stem cell transplantation | Cross-links DNA to inhibit cell division | Hypersensitivity to treosulfan | Myelosuppression, mucositis, hepatotoxicity | Increased toxicity with other myelosuppressive agents | Healthcare providers should monitor blood counts and organ function | Busulfan |\n| **Unresectable or Metastatic HR-Positive, HER2-Negative Breast Cancer** | January 17, 2025 | Datroway | Datopotamab deruxtecan-dlnk | Antibody-drug conjugate targeting TROP2 | Intravenous infusion | Refrigerate; protect from light | Adults with prior endocrine-based therapy and chemotherapy | Delivers cytotoxic agent to TROP2-expressing cancer cells | Hypersensitivity to components | Nausea, neutropenia, interstitial lung disease | Caution with other immunosuppressants | Patients should be monitored for pulmonary symptoms | Sacituzumab govitecan |\n| **Meningococcal Disease Prophylaxis** | February 14, 2025 | Penmenvy | Meningococcal groups A, B, C, W, and Y vaccine | Lyophilized powder for injection | Intramuscular injection | Refrigerate; do not freeze | Individuals aged 10 through 25 years | Induces immunity against Neisseria meningitidis serogroups A, B, C, W, and Y | Severe allergic reaction to vaccine components | Injection site reactions, fever, headache | None significant | Healthcare providers should be prepared for anaphylaxis management | Menveo, Bexsero |\n| **Chikungunya Disease Prevention** | February 14, 2025 | Vimkunya | Chikungunya vaccine, recombinant | Injectable suspension | Intramuscular injection | Refrigerate; do not freeze | Individuals aged 12 years and older | Induces immunity against chikungunya virus | Severe allergic reaction to vaccine components | Injection site reactions, fever, myalgia | None significant | Patients should be monitored for allergic reactions post-vaccination | None approved |\n| **Diabetes Mellitus** | February 14, 2025 | Merilog | Insulin aspart-szjj | Rapid-acting human insulin analog biosimilar to NovoLog | Subcutaneous injection | Refrigerate; do not freeze | Adults and pediatric patients with diabetes mellitus | Facilitates glucose uptake by cells | Hypoglycemia, hypersensitivity to insulin aspart | Hypoglycemia, weight gain, injection site reactions | Caution with other glucose-lowering agents | Patients should be trained on injection technique and hypoglycemia management | NovoLog |\n| **Uncomplicated Urinary Tract Infections** | March 26, 2025 (expected) | Gepotidacin | Gepotidacin | First-in-class oral antibiotic | Oral tablets | Store at room temperature | Female adults and adolescents with uUTIs | Inhibits bacterial DNA replication | Hypersensitivity to gepotidacin | Nausea, diarrhea, headache | Potential interactions with other antibiotics | Patients should complete the full course as prescribed | Nitrofurantoin, trimethoprim-sulfamethoxazole |\n| **Hereditary Angioedema Attacks** | June 17, 2025 (expected) | Sebetralstat | Sebetralstat | Oral plasma kallikrein inhibitor | Oral disintegrating tablets | Store at room temperature | Patients aged 12 years and older with HAE | Inhibits plasma kallikrein to reduce bradykinin production | Hypersensitivity to sebetralstat | Nausea, headache, dizziness | Caution with other kallikrein inhibitors | Patients should be trained on recognizing and managing HAE attacks | Icatibant (injection) |\n| **Vasomotor Symptoms Associated with Menopause** | July 26, 2025 (expected) | Elinzanetant | Elinzanetant | Dual NK-1 and NK-3 receptor antagonist | Oral tablets | Store at room temperature | Women experiencing moderate to severe VMS | Modulates KNDy neurons to reduce hot flashes | Hypersensitivity to elinzanetant | Nausea, headache, fatigue | Potential interactions with CYP3A4 inhibitors | Patients should be informed about the non-hormonal nature of the treatment | Hormone replacement therapy |\n| **H3 K27M-Mutant Diffuse Glioma** | August 18, 2025 (expected "
        }
      ]
    },
    {
      "role": "user",
      "content": [
        {
          "type": "text",
          "text": "This is too short... it need to be logner-"
        }
      ]
    },
    {
      "role": "user",
      "content": [
        {
          "type": "text",
          "text": "retry: Erstelle vollständige Tabelle aller Moleküle von allen Indikationen für die im Jahr 2025 eine Entscheidung der US-amerikanischen Food and Drug Administration (FDA) erwartet wird inklusive FDA Priority Review. Die Liste ist nach Indikation und erwartetem Zulassungsdatum sortiert. Die Liste umfasst im Weiteren: Brandname, Wirkstoff, Produkteigenschaften, Applikationsformen (einschließlich Darreichungsformen wie Pen, Fertigspritze, Filmtablette, Kapsel usw.), Lagerungsbedingungen (insbesondere ob eine Kühlung erforderlich ist), spezielle Patientengruppen, Informationen für Ärzte, Apotheken und Patienten, inklusive Wirkmechanismus, Kontraindikationen, Nebenwirkungen, Interaktionen und Schulungshinweisen, zugelassene Konkurrenzprodukte\\nAlles klar. Ich stelle eine vollständige Liste aller Arzneimittel (Moleküle) zusammen, für die im Jahr 2025 eine FDA-Entscheidung erwartet wird – einschließlich aller Indikationen und sowohl regulärer als auch Priority Reviews.\\nDie Übersicht wird nach Indikation und erwartetem Zulassungsdatum sortiert. Zu jedem Produkt werde ich die folgenden Informationen angeben:\\n·         Brandname und Wirkstoff\\n·         Produkteigenschaften\\n·         Applikationsformen (inkl. Darreichungsformen wie Pen, Fertigspritze, Filmtablette usw.)\\n·         Lagerungsbedingungen (v. a. ob Kühlung erforderlich ist)\\n·         Relevante Patientengruppen\\n·         Informationen für Ärzte, Apotheken und Patienten\\n·         Wirkmechanismus, Kontraindikationen, Nebenwirkungen, Interaktionen und Schulungshinweise\\n·         Zugelassene Konkurrenzprodukte\\nIch melde mich, sobald ich die Ergebnisse für dich zusammengestellt habe."
        }
      ]
    }
  ],
  response_format={
    "type": "json_schema",
    "json_schema": {
      "name": "fda_drug_approvals_2025",
      "schema": {
        "type": "object",
        "required": ["molecules"],
        "properties": {"molecules": {"type": "array","items": {"type": "object","required": [
                "indication",
                "expected_approval_date",
                "brand_name",
                "active_ingredient",
                "product_properties",
                "competitor_products",
                "sources"],
        "properties": {
                "brand_name": {
                  "type": "string",
                  "description": "The brand name of the medication."
                },
                "indication": {
                  "type": "string",
                  "description": "The medical indication for which the drug is intended."
                },
                "sources": {
                  "type": "string",
                  "description": "Source urls for the information provided."
                },
                "active_ingredient": {
                  "type": "string",
                  "description": "The active ingredient contained in the medication."
                },
                "product_properties": {
                  "type": "object",
                  "required": [
                    "formulations",
                    "storage_conditions",
                    "special_patient_groups",
                    "info_for_healthcare"
                  ],
                  "properties": {
                    "formulations": {
                      "type": "array",
                      "items": {
                        "type": "string"
                      },
                      "description": "Different formulations of the product."
                    },
                    "storage_conditions": {
                      "type": "string",
                      "description": "Conditions under which the medication should be stored, including cooling requirements."
                    },
                    "info_for_healthcare": {
                      "type": "object",
                      "required": [
                        "mechanism_of_action",
                        "contraindications",
                        "side_effects",
                        "interactions",
                        "training_instructions"
                      ],
                      "properties": {
                        "interactions": {
                          "type": "array",
                          "items": {
                            "type": "string"
                          },
                          "description": "Known interactions with other drugs or substances."
                        },
                        "side_effects": {
                          "type": "array",
                          "items": {
                            "type": "string"
                          },
                          "description": "Potential adverse effects of the drug."
                        },
                        "contraindications": {
                          "type": "array",
                          "items": {
                            "type": "string"
                          },
                          "description": "Conditions or factors that would make the drug unsafe or inappropriate to use."
                        },
                        "mechanism_of_action": {
                          "type": "string",
                          "description": "How the drug works in the body."
                        },
                        "training_instructions": {
                          "type": "string",
                          "description": "Instructions for training on how to use the medication."
                        }
                      },
                      "description": "Information for healthcare professionals and patients.",
                      "additionalProperties": False
                    },
                    "special_patient_groups": {
                      "type": "array",
                      "items": {
                        "type": "string"
                      },
                      "description": "Groups of patients for whom special considerations may apply."
                    }
                  },
                  "description": "Properties of the product that are relevant for understanding its characteristics.",
                  "additionalProperties": False
                },
                "competitor_products": {
                  "type": "array",
                  "items": {
                    "type": "string"
                  },
                  "description": "Other approved products that serve as competitors."
                },
                "expected_approval_date": {
                  "type": "string",
                  "description": "The anticipated date of FDA approval."
                }
              },
              "additionalProperties": False
            },
            "description": "A list of all molecules for which FDA decisions are expected in 2025, sorted by indication and expected approval date."
          }
        },
        "additionalProperties": False
      },
      "strict": True
    }
  },
  reasoning={},
  tools=[{"type": "web_search"}],
  store=True
)


BadRequestError: Error code: 400 - {'error': {'message': "Missing required parameter: 'tools[0].function'.", 'type': 'invalid_request_error', 'param': 'tools[0].function', 'code': 'missing_required_parameter'}}

In [None]:
response = response.choices[0].message.content
response




'{"molecules": [{"brand_name": "Vykat XR", "indication": "Prader-Willi-Syndrom", "sources": "https://www.reuters.com/business/healthcare-pharmaceuticals/us-fda-greenlights-first-ever-treatment-rare-metabolic-disorder-2025-03-26/", "active_ingredient": "Diazoxid-Cholin", "product_properties": {"formulations": ["Retardtabletten"], "storage_conditions": "Bei Raumtemperatur lagern", "info_for_healthcare": {"interactions": ["Vorsicht bei gleichzeitiger Anwendung mit blutdrucksenkenden Medikamenten"], "side_effects": ["Übelkeit", "Schwindel", "Hypoglykämie"], "contraindications": ["Überempfindlichkeit gegen Diazoxid"], "mechanism_of_action": "Aktiviert ATP-sensitive Kaliumkanäle, reduziert die Sekretion appetitanregender Neuropeptide", "training_instructions": "Patienten sollten über die Erkennung und Behandlung von Hypoglykämie informiert werden"}, "special_patient_groups": ["Kinder ab 4 Jahren mit Prader-Willi-Syndrom"]}, "competitor_products": ["Keine zugelassenen Konkurrenzprodukte"], "e

In [127]:
import demjson3
raw_output = response.strip()
if raw_output.startswith("```") and raw_output.endswith("```"):
    raw_output = raw_output.strip("```")
    if raw_output.lstrip().startswith("json"):
        raw_output = raw_output.lstrip()[len("json"):].strip()
print("First 500 characters of cleaned output:")
print(raw_output[:500])

First 500 characters of cleaned output:
{"molecules": [{"brand_name": "Vykat XR", "indication": "Prader-Willi-Syndrom", "sources": "https://www.reuters.com/business/healthcare-pharmaceuticals/us-fda-greenlights-first-ever-treatment-rare-metabolic-disorder-2025-03-26/", "active_ingredient": "Diazoxid-Cholin", "product_properties": {"formulations": ["Retardtabletten"], "storage_conditions": "Bei Raumtemperatur lagern", "info_for_healthcare": {"interactions": ["Vorsicht bei gleichzeitiger Anwendung mit blutdrucksenkenden Medikamenten"], 


In [130]:
import demjson3
import json

# Save raw output for inspection
with open("raw_output.txt", "w", encoding="utf-8") as f:
    f.write(raw_output)
print("Raw output saved to raw_output.txt")

try:
    # Try to parse with standard json first
    data = json.loads(raw_output)
except json.JSONDecodeError:
    # If that fails, use demjson3 which is more tolerant
    try:
        data = demjson3.decode(raw_output)
        print("Successfully parsed with demjson3")
    except demjson3.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        # Attempt to repair the JSON by adding closing brackets
        repaired_json = raw_output
        if '"molecules":' in raw_output and not raw_output.strip().endswith('}'):
            # Check if we're missing closing brackets for array and/or object
            if '[' in raw_output and ']' not in raw_output[-5:]:
                repaired_json += "]}"
            elif not repaired_json.strip().endswith('}'):
                repaired_json += "}"
            
            try:
                data = demjson3.decode(repaired_json)
                print("Successfully parsed after repairing JSON")
            except Exception as e:
                print(f"Failed to repair JSON: {e}")
                # Create a minimal valid structure to avoid errors in subsequent cells
                data = {"molecules": []}



Raw output saved to raw_output.txt
Error parsing JSON: String literal is not terminated
Failed to repair JSON: String literal is not terminated


In [131]:
# dict to an excel file 
import pandas as pd
df = pd.DataFrame(data["molecules"])
df.to_excel("FDA_2025_molecules.xlsx", index=False)

In [162]:
from openai import OpenAI
from pydantic import BaseModel
from typing import List
import pandas as pd
import json
from dotenv import load_dotenv
import os
import yaml

# Define Pydantic models with strict validation
class ApprovedCompetingProduct(BaseModel):
    product_name: str
    source_url: str

    class Config:
        extra = "forbid"

class InformationFor(BaseModel):
    doctors: str
    pharmacies: str
    patients: str

    class Config:
        extra = "forbid"

class ProductProperties(BaseModel):
    description: str
    application_forms: List[str]
    storage_conditions: str
    relevant_patient_groups: List[str]
    information_for: InformationFor
    mechanism_of_action: str
    contraindications: str
    side_effects: str
    interactions: str
    training_instructions: str
    approved_competing_products: List[ApprovedCompetingProduct]

    class Config:
        extra = "forbid"

class ProductOverview(BaseModel):
    brand_name: str
    product_properties: ProductProperties

    class Config:
        extra = "forbid"

# Load configuration (API key etc.)
load_dotenv()

with open("config.yaml", "r") as file:
    config = yaml.safe_load(file)

client = OpenAI(api_key=config["OPENAI_KEY_TEST"])

# Call the API with the desired input and output format
response = client.responses.create(
    model="gpt-4o-2024-11-20",
    input=[
        {
            "role": "system",
            "content": [
                {
                    "type": "input_text",
                    "text": (
                        "You are a pharmaceutical data expert and data extraction specialist. Your task is to compile a comprehensive list of all drug molecules for which an FDA decision is expected in 2025, including those with FDA Priority Review. This list must be detailed and organized by indication and expected approval date.\n\n"
                        "Gather the following details for each molecule:\n\n"
                        "- **Indication**\n"
                        "- **Expected Approval Date**\n"
                        "- **Brand Name and Active Ingredient**\n"
                        "- **Product Attributes**\n"
                        "- **Application Forms** (e.g., pen, prefilled syringe, film-coated tablet, capsule, etc.)\n"
                        "- **Storage Conditions** (especially whether cooling is required)\n"
                        "- **Relevant Patient Groups**\n"
                        "- **Information for Doctors, Pharmacies, and Patients**\n"
                        "- **Mechanism of Action, Contraindications, Side Effects, Interactions, and Training Notes**\n"
                        "- **Approved Competitive Products**\n"
                        "- **Sources**\n\n"
                        "Ensure accuracy by cross-checking data from reputable sources and note any uncertainties encountered during the information collection process."
                    )
                }
            ]
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "input_text",
                    "text": (
                        "Now give me a complete table of all molecules for all indications for which a decision by the US Food and Drug Administration (FDA) is expected in 2025, including FDA Priority Review. The list is sorted by indication and expected approval date. Furthermore, the list includes: brand name, active ingredient, product properties, forms of application (including dosage forms such as pen, prefilled syringe, film-coated tablet, capsule, etc.), storage conditions (especially whether cooling is required), special patient groups, and information for physicians, pharmacies, and patients, including mechanism of action, contraindications, side effects, interactions and training instructions, and approved competitor products.\n"
                    )
                }
            ]
        }
    ],
    text={
        "format": {
            "type": "json_schema",
            "name": "product_overview",
            "strict": True,
            "schema": ProductOverview.schema()
        }
    },
    reasoning={},
    tools=[{"type": "web_search"}],
    temperature=0.97,
    max_output_tokens=9030,
    top_p=1,
    store=True
)

if response.status == "incomplete" and response.incomplete_details.reason == "max_output_tokens":
    print("Ran out of tokens")
    if response.output_text:
        print("Partial output:", response.output_text)
    else:
        print("Ran out of tokens during reasoning")


C:\Users\FahRe\AppData\Local\Temp\ipykernel_12116\1510796883.py:102: PydanticDeprecatedSince20: The `schema` method is deprecated; use `model_json_schema` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  "schema": ProductOverview.schema()


In [163]:
print(response.output_text)

{"brand_name":"Suzetrigine","product_properties":{"description":"Suzetrigine is a non-opioid pain medication developed for the treatment of moderate-to-severe acute pain. It functions as an oral, selective inhibitor of NaV1.8, a voltage-gated sodium channel that plays a critical role in pain signaling in the peripheral nervous system.","application_forms":["Oral tablet"],"storage_conditions":"Store at room temperature; no special cooling required.","relevant_patient_groups":["Adults with moderate-to-severe acute pain"],"information_for":{"doctors":"Suzetrigine offers a non-opioid alternative for managing acute pain, potentially reducing the risk of addiction associated with opioid use.","pharmacies":"Ensure proper dispensing and patient education regarding the non-opioid nature and administration of suzetrigine.","patients":"Suzetrigine is a non-opioid medication designed to manage acute pain without the risks associated with opioids."},"mechanism_of_action":"Suzetrigine selectively in

In [None]:
import json
print(response.output_text)
print(json.dumps(response.output, default=lambda o: o.__dict__, indent=2))

[
  {
    "id": "ws_67ea9eb223248191bc5bb250f6bfcd320348dd1be5657452",
    "status": "completed",
    "type": "web_search_call"
  },
  {
    "id": "msg_67ea9eb50e248191955475841847cdc10348dd1be5657452",
    "content": [
      {
        "annotations": [],
        "text": "{\"brand_name\":\"Suzetrigine\",\"product_properties\":{\"description\":\"Suzetrigine is a non-opioid pain medication developed for the treatment of moderate-to-severe acute pain. It functions as an oral, selective inhibitor of NaV1.8, a voltage-gated sodium channel that plays a critical role in pain signaling in the peripheral nervous system.\",\"application_forms\":[\"Oral tablet\"],\"storage_conditions\":\"Store at room temperature; no special cooling required.\",\"relevant_patient_groups\":[\"Adults with moderate-to-severe acute pain\"],\"information_for\":{\"doctors\":\"Suzetrigine offers a non-opioid alternative for managing acute pain, potentially reducing the risk of addiction associated with opioid use.\",\"ph