In [27]:
# This notebook tests the system prompt used for news-article analysis.

In [28]:
import dotenv
import os
from openai import OpenAI
import json
# Load environment variables from the .env file one directory above the notebook.
dotenv.load_dotenv(dotenv_path="../.env")


True

In [29]:
# API client; the key is read from the environment rather than hard-coded.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [30]:
# System prompt under test.
# Kept as a plain (non-f) string: it has no placeholders, so the JSON example can use
# single braces. The backslash in the quote-escaping rule is written as `\\"` so that
# the model actually sees `\"`; a bare `\"` in the literal collapses to `"`.
system_prompt = """
You are a professional news analyst. Read the following article and extract structured information for classification and downstream processing.

Your output must include:

1. **facts** – A list of key factual statements extracted from the article.
- Format each fact as a concise sentence in the "Subject Verb Object Location Datetime" format.
- Avoid speculation or opinions.
- If no clear factual statements exist, return an empty list.
- Each fact must be self-contained and use standard double quotes.
- If a sentence or paragraph contains multiple distinct pieces of factual information (e.g., participants, event type, location, affiliations, outcomes, causes, or context), extract each as a separate fact object. Avoid combining multiple facts into a single entry.
- For each fact, provide:
  - `"fact"`: a string in the format "Subject Verb Object Location Datetime"
  - `"datetime"`: the date the fact occurred (format: "YYYY-MM-DD"). If the fact contains a relative time reference (e.g., "in the last 24 hours"), resolve it to an estimated date using the article's `publish_date`.
  - `"location"`: where the fact took place (e.g., "Gaza Strip")
  - `"bias_analysis"`: an explanation of any bias present in the fact’s presentation or language
  - `"bias_indicators"`: examples of language or framing that suggest bias

2. **type** – The journalistic type or intent of the article. Choose one of:
   - "Hard News"
   - "Feature"
   - "Opinion"
   - "Editorial"
   - "Analysis"
   - "Investigative"
   - "Interview"
   - "Podcast"
   - "Commentary"
   - "Review"
   - "Explainer"
   - "Profile"
   - "Satire"
   - "Live Blog"

3. **topic** – The subject domain of the article. Choose one of:
   - "Politics"
   - "Economy"
   - "Business"
   - "Science"
   - "Technology"
   - "Health"
   - "Environment"
   - "Crime & Law"
   - "Education"
   - "War & Conflict"
   - "Human Rights"
   - "Sports"
   - "Culture"
   - "Arts"
   - "Entertainment"
   - "Lifestyle"
   - "Travel"
   - "Weather"
   - "World"
   - "Domestic"

4. **publish_date** – The date the article was published (format: "YYYY-MM-DD").

5. **source** – The name of the news outlet (e.g., "BBC", "The Guardian").

6. **summary** – A full, self-contained summary of the article’s content and context.

7. **bias_analysis** – An assessment of ideological or editorial bias in the article as a whole, including tone or framing.

8. **bias_indicators** – Specific phrases, adjectives, or framing in the article that signal bias. Include direct examples.

9. **keywords** – Up to 5 concise, specific keywords or phrases in Title Case.

10. **keyword_summaries** – A dictionary: each keyword is a key, with a short summary of how that keyword is addressed in the article.

**Formatting Requirements:**
- Output must be valid JSON.
- Use standard double quotes (`"`) for keys and string values.
- Escape internal quotes inside string values using a backslash (`\\"`).
- If any value is missing or unknown, use `""`, `[]`, or `null` appropriately.
- Ensure the JSON can be parsed by `json.loads()` without errors.

**Example Output:**
{
  "facts": [
    {
      "fact": "Hamas Attacked Bus Carrying Workers Southern Gaza 2024-06-01",
      "datetime": "2024-06-01",
      "location": "Southern Gaza",
      "bias_analysis": "Phrasing implies direct responsibility before official confirmation.",
      "bias_indicators": "Use of 'attacked', 'killing at least eight people'"
    },
    {
      "fact": "Gaza Health Ministry Reported 103 Deaths Gaza Strip 2024-06-01",
      "datetime": "2024-06-01",
      "location": "Gaza Strip",
      "bias_analysis": "Reported figures not independently verified.",
      "bias_indicators": "Reliance on single source without confirmation"
    }
  ],
  "type": "Hard News",
  "topic": "War & Conflict",
  "publish_date": "2024-06-02",
  "source": "BBC News",
  "summary": "The article reports ongoing violence and humanitarian aid distribution issues in Gaza...",
  "bias_analysis": "The article presents accusations without definitive verification and highlights criticism of GHF.",
  "bias_indicators": "Loaded terms like 'cowardly murderers', one-sided statements from GHF",
  "keywords": ["Hamas", "GHF", "Aid Distribution", "Gaza Violence", "Humanitarian Crisis"],
  "keyword_summaries": {
    "Hamas": "Allegations of attacks and interference with aid distribution.",
    "GHF": "A newly formed aid group under scrutiny.",
    "Aid Distribution": "The central challenge amid violence and logistical issues.",
    "Gaza Violence": "Ongoing civilian casualties during aid efforts.",
    "Humanitarian Crisis": "Describes worsening conditions and failed coordination."
  }
}

**Content Integrity Requirements:**
- Do not generate, infer, or assume any information that is not explicitly stated in the article.
- Do not hallucinate names, dates, events, motivations, outcomes, statistics, or quotes.
- If specific information (e.g., location, date, subject) is not provided, use empty strings (`""`), `null`, or `[]` as appropriate.
- All extracted content must be strictly grounded in the source article text.

"""


In [31]:
# Sample article used as input for the prompt test.
# The first line of the text carries the publication timestamp and the byline;
# the indented paragraphs are the passage the article quotes from CNN.
article_text = """
Aug. 7, 2025 11:49 am by The Right Scoop

CNN is reporting that the FBI is now firing both senior officials and agents who opposed President Trump in the past, and that includes the former acting FBI Director who didn’t want the agency to reveal the names of agents who worked on the J6 cases.

Here’s more:

    Two senior FBI officials, including one who initially resisted the Trump administration’s effort to gather the names of agents who worked on cases related to the US Capitol attack on January 6, 2021, are being fired, multiple sources tell CNN.

    Brian Driscoll, who was acting FBI director in the weeks before now-Director Kash Patel was confirmed, is being dismissed, sources said. Steve Jensen, the acting director in charge of the Washington Field Office, is also being fired, according to two separate sources.

    The administration is also firing other agents in the bureau this week who were perceived to be opposed to Trump in the past, according to three sources familiar with the agency’s actions.

    The FBI and its Washington field office declined to comment. CNN has reached out to the New York field office for comment.

I believe I read earlier this year that a new administration must wait around six months before they can fire employees, which is why they began firing probationary employees first. I’m guessing this is why these officials and agents are only now getting the boot.

But kudos to Director Patel for doing this, because it must be done.

"""

In [32]:
# Send the prompt under test as the system message and the sample article as the user message.
response = client.chat.completions.create(
    model="gpt-4",
    messages=[
        dict(role="system", content=system_prompt),
        dict(role="user", content=article_text),
    ],
)

In [33]:
def _parse_model_json(raw_text):
    """Decode the model's reply as JSON, tolerating a surrounding Markdown code fence.

    Args:
        raw_text: The message content returned by the model (may be None or empty).

    Returns:
        The decoded JSON value.

    Raises:
        ValueError: If the reply is None, empty, or whitespace only.
        json.JSONDecodeError: If the (unfenced) reply is not valid JSON.
    """
    if raw_text is None or not raw_text.strip():
        raise ValueError("Model returned no content to parse as JSON.")

    candidate = raw_text.strip()
    if candidate.startswith("```"):
        # Drop the opening fence line (``` or ```json) and, if present, the closing fence.
        _, _, candidate = candidate.partition("\n")
        candidate = candidate.rstrip()
        if candidate.endswith("```"):
            candidate = candidate[:-3]

    return json.loads(candidate)


response_dict = response.model_dump()
content = response_dict['choices'][0]['message']['content']

# Parse content string as JSON (a wrapping code fence, if any, is removed first)
parsed_json = _parse_model_json(content)

# Pretty-print it
print(json.dumps(parsed_json, indent=2))

{
  "facts": [
    {
      "fact": "The FBI fired multiple senior officials and agents who opposed President Trump U.S. 2025-08-07",
      "datetime": "2025-08-07",
      "location": "U.S.",
      "bias_analysis": "The fact is reported from a source and confirmed by multiple others. Contextual language suggests bias towards the Trump administration's actions.",
      "bias_indicators": [
        "opposed President Trump"
      ]
    },
    {
      "fact": "Brian Driscoll was the acting FBI director U.S. 2025-08-07",
      "datetime": "2025-08-07",
      "location": "U.S.",
      "bias_analysis": "This fact is neutrally presented with no apparent bias.",
      "bias_indicators": []
    },
    {
      "fact": "Brian Driscoll was dismissed U.S. 2025-08-07",
      "datetime": "2025-08-07",
      "location": "U.S.",
      "bias_analysis": "The fact is reported from a source and confirmed by multiple others. No clear bias is detected.",
      "bias_indicators": []
    },
    {
      "fact": 