# News Query & Summary App Using an LLM

Load necessary packages.

In [1]:
import cohere
import json
from datetime import date
import requests
from fpdf import FPDF
from fpdf.enums import XPos, YPos
from io import BytesIO



Define test prompt.

In [2]:
# Sample natural-language question that is fed through every stage of the
# pipeline in the test cells below.
test_prompt = "What's happening in Egypt right now?"

Create the transformation from a user prompt to GDELT query parameters.

In [3]:
def _shift_yyyymmdd(day, days):
    """Return the YYYYMMDD string lying `days` calendar days after `day`.

    A negative `days` moves backwards in time. Real calendar arithmetic is
    used so that month and year boundaries are crossed correctly (subtracting
    1 from the integer 20250401 would give the non-existent date 20250400).

    Raises:
        ValueError: if `day` is not a real calendar date in YYYYMMDD form.
    """
    # Imported locally because the notebook's import cell only brings in `date`
    from datetime import datetime, timedelta

    shifted = datetime.strptime(day, "%Y%m%d") + timedelta(days=days)
    return shifted.strftime("%Y%m%d")


def _clean_yyyymmdd(value):
    """Return `value` as a YYYYMMDD string, or None if it is not a usable date.

    The model is asked to answer "N/A" for missing dates, but it may also
    return integers, other formats or impossible dates; all of those are
    treated as missing.
    """
    text = str(value).strip()
    if len(text) != 8 or not (text.isascii() and text.isdigit()):
        return None
    try:
        _shift_yyyymmdd(text, 0)
    except ValueError:
        return None
    return text


def _resolve_date_range(startdate, enddate, today):
    """Return a valid `(startdate, enddate)` pair of YYYYMMDD strings.

    `today` is a YYYYMMDD string. Missing or unusable dates are filled in:
    the end date defaults to yesterday and the start date to seven days
    before the end date.
    """
    start = _clean_yyyymmdd(startdate)
    end = _clean_yyyymmdd(enddate)

    # A missing end date, or one equal to today, falls back to yesterday
    if end is None or end == today:
        end = _shift_yyyymmdd(today, -1)

    # A missing start date, or one after the end date, becomes end - 7 days.
    # Plain string comparison is safe: both values are 8-digit date strings.
    if start is None or end < start:
        start = _shift_yyyymmdd(end, -7)

    return start, end


def get_gdelt_params(user_prompt, cohere_client=None, today=None):
    """Turn a free-text news question into GDELT query parameters.

    The prompt is sent to the Cohere chat model, which is instructed to answer
    with a JSON object holding `query`, `startdate` and `enddate`. Missing or
    inconsistent dates are then repaired with calendar arithmetic.

    Args:
        user_prompt: The user's question. Non-string values are converted
            with `str()`.
        cohere_client: Optional object exposing a Cohere-style
            `chat(messages=..., model=...)` method. When omitted, a
            `cohere.ClientV2` is created from the API key stored in
            `key.txt`.
        today: Optional `datetime.date` to treat as the current day. Defaults
            to `date.today()`.

    Returns:
        - A dict with at least the keys `query`, `startdate` and `enddate`
          (dates as YYYYMMDD strings) on success.
        - The string "Query not found" when the model could not extract a
          query.
        - The model's raw response text (a string) when it is not a JSON
          object.
    """

    # Initialize Cohere client (only when the caller did not supply one)
    if cohere_client is None:
        with open("key.txt") as f:
            # Strip the trailing newline most editors append to the key file
            cohere_api_key = f.read().strip()
        cohere_client = cohere.ClientV2(cohere_api_key)

    # Save the date for today
    today_str = (date.today() if today is None else today).strftime("%Y%m%d")

    # Define the system parameters
    system_prompt = f"""
    The user will write a text asking for news about a topic during a period.
    Extract the information necessary to populate this json object:
    {{
    "query": X,
    "startdate": X,
    "enddate": X
    }}
    The query should include a certain action (e.g. "terror" or "protest") and region (e.g. "france" or "paris"). It is intended for the GDELT API.
    The startdate and enddate should be in the format YYYYMMDD. Remember that today is {today_str}.
    If the information is not present, return "N/A".
    Respond only with the json object.
    """

    # Check if model input is string
    if not isinstance(user_prompt, str):
        user_prompt = str(user_prompt)

    # Get the model response
    raw_response = cohere_client.chat(
        messages=[
            {'role': 'system', 'content': system_prompt},
            {'role': 'user', 'content': user_prompt},
        ],
        model='command-r-plus'
    )

    # Save the response
    response_text = raw_response.message.content[0].text

    # The model sometimes wraps its answer in a Markdown ```json fence
    if '```json\n' in response_text:
        response_text = response_text.split('```json\n')[1].split('\n```')[0].strip()

    # Anything that is not a JSON object is handed back verbatim
    try:
        response_json = json.loads(response_text)
    except (TypeError, ValueError):
        return response_text
    if not isinstance(response_json, dict):
        return response_text

    # Without a query there is nothing to search for
    if response_json.get('query') in (None, "", "N/A"):
        return "Query not found"

    # Fill in missing dates and repair inconsistent ones
    response_json['startdate'], response_json['enddate'] = _resolve_date_range(
        response_json.get('startdate', "N/A"),
        response_json.get('enddate', "N/A"),
        today_str,
    )

    # Return the response
    return response_json

Test transformation.

In [4]:
# Convert the sample prompt into GDELT query parameters. The bare name on the
# last line makes the notebook display the result.
test_params = get_gdelt_params(test_prompt)
test_params

{'query': 'egypt', 'startdate': '20250322', 'enddate': '20250329'}

Create the query that fetches GDELT articles from the GDELT parameters.

In [5]:
def get_gdelt_articles(gdelt_params, timeout=30):
    """Fetch a list of articles from the GDELT DOC API.

    Args:
        gdelt_params: Mapping with the keys `query`, `startdate` and
            `enddate` (dates as YYYYMMDD), as produced by `get_gdelt_params`.
        timeout: Seconds to wait for the server before `requests` gives up.
            Without a timeout the call could block indefinitely.

    Returns:
        - The decoded JSON payload when the API answers with status 200 and a
          JSON body.
        - The raw response text when the status is 200 but the body is not
          valid JSON.
        - The integer HTTP status code for any other status.

    Raises:
        TypeError: if `gdelt_params` is not a mapping, e.g. one of the error
            strings that `get_gdelt_params` can return.
        KeyError: if a required key is missing from `gdelt_params`.
    """

    # Extract variables parameters
    try:
        query = gdelt_params['query']
        startdate = gdelt_params['startdate']
        enddate = gdelt_params['enddate']
    except TypeError as err:
        # Same exception type as before, but with a message that points at
        # the real problem instead of "string indices must be integers"
        raise TypeError(
            "gdelt_params must be a mapping with 'query', 'startdate' and "
            f"'enddate'; got {gdelt_params!r}"
        ) from err

    # Save API attributes
    url = "https://api.gdeltproject.org/api/v2/doc/doc"
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/115.0.0.0 Safari/537.36"
        )
    }

    # Save API parameters
    # NOTE(review): both timestamps use 00:00:00, so the end date itself looks
    # to be excluded from the search window - confirm this is intended.
    params = {
        "query": f"{query}",
        "startdatetime": f"{startdate}000000",
        "enddatetime": f"{enddate}000000",
        "mode": "artlist",
        "format": "json",
        "maxrecords": 50,
        "sort": "hybridrel"
    }

    # Get the API response
    raw_response = requests.get(url, params=params, headers=headers, timeout=timeout)

    # Anything but 200 is reported through its status code
    if raw_response.status_code != 200:
        return raw_response.status_code

    # The body is returned as plain text when it is not valid JSON
    try:
        return json.loads(raw_response.text)
    except ValueError:
        return raw_response.text

Test query.

In [6]:
# Query GDELT with the parameters extracted from the sample prompt and
# display the returned payload.
test_articles = get_gdelt_articles(test_params)
test_articles

{'articles': [{'url': 'https://news.republika.co.id/berita/stideo451/mesir-bantah-akan-relokasi-warga-palestina-ke-sinai-utara',
   'url_mobile': '',
   'title': 'Mesir Bantah akan Relokasi Warga Palestina ke Sinai Utara',
   'seendate': '20250322T073000Z',
   'socialimage': 'https://static.republika.co.id/uploads/images/inpicture_slide/_250305044500-609.png',
   'domain': 'news.republika.co.id',
   'language': 'Indonesian',
   'sourcecountry': 'Indonesia'},
  {'url': 'http://www.misr5.com/uncategorized/1694.html',
   'url_mobile': '',
   'title': 'مصر ضد سيراليون .. موعد المباراة والقنوات الناقلة والتشكيل وترتيب المجموعة',
   'seendate': '20250324T203000Z',
   'socialimage': 'http://www.misr5.com/temp/resized/medium_2025-03-24-8f0c0a9f26.jpg',
   'domain': 'misr5.com',
   'language': 'Arabic',
   'sourcecountry': 'Egypt'},
  {'url': 'http://www.misr5.com/uncategorized/2371.html',
   'url_mobile': '',
   'title': 'فنان شاب .. تعرف على ضيف الحلقة 25 من برنامج رامز إيلون مصر',
   'seenda

Create the transformation from GDELT articles to a summary.

In [7]:
def get_summary(user_prompt, gdelt_articles, cohere_client=None):
    """Summarize GDELT articles in answer to the user's original question.

    Args:
        user_prompt: The question the user originally asked; it is embedded
            in the system prompt.
        gdelt_articles: The articles to summarize. Non-string values (such as
            the dict returned by `get_gdelt_articles`) are converted with
            `str()` before being sent to the model.
        cohere_client: Optional object exposing a Cohere-style
            `chat(messages=..., model=...)` method. When omitted, a
            `cohere.ClientV2` is created from the API key stored in
            `key.txt`.

    Returns:
        The text of the model's first content item, or the raw response
        object when the response does not have the expected shape.
    """

    # Initialize Cohere client (only when the caller did not supply one)
    if cohere_client is None:
        with open("key.txt") as f:
            # Strip the trailing newline most editors append to the key file
            cohere_api_key = f.read().strip()
        cohere_client = cohere.ClientV2(cohere_api_key)

    # Define the system parameters
    system_prompt = f"""
    The user will give you a list of articles from GDELT including the URL, Date and Title.
    The user has previously asked:
    {user_prompt}
    Respond with a brief summary responding to the users previous question.
    Base your response only on the information from the given articles.
    Recommend the user between 1 and 5 most relevant articles including the URLs and titles. Translate the titles to English if necessary.
    Respond only with the summary and recommended articles.
    """

    # Check if model input is string
    if not isinstance(gdelt_articles, str):
        gdelt_articles = str(gdelt_articles)

    # Get the model response
    raw_response = cohere_client.chat(
        messages=[
            {'role': 'system', 'content': system_prompt},
            {'role': 'user', 'content': gdelt_articles},
        ],
        model='command-r-plus'
    )

    # Fall back to the raw response when it lacks the expected
    # message -> content[0] -> text structure
    try:
        return raw_response.message.content[0].text
    except (AttributeError, IndexError, TypeError):
        return raw_response

Test transformation.

In [8]:
# Summarize the fetched articles in answer to the sample prompt and display
# the resulting text.
test_summary = get_summary(test_prompt, test_articles)
test_summary

"Egypt is in the spotlight for its sports, politics, and ancient history. The Egyptian national football team faced Sierra Leone in a crucial World Cup qualifier, with Mohamed Salah leading the lineup. Egypt's parliament has been busy, referring international agreements and bills to committees. In other news, a mysterious ancient pharaoh's tomb was discovered, offering insights into Egypt's rich past. \n\nRecommended articles: \n\n1. 'مصر ضد سيراليون .. موعد المباراة والقنوات الناقلة والتشكيل وترتيب المجموعة' - misr5.com - Arabic\n2. 'مشاهدة مباراة مصر وسيراليون في تصفيات مونديال 2026 .. بث مباشر الآن ( فيديو ) ' - dostor.org - Arabic\n3. 'مصر & سيراليون .. مواجهة مصيرية في تصفيات المونديال .. محمد صلاح يقود التشكيل المتوقع للمنتخب الوطني .. بوركينا فاسو تطارد الفراعنة .. وهذه سيناريوهات التأهل لكأس العالم 2026' - vetogate.com - Arabic\n4. 'مجلس النواب يُحيل تعديلات أحكام قانون المرور للجان المختصة' - shorouknews.com - Arabic\n5. 'جبالي يحيل قرارات باتفاقيات دولية إلى لجنة الشئون الدست

Create PDF generation function for summary download.

In [12]:
def generate_pdf(user_prompt, summary):
    """Render the user's question and its summary into a one-document PDF.

    Args:
        user_prompt: The question text, printed in italics below the title.
        summary: The summary text, printed in regular type below the question.

    Returns:
        The finished PDF as a `bytes` object.
    """

    def latin1_safe(text):
        # Characters outside latin-1 are replaced with "?" before being
        # written - presumably because the built-in Helvetica font cannot
        # render them.
        return text.encode('latin-1', 'replace').decode('latin-1')

    row_height = 10

    # Start a document with a single page
    document = FPDF()
    document.add_page()

    # Centered bold title followed by a vertical gap
    document.set_font("Helvetica", style="B", size=16)
    document.cell(
        0,
        row_height,
        "World Events News Summarizer",
        new_x=XPos.LMARGIN,
        new_y=YPos.NEXT,
        align="C",
    )
    document.ln(row_height)

    # The question, in italics
    document.set_font("Helvetica", style="I", size=12)
    document.multi_cell(0, row_height, latin1_safe(user_prompt))
    document.ln(row_height)

    # The summary, in regular type
    document.set_font("Helvetica", size=12)
    document.multi_cell(0, row_height, latin1_safe(summary))

    # Hand the rendered document back as immutable bytes
    return bytes(document.output())

Test generation.

In [13]:
# Build the PDF for the sample prompt and its summary; the bare name on the
# last line makes the notebook display the raw bytes.
test_pdf = generate_pdf(test_prompt, test_summary)
test_pdf

b'%PDF-1.3\n1 0 obj\n<<\n/Count 1\n/Kids [3 0 R]\n/MediaBox [0 0 595.28 841.89]\n/Type /Pages\n>>\nendobj\n2 0 obj\n<<\n/OpenAction [3 0 R /FitH null]\n/PageLayout /OneColumn\n/Pages 1 0 R\n/Type /Catalog\n>>\nendobj\n3 0 obj\n<<\n/Contents 4 0 R\n/Parent 1 0 R\n/Resources 8 0 R\n/Type /Page\n>>\nendobj\n4 0 obj\n<<\n/Filter /FlateDecode\n/Length 734\n>>\nstream\nx\x9c\x85\x94Qo\xda0\x10\xc7\xdf\xf7)\xeeiP\xa9u\x13\'!\xc9^\xd06\xb5\xd2\xa6m\xd2V\xa4\xbd\xf4\xc5$\x86xK\xec\xd4v\x8a\xd8\xa7\xdf9\x86@h\xd6!\x10\xc6\xb1\xff\xf7\xbb\xff\xddA\xe1\xf3\x9b\x80$)\xec\xde|X\xc1\xed}\x08\xe1\x82\x04\x01\xac6p\xb7r[a\x9a\x90(\x834\x8f\xdd\xa9U\t\xf3\x9fJ\xd7%\xdc=si\r|\xe3;\x03\x0f]\xd30-\xfep}\x05\xab_\x87\x8b\xb7\xf7\x14B:\xd2\x8aB\x12\xa2T\x94\x93 \xf3R\x15\xb33\x03\x15k[.\x85\xdc\x82\x90p\xb7\xdd\xb7\x16\xb4\xd8V\x16\xa4\xda-G\x92\xd1H\xf2\t\x06\xd1EF\x1d&\x8a\x86d\x91\xe5\xb0\xda\xc1\xdc+\t\xe3Tm\xc5\xc1\xb4\xca\xd6\xbd\xeeFi\x10H\x8f;\xda\x9akhU-\xac(p\xc5d\x89\x9fB`rP\tc\x95\xde\x13X\xe1\xe

In [14]:
# Write the generated PDF bytes to test.pdf (binary mode) so the result can
# be opened and inspected.
with open("test.pdf", "wb") as f:
    f.write(test_pdf)