In [1]:
import requests
import os
import pandas as pd
from dotenv import load_dotenv, find_dotenv

In [2]:
# Load variables from the .env file located by find_dotenv(); override=True lets
# values from that file replace variables already set in the process environment.
load_dotenv(find_dotenv(), override=True)
factal_api_key = os.getenv('FACTAL_API_KEY')

# Surface a missing key here instead of as an authorization error on every
# request: without it the Authorization header would be sent as "Token None".
if not factal_api_key:
    print("Warning: FACTAL_API_KEY is not set; Factal API requests will be sent with an invalid token.")

In [3]:
def get_id(topic_name, kind=None, category=None, timeout=30):
    """
    Look up a topic by name in the Factal API and return the ID of the first match.

    Args:
        topic_name (str): Name of the topic to search for (e.g., "West Kordofan, Sudan" or "Sudan")
        kind (str, optional): Topics are categorized predominantly into three kinds: "tag", "arc" and "location". The most common topic kind is "location"
        category (str, optional): Location categories in order of granularity: "POI", "Airport", "Suburb", "Town", "Township", "NaturalFeature", "County", "State", "Colloquial", "Country"
        timeout (float or tuple, optional): Seconds to wait for the server, passed straight to requests.get. Defaults to 30.
    Returns:
        int or None: The `id` of the first result, or None when the search returns
        no usable result, the request fails, or the response body is not valid JSON
    """

    url = "https://www.factal.com/api/v2/topic/"
    # requests leaves out query parameters whose value is None, so unset
    # filters are simply not sent.
    params = {
        "name": topic_name,
        "kind": kind,
        "category": category
    }

    headers = {
        'Authorization': f'Token {factal_api_key}'
    }

    # Without a timeout requests waits indefinitely on an unresponsive server.
    try:
        response = requests.get(url, params=params, headers=headers, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error making API request: {e}")
        return None

    # Parsed in its own try block: in recent requests releases a JSON decode
    # failure is also a RequestException, which would otherwise be reported as
    # a request error.
    try:
        data = response.json()
    except ValueError as e:
        print(f"Error parsing JSON response: {e}")
        return None

    # Tolerate unexpected payload shapes instead of raising KeyError/TypeError.
    results = data.get('results') if isinstance(data, dict) else None
    first = results[0] if isinstance(results, list) and results else None
    topic_id = first.get('id') if isinstance(first, dict) else None

    if topic_id is None:
        print(f"No results found for {topic_name}")
        return None

    print(f"Topic ID for {topic_name}: {topic_id}")
    return topic_id

In [4]:
# Example: look up the ID of a town-level location topic
topic_id = get_id("Khartoum, Sudan", kind="location", category="Town")

Topic ID for Khartoum, Sudan: 336537


In [5]:
def get_items_for_topic(topic_name=None, kind=None, category=None, topic_id=None, page_size=100, limit=None, start_date=None, timeout=30):
    """
    Retrieve items for a given topic from the Factal API, following pagination.

    The topic is identified by `topic_id`; when only `topic_name` is given the
    ID is resolved first with get_id(). If both are given, `topic_id` is used.

    Args:
        topic_name (str, optional): Name of the topic location to search for (e.g., "Sudan")
        kind (str, optional): Topics are categorized predominantly into three kinds: "tag", "arc" and "location". The most common topic kind is "location". Used for the name lookup and also sent to the item endpoint.
        category (str, optional): Location categories in order of granularity: "POI", "Airport", "Suburb", "Town", "Township", "NaturalFeature", "County", "State", "Colloquial", "Country". Only sent to the item endpoint when `kind` is also given.
        topic_id (int, optional): Topic ID if already known
        page_size (int): Number of items per page (max 100)
        limit (int, optional): Maximum number of items to retrieve
        start_date (str, optional): ISO date format (YYYY-MM-DD) to filter items from
        timeout (float or tuple, optional): Seconds to wait for the server on each page request, passed straight to requests.get. Defaults to 30.

    Returns:
        pandas.DataFrame: One row per item. Empty when the topic cannot be
        resolved or nothing was retrieved. If a page fails (request error or
        unparseable body) the items fetched so far are returned.
    """

    # If we don't have a topic_id but have a name, get the ID first
    if topic_id is None and topic_name is not None:
        topic_id = get_id(topic_name, kind, category)
        if topic_id is None:
            print(f"Topic ID for {topic_name} not found.")
            return pd.DataFrame()  # Return empty dataframe if topic not found
    elif topic_name is not None and topic_id is not None:
        print("Both topic_name and topic_id provided. Using topic_id.")

    if topic_id is None:
        print("Error: Either topic_name or topic_id must be provided")
        return pd.DataFrame()

    # Base URL for items endpoint
    url = 'https://www.factal.com/api/v2/item/'

    # Query parameters for the first page; requests leaves out None values.
    params = {
        'topics': str(topic_id),
        'kind': str(kind) if kind else None,
        'category': str(category) if kind and category else None,
        'page_size': page_size
    }

    # Add optional date filter if provided
    if start_date:
        params['date__gte'] = start_date

    headers = {
        'Authorization': f'Token {factal_api_key}'
    }

    all_results = []
    next_url = url

    while next_url:
        print(f"Fetching data from: {next_url}")

        # Without a timeout requests waits indefinitely on an unresponsive server.
        try:
            response = requests.get(next_url, headers=headers, params=params, timeout=timeout)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f"Error making API request: {e}")
            break

        # Parsed in its own try block: in recent requests releases a JSON decode
        # failure is also a RequestException, which would otherwise be reported
        # as a request error.
        try:
            data = response.json()
        except ValueError as e:
            print(f"Error parsing JSON response: {e}")
            break

        # Keep what was already fetched if a page has an unexpected shape.
        if not isinstance(data, dict):
            print(f"Error parsing JSON response: expected a JSON object, got {type(data).__name__}")
            break

        results = data.get('results') or []
        all_results.extend(results)
        print(f"Retrieved {len(results)} items. Total: {len(all_results)}")

        # Stop once the limit is reached, dropping any surplus from the last page
        if limit and len(all_results) >= limit:
            all_results = all_results[:limit]
            break

        # The "next" URL already carries the query string, so the original
        # parameters must not be sent again.
        next_url = data.get('next')
        params = {}

    return pd.DataFrame(all_results)

In [6]:
# Fetch at most 50 items for West Kordofan, filtered to 2025-01-01 onwards
news_items = get_items_for_topic(
    topic_name="West Kordofan, Sudan",
    # kind: None / "location" / "arc" / "tag"
    kind="location",
    # category: None / "POI" / "Country" / "State" / "Town" / "Township" /
    #           "Suburb" / "NaturalFeature" / "Colloquial" / "Airport"
    category="State",
    start_date="2025-01-01",
    limit=50,
)

news_items

Topic ID for West Kordofan, Sudan: 2239161
Fetching data from: https://www.factal.com/api/v2/item/
Retrieved 9 items. Total: 9


Unnamed: 0,id,resource_uri,permalink,slug,topics,tweet,media,feed,url_domain,url,...,submitter,severity,type,status,pushed,pushed_major,pushed_emerging,tweeted,pushed_individual,uuid
0,51764157,https://www.factal.com/api/v2/item/51764157/,https://www.factal.com/item/2025/06/01/more-th...,more-than-2700-families-displaced-by-fighting-in,"[{'item': 51764157, 'topic': {'id': 326230, 'r...",,,,sudantribune.net,https://sudantribune.net/article301513/,...,editor,3,text,published,True,False,False,False,False,
1,51698823,https://www.factal.com/api/v2/item/51698823/,https://www.factal.com/item/2025/05/31/more-su...,more-sudanese-government-claims-wednesday-rsf,"[{'item': 51698823, 'topic': {'id': 326230, 'r...",,,,x.com,,...,editor,3,text,published,True,False,False,False,False,
2,50689405,https://www.factal.com/api/v2/item/50689405/,https://www.factal.com/item/2025/05/03/sudans-...,sudans-government-claims-rsf-killed-300-civilians,"[{'item': 50689405, 'topic': {'id': 326230, 'r...",,,,x.com,,...,editor,3,text,published,True,False,False,False,False,
3,50669707,https://www.factal.com/api/v2/item/50669707/,https://www.factal.com/item/2025/05/03/local-m...,local-media-video-appears-to-show-people-fleeing,"[{'item': 50669707, 'topic': {'id': 326230, 'r...",,,,x.com,,...,editor,2,text,published,True,False,False,False,False,
4,50655527,https://www.factal.com/api/v2/item/50655527/,https://www.factal.com/item/2025/05/02/sudans-...,sudans-government-accuses-rsf-of-killing,"[{'item': 50655527, 'topic': {'id': 326230, 'r...",,,,x.com,,...,editor,3,text,published,True,False,False,False,False,
5,50655301,https://www.factal.com/api/v2/item/50655301/,https://www.factal.com/item/2025/05/02/video-s...,video-sudans-rebel-rsf-releases-inmates-from,"[{'item': 50655301, 'topic': {'id': 326230, 'r...",,,,x.com,,...,editor,3,text,published,True,False,False,False,False,
6,50641611,https://www.factal.com/api/v2/item/50641611/,https://www.factal.com/item/2025/05/02/at-leas...,at-least-19-people-dead-37-more-injured-in-nahud,"[{'item': 50641611, 'topic': {'id': 326230, 'r...",,,,x.com,,...,editor,4,text,published,True,False,False,False,False,
7,48702707,https://www.factal.com/api/v2/item/48702707/,https://www.factal.com/item/2025/03/10/seven-p...,seven-people-reportedly-killed-in-rsf-strikes-on,"[{'item': 48702707, 'topic': {'id': 326230, 'r...",,,,x.com,,...,editor,3,text,published,True,False,False,False,False,
8,48696109,https://www.factal.com/api/v2/item/48696109/,https://www.factal.com/item/2025/03/10/at-leas...,at-least-seven-people-killed-13-more-injured,"[{'item': 48696109, 'topic': {'id': 326230, 'r...",,,,x.com,,...,editor,3,text,published,True,False,False,False,False,


In [15]:
# Extract the topics column into a separate DataFrame
def extract_topics_to_df(df, column):
    """
    Flatten a column holding lists of topic objects into one row per topic.

    Args:
        df (pandas.DataFrame): Items frame. Must have an 'id' column whenever
            at least one row carries topics.
        column (str): Name of the column whose cells hold a list of topic
            objects (anything dict() accepts).

    Returns:
        pandas.DataFrame: One row per (item, topic) pair with the topic's keys
        as columns plus 'item_id', the 'id' of the row the topic came from.
        Empty when `column` is missing or no row carries topics.
    """
    # Bail out early if the requested column is absent
    if column not in df.columns:
        print(f"Error: Column '{column}' not found in DataFrame")
        return pd.DataFrame()

    all_topics = []

    for _, row in df.iterrows():
        topics_list = row[column]

        # Skip cells without topics. Missing values show up as None or as a
        # float NaN; NaN is truthy, so a plain truthiness test would let it
        # through and fail when iterated.
        if not isinstance(topics_list, (list, tuple)) or not topics_list:
            continue

        # Copy each topic and tag it with the originating item's id so the
        # relationship survives the flattening
        for topic in topics_list:
            topic_dict = dict(topic)
            topic_dict['item_id'] = row['id']
            all_topics.append(topic_dict)

    return pd.DataFrame(all_topics)

# One row per (item, topic) link, then expand each nested 'topic' object into
# flat columns
topics_df = extract_topics_to_df(news_items, "topics")
topics = pd.json_normalize(
    [record['topic'] for record in topics_df.to_dict(orient='records')]
)
topics

Unnamed: 0,id,resource_uri,local_url,permalink,items_resource_uri,related_topics_uri,slug,parents,active,visible,...,wikipedia_content_date,latest_item_date,item_count,published_count,recent_count,daily_count,trend_rank_last,trend_rank_current,asset_count,subscription_count
0,326230,https://www.factal.com/api/v2/topic/326230/,/topic/incident-2550729/,https://www.factal.com/topic/incident-2550729/...,/api/v2/item/?topics=326230,/api/v2/topic/?related_to=326230,incident-2550729,[60969],True,True,...,,2025-06-15T20:45:47.140560Z,3978,3978,0,2,0,0,0,246
1,5213357,https://www.factal.com/api/v2/topic/5213357/,/topic/lkhw-sudan/,https://www.factal.com/topic/lkhw-sudan/?redir...,/api/v2/item/?topics=5213357,/api/v2/topic/?related_to=5213357,lkhw-sudan,"[406308, 1443, 1489717, 247481, 1489712, 851841]",True,True,...,,2025-06-01T23:11:18.835467Z,4,4,0,0,0,0,0,0
2,406308,https://www.factal.com/api/v2/topic/406308/,/topic/south-kordofan-sudan/,https://www.factal.com/topic/south-kordofan-su...,/api/v2/item/?topics=406308,/api/v2/topic/?related_to=406308,south-kordofan-sudan,"[1443, 1489717, 247481, 1489712, 851841]",True,True,...,2025-05-22T04:30:58.128483Z,2025-06-01T23:11:18.835467Z,94,94,0,0,0,0,45,5
3,1443,https://www.factal.com/api/v2/topic/1443/,/topic/sudan/,https://www.factal.com/topic/sudan/?redirect=true,/api/v2/item/?topics=1443,/api/v2/topic/?related_to=1443,sudan,"[1489717, 247481, 1489712, 851841]",True,True,...,2025-05-23T19:30:36.820915Z,2025-06-17T04:55:13.619380Z,5760,5760,0,2,0,0,5,155
4,2239161,https://www.factal.com/api/v2/topic/2239161/,/topic/west-kordofan-sudan/,https://www.factal.com/topic/west-kordofan-sud...,/api/v2/item/?topics=2239161,/api/v2/topic/?related_to=2239161,west-kordofan-sudan,"[1443, 1489717, 247481, 1489712, 851841]",True,True,...,,2025-06-01T23:11:18.835467Z,25,25,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,247481,https://www.factal.com/api/v2/topic/247481/,/topic/emea/,https://www.factal.com/topic/emea/?redirect=true,/api/v2/item/?topics=247481,/api/v2/topic/?related_to=247481,emea,[],True,True,...,2021-05-26T09:41:35.862595Z,2025-06-17T10:11:58.027576Z,311580,311580,10,41,0,0,0,178
96,1164277,https://www.factal.com/api/v2/topic/1164277/,/topic/asia/,https://www.factal.com/topic/asia/?redirect=true,/api/v2/item/?topics=1164277,/api/v2/topic/?related_to=1164277,asia,[],True,True,...,2020-09-30T07:13:03Z,2025-06-17T10:11:44.681316Z,192859,192859,0,0,0,0,0,131
97,851870,https://www.factal.com/api/v2/topic/851870/,/topic/western-asia/,https://www.factal.com/topic/western-asia/?red...,/api/v2/item/?topics=851870,/api/v2/topic/?related_to=851870,western-asia,[],True,True,...,,2025-06-17T10:09:27.554095Z,112756,112756,0,0,0,0,0,68
98,5016538,https://www.factal.com/api/v2/topic/5016538/,/topic/sudan-doctors-network-rapid-support-for...,https://www.factal.com/topic/sudan-doctors-net...,/api/v2/item/?topics=5016538,/api/v2/topic/?related_to=5016538,sudan-doctors-network-rapid-support-forces,[],True,True,...,,2025-03-10T06:34:39.377626Z,1,1,0,0,0,0,0,0


In [16]:
# Distinct topic names, in order of first appearance
pd.unique(topics["name"])

array(['Sudan civil war', 'الخوى، Sudan', 'South Kordofan, Sudan',
       'Sudan', 'West Kordofan, Sudan', 'Africa', 'MENA',
       'Northern Africa', 'EMEA', 'immigration', 'war & conflict',
       'GR63+M66, As Sulayk, Sudan', 'Khartoum, Sudan', 'drones',
       'En Nahud, Sudan', 'Rapid Support Forces', 'Al Jazeera', 'Asia',
       'Western Asia', 'Sudan Doctors Network: Rapid Support Forces'],
      dtype=object)