In [1]:
# Load the data
import pandas as pd
df = pd.read_csv('sudan_results.csv')

In [None]:
df.head()

Unnamed: 0,MonthYear,Actor1Name,Actor2Name,SOURCEURL
0,202504,UNITED NATIONS,,https://www.newsday.co.zw/international/articl...
1,202504,PORT SUDAN,,https://www.globalsecurity.org/military/librar...
2,202504,GENEVA,,https://www.scoop.co.nz/stories/WO2504/S00324/...
3,202504,SADC,,https://www.newsday.co.zw/international/articl...
4,202504,SADC,,https://www.newsday.co.zw/international/articl...


In [6]:
from newsfeed.utils import fulltext as ft
import time
import tqdm as tqdm
import sys
import contextlib
import signal

def timeout_handler(signum, frame):
    raise TimeoutError("Request timed out")

# Create empty lists to store titles and full_text
titles = []
full_texts = []
successful_downloads = 0
failed_downloads = 0

# Get total number of entries
total_entries = len(df)

# Loop through each row in df with improved progress tracking
for index, row in tqdm.tqdm(df.iterrows(), total=total_entries, leave=True):
    with contextlib.redirect_stdout(None):
        url = row['SOURCEURL']
        start_time = time.time()  # Define start_time for each iteration
        
        try:
            # Set timeout alarm
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(15)  # 15 seconds timeout
            
            # Download and parse article
            article = ft.download(url=url)
            article.download()
            article.parse()
            
            # Clear the alarm
            signal.alarm(0)
            
            # Extract title and full text
            titles.append(article.title)
            full_texts.append(article.text)
            successful_downloads += 1
            
        except Exception as e:
            print(f"Error processing URL: {e}")
            # Add placeholder values for failures
            titles.append(None)
            full_texts.append(None)
            failed_downloads += 1
        
    # Add a small delay to avoid overloading servers
    time.sleep(0.5)
    # Check if this iteration took too long, but continue to next article rather than breaking
    if time.time() - start_time > 15:
        print(f"Timeout reached for URL: {url}")


# Add the new columns to df
df['title'] = titles
df['full_text'] = full_texts

# Print summary statistics
print(f"Download complete: {successful_downloads} successful, {failed_downloads} failed out of {total_entries} articles")

df



Timeout reached for URL: https://newsghana.com.gh/un-calls-for-immediate-ceasefire-amid-escalating-south-sudan-clashes/




Timeout reached for URL: https://newsghana.com.gh/un-calls-for-immediate-ceasefire-amid-escalating-south-sudan-clashes/




Timeout reached for URL: https://www.newspim.com/news/view/20250426000131




Timeout reached for URL: http://halgan.net/2012/02/uurdoox-taariikha-mashruuca-islaamiga-ah-ee-soomaaliya-sh-cabdiraxmaan-sh-cumar/




Timeout reached for URL: http://halgan.net/2012/02/uurdoox-taariikha-mashruuca-islaamiga-ah-ee-soomaaliya-sh-cabdiraxmaan-sh-cumar/




Timeout reached for URL: https://www.clevelandjewishnews.com/jns/trump-willing-to-lead-the-pack-in-strike-against-iran/article_a894c9d4-e9f0-5aeb-84df-c2fd5ad6e91c.html




Timeout reached for URL: https://newsghana.com.gh/un-calls-for-immediate-ceasefire-amid-escalating-south-sudan-clashes/




Timeout reached for URL: https://africa.com/sudan-turk-gravely-concerned-at-rising-civilian-deaths-and-widespread-sexual-violence-in-north-darfur/




Timeout reached for URL: https://africa.com/sudan-turk-gravely-concerned-at-rising-civilian-deaths-and-widespread-sexual-violence-in-north-darfur/




Timeout reached for URL: https://www.kuna.net.kw/ArticleDetails.aspx?id=3228105&language=ar




Timeout reached for URL: https://www.spacewar.com/afp/250428171832.9ro0wyyd.html




Timeout reached for URL: https://www.spacewar.com/afp/250428171832.9ro0wyyd.html




Timeout reached for URL: https://www.spacewar.com/afp/250428171832.9ro0wyyd.html




Timeout reached for URL: https://www.kuna.net.kw/ArticleDetails.aspx?id=3227955&language=en




Timeout reached for URL: https://www.kuna.net.kw/ArticleDetails.aspx?id=3227955&language=en




Timeout reached for URL: https://www.kuna.net.kw/ArticleDetails.aspx?id=3227941&Language=ar




Timeout reached for URL: https://www.kuna.net.kw/ArticleDetails.aspx?id=3227941&Language=ar




Timeout reached for URL: https://www.kuna.net.kw/ArticleDetails.aspx?id=3227941&Language=ar




Timeout reached for URL: https://www.kuna.net.kw/ArticleDetails.aspx?id=3227956&Language=ar




Timeout reached for URL: http://www.businessghana.com/site/news/politics/326943/End-conflict-to-honour-Pope,-Vatican-diplomat-tells-South-Sudan




Timeout reached for URL: http://www.businessghana.com/site/news/politics/326943/End-conflict-to-honour-Pope,-Vatican-diplomat-tells-South-Sudan




Timeout reached for URL: https://www.standardmedia.co.ke/africa/article/2001517668/sudan-paramilitaries-shell-famine-hit-camp-kill-over-20




Timeout reached for URL: https://www.standardmedia.co.ke/africa/article/2001517668/sudan-paramilitaries-shell-famine-hit-camp-kill-over-20




Timeout reached for URL: https://www.standardmedia.co.ke/africa/article/2001517668/sudan-paramilitaries-shell-famine-hit-camp-kill-over-20




Timeout reached for URL: http://lecourrier-dalgerie.com/soudan-au-moins-31-civils-tues-a-omdurman/




Timeout reached for URL: https://www.opinionnigeria.com/before-you-destroy-someone-think-of-those-who-depend-on-him-or-her-by-isaac-asabor/




Timeout reached for URL: https://annabaa.org/arabic/authorsarticles/42304




Timeout reached for URL: http://lecourrier-dalgerie.com/soudan-au-moins-31-civils-tues-a-omdurman/




Timeout reached for URL: https://annabaa.org/arabic/authorsarticles/42304




Timeout reached for URL: http://www.businessghana.com/site/news/politics/326943/End-conflict-to-honour-Pope,-Vatican-diplomat-tells-South-Sudan




Timeout reached for URL: http://www.businessghana.com/site/news/politics/326943/End-conflict-to-honour-Pope,-Vatican-diplomat-tells-South-Sudan




Timeout reached for URL: https://www.standardmedia.co.ke/national/article/2001517706/kenya-denies-sudans-claims-as-tensions-over-rsf-links-escalate




Timeout reached for URL: https://www.standardmedia.co.ke/africa/article/2001517668/sudan-paramilitaries-shell-famine-hit-camp-kill-over-20




Timeout reached for URL: https://www.terradaily.com/reports/Sudan_paramilitaries_shell_famine-hit_camp_kill_over_20_999.html




Timeout reached for URL: https://www.terradaily.com/reports/Sudan_paramilitaries_shell_famine-hit_camp_kill_over_20_999.html




Timeout reached for URL: https://www.terradaily.com/reports/Sudan_paramilitaries_shell_famine-hit_camp_kill_over_20_999.html




Timeout reached for URL: https://www.standardmedia.co.ke/africa/article/2001517668/sudan-paramilitaries-shell-famine-hit-camp-kill-over-20




Timeout reached for URL: https://www.alnilin.com/13374386.htm


  2%|▏         | 1515/77167 [1:25:42<71:19:47,  3.39s/it]


KeyboardInterrupt: 