In [9]:
import os
import glob
import pandas as pd

# Directory path
directory = '/home/matias/Documents/media_monitor/data/rss_slices/'

# Find all CSV files in the directory
file_pattern = os.path.join(directory, '*.csv')
csv_files = glob.glob(file_pattern)

print(f"Found {len(csv_files)} CSV files.")


Found 122 CSV files.


In [10]:
# List to hold DataFrames
dfs = []

for file in csv_files:
    df = pd.read_csv(file)
    dfs.append(df)

# Concatenate all DataFrames
combined_df = pd.concat(dfs, ignore_index=True)


In [11]:
# Drop duplicates on all columns
deduped_df = combined_df.drop_duplicates()

# OR drop duplicates based on specific columns
deduped_df = combined_df.drop_duplicates(subset=['Title', 'Source'])


In [12]:
deduped_df.shape

(6032, 7)

In [13]:
deduped_df['day'] = pd.to_datetime(deduped_df['Published'], format='mixed').dt.strftime('%Y-%m-%d')

deduped_df.groupby('day').size().tail(30)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  deduped_df['day'] = pd.to_datetime(deduped_df['Published'], format='mixed').dt.strftime('%Y-%m-%d')


day
2025-05-12      2
2025-05-13      1
2025-05-14      7
2025-05-15      3
2025-05-16      2
2025-05-18      4
2025-05-19      1
2025-05-20      6
2025-05-21      4
2025-05-22      1
2025-05-23      9
2025-05-24      9
2025-05-25     18
2025-05-26     92
2025-05-27     94
2025-05-28    246
2025-05-29    452
2025-05-30    473
2025-05-31    258
2025-06-01    222
2025-06-02    466
2025-06-03    496
2025-06-04    544
2025-06-05    540
2025-06-06    490
2025-06-07    276
2025-06-08    199
2025-06-09    474
2025-06-10    459
2025-06-11     91
dtype: int64

In [14]:
links = deduped_df.sort_values('day').tail(20)['Link'].values
links = list(links)


In [15]:
import os
import time
import json
import pyperclip
from tqdm import tqdm
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# links = [
#     "https://example.com/page1",
#     "https://example.com/page2",
#     # ...
# ]

output_file = 'scraped_links.jsonl'
sleep_time = 5  # seconds
page_timeout = 15  # seconds

scraped_results = []

for idx, url in enumerate(tqdm(links)):
    options = Options()
    options.add_argument("--start-maximized")
    # options.add_argument("--headless")  # Uncomment to run without UI

    driver = None
    try:
        driver = webdriver.Chrome(options=options)
        driver.set_page_load_timeout(page_timeout)

        driver.get(url)
        time.sleep(sleep_time)  # Let dynamic content load

        body = driver.find_element(By.TAG_NAME, "body")
        body.send_keys(Keys.CONTROL, 'a')
        body.send_keys(Keys.CONTROL, 'c')
        time.sleep(1)  # Clipboard delay

        scraped_html = pyperclip.paste()

        scraped_results.append({
            'index': idx,
            'url': url,
            'scraped_data': scraped_html
        })
        print(f"✅ Fetched data from: {url}")

    except Exception as e:
        print(f"⚠️ Error scraping URL: {url}\n{e}")

    finally:
        if driver:
            driver.quit()

# Save results
with open(output_file, 'w', encoding='utf-8') as f:
    for record in scraped_results:
        json.dump(record, f, ensure_ascii=False)
        f.write('\n')

print(f"Done! {len(scraped_results)} pages saved to {output_file}.")


  0%|          | 0/20 [00:00<?, ?it/s]

✅ Fetched data from: https://news.google.com/rss/articles/CBMiqAFBVV95cUxPT0IxazktYVV0elU3b25RVzRrdm1oeDBnVDZja1F6VmF2SDF1WFIyRW9BaGtwOFZrMFNMSnJPWmNmUWJScjhhZ3hJbTBNR0s5UjNWVGpfOXpSdTliU1c1WExUdEpxSTcyaHg4QWZpQnY5SDloUG53TDN6RFNXbkt6MXNmUUhNVzVxSkhzVXBXOHlfV3g1VC1FRXZiYTd2OXh1M0wySTIwZjHSAcMBQVVfeXFMTkNOd0xpU0xORV82ZzBqSjl6Q2RsWkl6VEV5Q3dZZGhkZnM1UGZOTkNfdGcxOGlDOTJfbkhzc3g3b29zVTBDUjA2bkpZanZ1SXZFeVVYZjFVUVU1a3ZyUERTZ1JZd01aaUduaTh4S0NLcHVJamtMeUxIblEybWh2QmV4Zml3QjU4SWdTR2xrYjczcTl5RlQ5Y3FJRE00bUFYWmFPcEZwcjZiMlhkck9PeWVtSGtjaHJaVWprTFVadWdUX3Yw?oc=5


  5%|▌         | 1/20 [00:20<06:29, 20.51s/it]

✅ Fetched data from: https://news.google.com/rss/articles/CBMirgFBVV95cUxQaFc0R2xqdFk2QzNNSkJMcXRuTEdGWUY3ZVZoYXNxY243aVJGTFpVVE4xMFpWZWtHY3hGSkxrbF9IVzlkRjJUdVZucU8xWHhqelZwYW5iYXlOTjlhRVpEZFIxVXNCT3EyanJueTRCc0ozMzF1N3RTQ213THp5WTdac1c2bjNnV2xEeVppQUFPQWRyRmc1VGFrbTFEblVpY3dfNWZ0NnVHYTRNZVlEeEHSAcgBQVVfeXFMUHI0WndldXFMV2pQb2VJSzJzS2VORTRfMmg1RVh2M1lHUmxZUTUyNHpEcDlTZFUxcV9mU1ktZ0pJNUVpMUZ2ZnVyUjN1eGVsVm56amN2V3daVUNTaWpJbm93M3NNcUczeWE1T1k2bkFQcVI0WDEwcy1selFkV0RTN2lxcnp5RkNQM0tkZDlEQW1WNlYzM3VIX2xfcWNfREp5OGxOWXM4b3hPdk1CRk5LM0dFMk5yQnRtR2RSOGJ0Nzd5a2daSmlVQmo?oc=5


 10%|█         | 2/20 [00:41<06:13, 20.78s/it]

✅ Fetched data from: https://news.google.com/rss/articles/CBMiowJBVV95cUxNSDZmVzVkV2RYTnhBdloxLVZ3bGU1NVQ2RWtaRS1LOHFmUXMtQUluZ0J2Vkk5bkNjODlDbjNBbjloTDB1SWJWa0pQRVF0QlhWWXZjMGYtZWhOUkdMMDBtcDlHOUNDaTZob2pweG91R1ZSY2o1QmV3WGp1THkzSXg3c1JFTGJMS1dTVTI2cDRObE12QzFOQWJrem5fSFd2Ml83eklVdDdnY3d6dDVNNWkteV84czVkVUZHcGVWeUs4MmxGUVU2NUpsV2t3THlkOWV4TFFWSWlKbkhBNmNZTy1iNDRrLVBiUVRqc0FaNXB0eUt2OHdXb2E3Mk14djF1UG1IMW1KREtfSFlxNW9WbkttdVBJMXVnX1FTMm1KOGcyUGh1WEU?oc=5


 20%|██        | 4/20 [01:11<04:22, 16.42s/it]

✅ Fetched data from: https://news.google.com/rss/articles/CBMi0gFBVV95cUxPYnVxY3YweENqUnRuWElNVll2UHVJMENxTThlX3JYZ181M2pXdkU1dmV1ajR0dTAxZDJYTU1yZlFsWGFteEtkLWxFb1NDaFp1eUVWUGZPMjA4ZEJiYnZUb2QxNEU4c0paTVFXYlpEc0RObzJLVUJGNk1raUpTSWZoVFh2aURHLTh5NEZfRnhBS2VESXpJTmM3eWtqcEc5ZkFfRUd4QWo0dDNxT3hvTXAwem4zTHpZWXVMZ1JLUzE3UkdUSW9yMVZQVXIxTlRGd2JaVWc?oc=5
✅ Fetched data from: https://news.google.com/rss/articles/CBMioAFBVV95cUxOaUppQ09KaXZOWDJQc2xpYlBwZ21EY3ZrM3dvdl9WXzAtTk4xY2NTTUVJSDl5S0NoaHBjU3oybDE4RWNJeDBxWmlzOVdLclBJVjBxcmgxaWRROTRmaDRlLVZuWDZSdnVGdkVGS2xZNHAtWkprSVFXeWRWOFhaWVVBYkg4N1lIdzBUNnhSbEg0VEpNSU5VRFY0WU9KRE9Da2l10gGmAUFVX3lxTE41NW91aTZKd1FCLWk1OFpvZno3N21lcVdRYWc0WTV2bXFDemR3YTN5MVpwSF9lNGExaENZUXpKTy1PeFJnQWd2MnRGd0NyVmN6clVqS3c0dzV3eEFVbkQyUHNQOFNuT1BxZVJNQzlMZDh4NWdvdVJsZnJFYTVRVUN3WlFhSjRNU2FoWnRpWHJEVVhLd1owZE02MEw1RTdiZVFZUkZidmc?oc=5


 25%|██▌       | 5/20 [02:42<10:48, 43.26s/it]

⚠️ Error scraping URL: https://news.google.com/rss/articles/CBMi-gFBVV95cUxPVkdHR3FXLXF5ZEN0RDd1cW5yeTlYNmh4cEJEYzFubWQtQ1lCamRjdVNtOHhCSHNha0hMNk5BY2Z2dmI0RGxhanhQT3lFbDlTcUFNVW9nZkhjb0R1R0h5djRJekRVR29keVo1bUpfWmdBYm1JR09JV2NfUi1QWjZrSF9mbUpJRHM3b0dLMnAzNDBhNG9BblZzUm1aMXNPR0ZDTWRGTThLSGNjSzZHS2xxanZmQWk3WW13X0tuV3dCcmlROGlTUDFaVUNRNWF6WU42V2cwUUExQ2FGSzY0cWRMc2FrYWIxRzU0d2ZWWXV5TjFvZXZDTEtROHV30gGUAkFVX3lxTE90NGlncExYaVdYQ2NMRFM2N0V6czZiS3dzdkpXZEJRMjJwZFExaGJyTS1LMDhjVFBnTW5jX2syYk9lUVVpZEtGQkt1TjhHN3ZmQTRBd2ZIZEoxcW1mMmdHQ2dqMFhVeTNEM2N3SmZCam5rUGxITEVHYW42Q25Wai1zNTFnMTNaOEU0TXo3MWVjQUdMTlZ3TzRSUXlCMnd2Q3p5NnYzYTVBSXlCSEg1TlN4NWlUQ3ZHODh4c3VBTDlUcEtTN0JILU9TMGRTQ1VLZWJDRkFpOV8yUE5VU0NMckswMnJFQmIzMlpERC1oOW5yVE1GaFpROVZLVkp1a3RyMm1QTGpyNVcxV2JqUFdnVHFWSC13Yw?oc=5
Message: timeout: Timed out receiving message from renderer: 15.000
  (Session info: chrome=132.0.6834.83)
Stacktrace:
#0 0x58b307c8153a <unknown>
#1 0x58b30777cf00 <unknown>
#2 0x58b3077651b7 <unknown>
#3 0x58b307764ec2

 30%|███       | 6/20 [03:07<08:37, 37.00s/it]

✅ Fetched data from: https://news.google.com/rss/articles/CBMimwFBVV95cUxPU0lYMnUwSEFUTlZCQXZGYUw2RmxtbFU5el9CTFRIV0Vtd2MwM3ZnWUFmamtNdXZoNnRjTEZyQVlCY3BENldZMjFCNGtVRVRDcldmaWU3d3NxaG5ScmtRT3ZnRmg1cnpvV2dZQ0NUX2JpZE5WZDVOMi03OUtpcUF3OWQ3SkJUY2hUNGJSSktyaWo1ZndGWWpwUmw4UQ?oc=5


 40%|████      | 8/20 [03:37<04:56, 24.72s/it]

✅ Fetched data from: https://news.google.com/rss/articles/CBMiugFBVV95cUxQay1ITzUwSWFMQ0o4eGZZbVk4UF80RnpldkFsN2NyNjhNdm9aNEtSR1pGc1AtVFhnMERpNkxlaFp4VlRYSy0ySWNIQnV2cXV6NmJIY1BZY25xanpJSVBTTHl0Qnp6UmdpWGJaWFdrNHNDNFRqX3dNcE9CVlU1QVkzX01YOHhFUDVYelZzN0FYeHRTeWY3a096d1JkeHQ0ejJGdm1LajhPTU1IQlZ2VzBkZmEyWkV1b2RHemfSAboBQVVfeXFMUGstSE81MElhTENKOHhmWW1ZOFBfNEZ6ZXZBbDdjcjY4TXZvWjRLUkdaRnNQLVRYZzBEaTZMZWhaeFZUWEstMkljSEJ1dnF1ejZiSGNQWWNucWp6SUlQU0x5dEJ6elJnaVhiWlhXazRzQzRUal93TXBPQlZVNUFZM19NWDh4RVA1WHpWczdBWHh0U3lmN2tPendSZHh0NHoyRnZtS2o4T01NSEJWdlcwZGZhMlpFdW9kR3pn?oc=5


 45%|████▌     | 9/20 [03:47<03:40, 20.07s/it]

✅ Fetched data from: https://news.google.com/rss/articles/CBMirAFBVV95cUxQWTg4aWNId2g1SUk0Nkx4azRZN1gyNDJBSDNyUWNza1VhdWcxR05rTHp5ZWRHRlRnNHBOYXE5UVhwTFdCcmVlWTBwYjc4R1hDbDBGM2NrZXpTUEFmUTBnVnZjcXEzXzdiTWEzVjU1Yl9GM1VPdjNKWkFhOEsxeTY1YUFmbHgtRGVjVnlyQ1RLM05jODhCbUpRNkc3cFpxUkV1dGE3UFBWVDBLTGFG?oc=5
⚠️ Error scraping URL: https://news.google.com/rss/articles/CBMizwFBVV95cUxPM1ZkMDJyckFaRW1kZUpMeVRVMm41cFdnanVKWnNQWWlLd18ycUpENWhCU01OWUVrRzNneVd1STZCeWNuVkh6N2N4MkU1d0dSTXB4RDhKZS16NVVlTUNiTEtWZFozWk1MN0pZc2E4YVRnT0dpVy01NTU4V1RvWGZoMGh3UTROY1p5blgzX1U3NGlGdGVLb3lDUVlqWXBLTEsyRGJPY283bm13ZFlFc0M5ekx2VXZ4Vmt4Nml2SlRZMVZYRGtwdXpfbFpOdEhpNGvSAdQBQVVfeXFMTlczdnhGOTM1eVd6OVMybTJrc3N3NVhzdWJ6UWNDdGhkWVJ5dU43SXNYUFN6TFlqczVKLUxORUVkSENpZkFyNTl3OVZIbzdnaXBLSnFTTTdpaEhvdVdseFpkZ2ZBc1k4WW9QNmdRUEJaZ2ROTTRqTDhRbmI4ekNBRG0wUnVsVVpPeG9EWXVNVnJjdGdKU2NGXzdwQXBmeGJIYm9rMHBpeEVMQ3RVRnpoYjVWZnNoRVl3YkxuSjVSbGRPS1pQWjdxN2REbG5Kd1QyVDl5YjE?oc=5
Message: timeout: Timed out receiving message from renderer: 15

 50%|█████     | 10/20 [04:14<03:42, 22.23s/it]

✅ Fetched data from: https://news.google.com/rss/articles/CBMi4AFBVV95cUxOSDl4ZGRta09mM25nRXl5ZllVODBYSllZM0p4WDFzTUxfa2VNcmxPSDZfUi1VR2h1STNvV2Z4VEg2RS0yU3VScmZMQWV6Q3NQM19Mek9wbjh3VnRQRlAzUURmUHRvRkp2RDJpX3RsRVFBX3hrd0dQMnI3WDlfNWo0NHFabG00OEFxTXotSmZXUWJQVVFacEtTU1djWlpRdGs1NVl1T0ZmZ0NoUVNtdW1XTG1mYTRncGV3Z0g4dkFIWndwZVZKNTZGRnNoX2Y3TkJldVBsenhBZmRoN0QyaTZGSA?oc=5


 55%|█████▌    | 11/20 [04:25<02:48, 18.73s/it]

✅ Fetched data from: https://news.google.com/rss/articles/CBMisgFBVV95cUxNWFJHVVZiS1ZWZjlpMEdEYUh4Sk8xVzJhOGZiVFlHWGRQaG9DWTZDM1lHczk2TVFFT0NnbmVPek00b1RGaVA3N0Vkd2NMZ2pFaGZIMVlBRld2WW1SQVNHQkNPUmZrTGdPUTRnWmNTcHN5c0Z5aUJ4b2MtRnd1bVBaV240QTcwQzQ0NDFKV2RZUTRpM0dGUlE0NzNiN0RaWms2cjMwbTFKY1h0RVFzQ3c4TVlR0gHGAUFVX3lxTFBRVEhySjhLTGhyQ0FwMkNQMXpTUmI0UzBIYkNLM2liY2Fuc2pqM1hPNDUyd2NkVmw3cktPWFJWckozdEp3NWZUQURWMVBhTHhuSEo4c3JjYi1uR3V6UFBBVFZsQUNPQ2FQRWdrMmxBalZzc2pzTnBuWUp5S3FCR1l2WnZWR19HSU12MWpNTmJra0EtaVZHYmROMEF0Y3dwbl8wV2ZDekVrYmhuczlWUE1BYjBCNzI2RlNLUTJtOUFOLUVtc3dEZw?oc=5


 60%|██████    | 12/20 [04:36<02:11, 16.49s/it]

✅ Fetched data from: https://news.google.com/rss/articles/CBMi0wFBVV95cUxORHlIYUItWGthV192bXhYZ2xXRDNzVzdnMFpOV2VmTlgzRGhyUXBycXFFZlBQRWpmMGRqbm04VDBkbHVOYXF2dHhsNW9DS2gwQUkyV3o4ZlV3dC11NDdrWWs4aUlaakRzdDQ0Q1NiUnBydF83VUFhaHd5QldrY3VEOS1ud2QybWJXaE0tZlBzejJEZHFmdFFWb29sU21DSWxlZXlGQ1p3cG1YcWFKMUc3cWFNVWd0V3Q2ekpxYkZ0MnZOX1NRdk5WaFBCQ2k1Wk9ORXRN0gHuAUFVX3lxTFBoZW9QOGs4Tk92Q2JURXVmQTdySVB4Zll3d3JJcXZHaHhGTzRsWEI3TEcycUJXdTJ0dmtPUWVtUTdMVEZCdkc1WWlDdmJZaFdCUl81bF9ZeTZ5Qkw0cThpU3ozb3VObVNiTko3eXdXdkZvcEw3akhacFI4VG85ZHl1YlU4RzVXRlVGQTdRa24zTkM5d2IxQUhVNDhWSmVJcUtEYmJLdk9Pc29kR1lRVlhDek5vZDF3TzBXTk04NEstcm1qNVRaYm1qc2lVOFhaZjMzUldZSFB3UGlUMGFvWWwzWjNnc3JFeUNUWU9ZYXc?oc=5


 65%|██████▌   | 13/20 [04:56<02:02, 17.47s/it]

✅ Fetched data from: https://news.google.com/rss/articles/CBMivwFBVV95cUxNX09aN3RyX3BxR1FpcmV0OHd6ZllhWlZfY1BDaHdTNE13d2FiYzNYajNjdnA1c3ZnMzlYVUh3OTVWWkNyX3VTNTUwUnBNdzZJb0VMMjZmNndkaGpJV0lUSEpiVnhBMDFPSEF4UnBxWmhHa0M1NmlPeWgxVU9MRE1DZXRUeF81UmhvQ1FJQmU0dWdMbjdyd2tESDEyTERTRktvZVp2ZS1mVHY1YjR2Um9zQ2wtZUY2MG5WYjM2MDc5cw?oc=5


 70%|███████   | 14/20 [05:17<01:51, 18.53s/it]

✅ Fetched data from: https://news.google.com/rss/articles/CBMivwFBVV95cUxQQ2RRSU1XSTBOR3ZfUG9idWJHM3VOaDFWX1dnZV9pZWo3LTI3Y1NFdXdJdE9CZ19VMjE4R0JyVVBDeGdabEFhanZKSEg0a1hzOWNDV3VlNlBQVUo0eXJLcnFPcVM0c2JVeVJ0Sl9ldk9vR2xTaGlyN2t0bXNlTGduVHpBVGhNZTNKTFA2M0NFSkdMTTBBSTZ6bG9Ccm96bTRKMGlpUWd5dEwzWU93WkwzeF9LTzZBZnl2QXRKYzFFZ9IB0wFBVV95cUxOQVMxMzhYTll0dGROX1hXRVoyMXVfM2dwMUxFY2M3TkQ5Ti1pR0lWZ0Zjc2Y1RktWMmtxUGFlNHhaZmpFRnJoUFZBYU5lU2pKd1VoMzlmNlY4enplWU5OcXJULWhUcllLU2pfb0lrV2JJdUJfY2hTRFZOWDVOaThrZnRHYmZGV0lfSlp1LTd0a0c2T1hWNFdSdldkbjYzLXNsOEpxc3lJRk1HMEpRaGJ5QmpaMHE2Q3hzNUxBc1dZQkl5QUFWTUFUTVFsenRaaGJiVUxB?oc=5


 75%|███████▌  | 15/20 [05:37<01:35, 19.08s/it]

✅ Fetched data from: https://news.google.com/rss/articles/CBMihwJBVV95cUxQNVRPeWx3TmJBWnduT2pwbjJCMHJINWJDX2hzdG1kM1Iwd3Y3bkp3WV9tUTJyZm9HajRWOEMxVkEzMXRVWmdQZ2wtY2hZZXI0NFVVM0oyUXhna2VEZ2pKbEc1VjV6ellDc2RFbzA1WWlpSmxzZm1fMWdnaUdhZnZqV1VrYThLVVJFbWtTQm9pdE5HVHJ1ZDN0czc5UHJubW9vU3YzdXg4X0hndFQweUMyQXBWN05GWHFTUDlZOWVPd0FjNERGVG9NaGJlZWFtQ3hQd2dHM0xHMjk0T1poc0NGOTA3WDZRWXNDY2hiN0YwczBZZDQ0aERoV3lRRmVoMmZEUkU4a0hlSdIBogJBVV95cUxOM0huRU5IbGpXZVNnZ0pUX1RKRUExbFRkTWVxeFhvUFdMWFZXbS00N2t4ZUtiZDRobGs0YndrOFJpY2x0UHJkVGNKRk81RUhsS2daSGEyOG90YTVKSTNuMHV5M3pmWlpFamFJOXhCV1BwTVBiUXNLODhZZTJqNFlsQ2FzSlJxUlJIX1NtMGd3VzgzTXpOTTJteGJhU1FUbGpGbmVQRTBUN21JSlMtckUtM0dtYWpfTVJndjBTeGV3NGpUckk3Y0NmYVdYY2FpRmg0YUppSXJBbHloRGoyZmZYb0laN3kwN3hwbk04U1RFajNVbmZhNjNPOUtDZWgxUUdFb2h6SUN4dW0waWMyb25MWkhndUFJLVd4dTVGN1VUT2ppdw?oc=5


 85%|████████▌ | 17/20 [06:11<00:52, 17.53s/it]

✅ Fetched data from: https://news.google.com/rss/articles/CBMiowFBVV95cUxQUzdvVnUyVzRjbXFwQmlvWXBRMFlGbjB3T1BfUVpBVFdWNVdYOWhpQ2swWEJDLVRKMDVzbS1Yd25iZVVYSHROS1FIdmxNb0pGX2pESVlxOHktSGF2WGpsVm5iYUxBZW0yQWhYXzNNYWxfd2hxdWd5TEVHRGotMVJscFhjd2RaXzJ5d05OWVRvWi0tbl9ZeC1WNE5nNVVBZUhVRzNz?oc=5
✅ Fetched data from: https://news.google.com/rss/articles/CBMikwFBVV95cUxOWGN2c0dXSC1QaTBzQU56MUVUV3FNREE3ckJ2SW1zM2hMUDNxb01ubmlxRGxwWEN3VUtJTy1PWnlCSzROMWhIZ1RRZTFPUVA1eDJETy1uM3p6NmhPY09BZVIzU2FuZXNiZzQ2S21qcV81VlVaNmZYOUpRQ1o0enUzb0dqSEV1WWdCV3JEbUF4SHpxT0nSAZgBQVVfeXFMT1FtdjdneWdpYjg5Qk1QZHc1bXQ1VmQtTDFRdXZ6NEpvSkcxMkphYWxmQjZZSzJoTVh2ZEE3UnlSN2tlQUZLQi1ncHNrWjJBM244MWdSSE1yNmZGR1BCU0N2VHplTWl6bTBWYXBfQ3RLQ05wWklrV3UzYXZHeDhhVkhabG5EUHdpWUtrenkyWFlrSGhWaTZ6dFU?oc=5


 95%|█████████▌| 19/20 [06:56<00:19, 19.04s/it]

✅ Fetched data from: https://news.google.com/rss/articles/CBMi8gFBVV95cUxONUgtU2ZVRldPTThfSG03UmdmdUhFbzkzVF9Ha2p2NW83QlFaaVpvNU9nVktHeFZ0NndnS1YzdFZjSDBPNExBRWZJNjhDR05NbmNjU1VKeGZFRER4RUhRamp2SGhMTlVwNzl5S0RKaldmYWZMZWRESDZTdmNUZUVFS19pcGdLM01FQmJsMWhWTWdKWEoxR2V4VXRYVjQ3aWJXRlN0cnJ3ZmlJVzVYZm1la3hlRmQ0MXBGYmx5SE9nMTRKdDlBUHUweV9rZEhITU5xeHF2NUZHT000dGc1WkZLRFp3OVZVQzduZW5hMm1QeDd5UQ?oc=5


100%|██████████| 20/20 [07:09<00:00, 17.16s/it]

✅ Fetched data from: https://news.google.com/rss/articles/CBMi0gFBVV95cUxQN2FhQWJLaVEyZmM4WTlSRlpNMnU5Wk1HajZYeU51TjlUUHhLdnkybkxLa1p0b2RmYzl6OGZRZ0wwUjZmb2pVOGFSOGoxZVB5Q2ZoZEd0aFZZVGhHbnlhNHp0YjJydVhvRndFUi1aZW1QWno3NFI3MU4zZVFmc3NrYnFNNkZPMW1PMUZwU0lxNkhfMzE2Um5TMGR2OGtqeEgyYnpNNmJGUXktZ1FYd29OamtVdk91QkxxVzQxdlhrVE40OEtZdW10Y2tBV0dNWHltT2fSAdcBQVVfeXFMTXVUc3BsQUZJSkxyaTloTmtDMkktRHE0bGhQRDA0eU9rakF4WTlmaGhZUDZyWk10am1OWUQ1bVBVZ0JIU205N1g0WE0xN0VZWUQxenptU3NEdVplUWI2X0ltOFEtZGNpU3pSSlJYUy1vdERRYTZoWnBPZUZZYmF4d0ZQQ0pDQnJXZUVPSXFkZnFwVnRRMmdNZkFHcE96OUt5M3EtOFBjelVwUk5ld0hHRFM3S0QtbWE0a0RrVDVZcGx1Y2lhbkd5Q2xUVzQ0ZWVCY1c4TWh1Yk0?oc=5


100%|██████████| 20/20 [07:09<00:00, 21.49s/it]

Done! 18 pages saved to scraped_links.jsonl.



