In [6]:
import logging
# Route INFO-level log records to the cell output using a bare message format.
logging.basicConfig(
    level=logging.INFO,
    format='%(message)s',  # Simple format for cleaner output
    # basicConfig() silently does nothing when the root logger already has
    # handlers (e.g. this cell was run before, or the notebook host installed
    # its own). force=True replaces them so the settings above always apply
    # and re-running the cell stays idempotent.
    force=True,
)

# Import the async functions directly (NOT the regular crawler!)
from src.papers_crawler.crawler_async import crawl_async, discover_journals_async

# Discover available journals (use await since Colab runs in an async environment)
journals = await discover_journals_async()
print(f"Found {len(journals)} journals")

# Show first 5 journals
for slug, name in journals[:5]:
    print(f"  {slug}: {name}")

Loaded 62 journals from cache


Found 62 journals
  cell: Cell
  cancer-cell: Cancer Cell
  cell-chemical-biology: Cell Chemical Biology
  cell-genomics: Cell Genomics
  cell-host-microbe: Cell Host & Microbe


In [7]:
# Crawl specific journals (use await)
downloaded_files, articles = await crawl_async(
    year_from=2020,
    year_to=2025,
    out_folder="./papers",
    headless=True,
    limit=2,  # limit per journal
    journal_slugs=["cell", "immunity"],
)

print(f"Downloaded {len(downloaded_files)} PDFs")

🔍 Scanning 2 journal(s) for open access articles...

🚀 Launching Firefox for journal: cell...

🚀 Launching Firefox for journal: cell...
✅ Firefox browser ready for cell
✅ Firefox browser ready for cell
📂 Journal folder: ./papers/cell
🔎 Crawling journal: cell at https://www.cell.com/cell/newarticles
📂 Journal folder: ./papers/cell
🔎 Crawling journal: cell at https://www.cell.com/cell/newarticles


Found cookie consent button: button:has-text("Accept")
✓ Accepted cookie consent
✓ Accepted cookie consent


📚 Found 38 open access articles in cell (will download up to 2)

📄 Found open-access article: Anti-BCMA CAR-T therapy in patients with progressive multipl...
📄 Found open-access article: Anti-BCMA CAR-T therapy in patients with progressive multipl...

📄 Found open-access article: The genomic footprints of wildSaccharumspecies trace domesti...
📄 Found open-access article: The genomic footprints of wildSaccharumspecies trace domesti...


CancelledError: 

## Testing with Updated Code

The code has been updated to:
- Remove unnecessary `IN_COLAB` checks and `await asyncio.sleep(0)` calls
- Use `CLIProgressTracker` with tqdm disabled in Colab
- Show detailed logger messages for debugging

Restart the kernel so the updated modules are re-imported, then re-run the cells above from the top and compare the new output with the output recorded above.