# Add Data Source to Catalog
This notebook guides you through adding a new data source to `catalog.csv`.

In [ ]:
# Ensure required packages
import importlib, subprocess, sys

def _ensure(pkg_name, import_name=None):
    """Import a module, installing its package with pip first if it is missing.

    The imported module is bound in this module's globals under
    ``import_name`` (or ``pkg_name`` when no import name is given).

    Args:
        pkg_name: Name passed to ``pip install``.
        import_name: Name passed to ``importlib.import_module`` when it
            differs from ``pkg_name``. Defaults to ``pkg_name``.

    Raises:
        subprocess.CalledProcessError: If ``pip install`` exits non-zero.
        ModuleNotFoundError: If the module still cannot be imported after a
            successful install.
    """
    module_name = import_name or pkg_name
    try:
        module = importlib.import_module(module_name)
    except ModuleNotFoundError:
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', pkg_name])
        # The import system caches directory listings; refresh them so the
        # freshly installed package is visible to this running interpreter.
        importlib.invalidate_caches()
        module = importlib.import_module(module_name)
    # Only reached when an import succeeded, so a failed install surfaces its
    # own error instead of being replaced by a second import failure.
    globals()[module_name] = module

# Make each third-party package used by the next cell importable, installing
# any that are missing, then confirm to the user.
for pkg in (
    'pandas',
    'requests',
    'feedparser',
):
    _ensure(pkg)
print('Dependencies ready.')

In [ ]:
from pathlib import Path
import csv
import pandas as pd, requests, feedparser

# Use the working directory when it already holds catalog.csv; otherwise
# look for the catalog in its 'data' subdirectory.
if Path('catalog.csv').exists():
    BASE_DIR = Path.cwd()
else:
    BASE_DIR = Path.cwd() / 'data'
catalog_path = BASE_DIR.joinpath('catalog.csv')
cat = pd.read_csv(catalog_path)

# Phrases searched for (lower-cased) in RSS entry titles and summaries.
KEY_PHRASES = [
    'business',
    'world news',
    'economy',
    'politics',
]

def detect_filetype(url, resp):
    """Classify a fetched resource as 'rss', 'json' or 'csv'.

    The URL's extension is checked first: on the path component alone (so a
    trailing ``?query`` or ``#fragment`` does not hide it), then on the raw
    URL. If neither matches, the response's ``content-type`` header decides.

    Args:
        url: The URL that was requested.
        resp: Response object exposing a ``headers`` mapping with a ``get``
            method.

    Returns:
        'rss', 'json' or 'csv', or ``None`` when the type cannot be determined.
    """
    from urllib.parse import urlsplit

    suffix_types = (
        (('.rss', '.xml'), 'rss'),
        (('.json',), 'json'),
        (('.csv',), 'csv'),
    )
    lowered = url.lower()
    try:
        path = urlsplit(lowered).path
    except ValueError:
        # urlsplit rejects some malformed URLs; fall back to the raw string.
        path = lowered
    # Path first so 'feed.xml?x=1' is recognised; raw URL second so anything
    # that used to match on the full string still does.
    for candidate in (path, lowered):
        for suffixes, ftype in suffix_types:
            if candidate.endswith(suffixes):
                return ftype

    ct = (resp.headers.get('content-type') or '').lower()
    if 'json' in ct:
        return 'json'
    if 'csv' in ct or 'text/plain' in ct:
        return 'csv'
    if 'xml' in ct or 'rss' in ct:
        return 'rss'
    return None

# Interactive loop: fetch each URL the user enters, detect its file type,
# prompt for the remaining catalog fields, and append the confirmed row to
# the catalog file. A blank URL ends the session.
while True:
    url = input('URL (blank to exit): ').strip()
    if not url:
        break
    try:
        resp = requests.get(url, timeout=15, headers={'User-Agent': 'Mozilla/5.0'})
        resp.raise_for_status()
    except Exception as e:
        # Deliberately broad: any fetch failure is reported and the user is
        # re-prompted instead of the interactive session ending.
        print('Error fetching:', e)
        continue
    ftype = detect_filetype(url, resp)
    if not ftype:
        print('Could not determine file type.')
        continue
    title = ''
    if ftype == 'rss':
        feed = feedparser.parse(resp.content)
        title = feed.feed.get('title', '')
        # Lower-cased concatenation of every entry's title and summary.
        text = ' '.join(
            ' '.join(filter(None, [entry.get('title', ''), entry.get('summary', '')]))
            for entry in feed.entries
        ).lower()
        if any(p in text for p in KEY_PHRASES):
            print('Key phrase found in feed.')
    print('Detected type:', ftype)
    if title:
        print('Feed title:', title)
    # Prompted values are stripped, like the URL, so stray whitespace is not
    # stored in the catalog.
    row = {
        'category': input('Category: ').strip(),
        'source': input('Source: ').strip(),
        'filetype': ftype,
        'folder': input('Folder name: ').strip(),
        'url': url,
        'api_key': '',
        'cadence': input('Cadence (e.g., hourly): ').strip(),
        'last_fetched': '',
        'description': input('Description: ').strip(),
        'link': input('Link: ').strip(),
    }
    print('Proposed row:', row)
    if input('Add to catalog? [y/N] ').strip().lower().startswith('y'):
        fieldnames = list(cat.columns)
        # DictWriter raises ValueError for keys it has no column for; check
        # first so nothing is written and the session keeps going.
        unknown = [key for key in row if key not in fieldnames]
        if unknown:
            print('Catalog has no column(s) for:', ', '.join(unknown), '- row not added.')
            continue
        # If the file does not end with a line break, the appended row would
        # be glued onto the last existing record.
        last_byte = catalog_path.read_bytes()[-1:] if catalog_path.exists() else b''
        needs_newline = last_byte not in (b'', b'\n', b'\r')
        # UTF-8 matches what pd.read_csv assumes by default, independent of
        # the platform's locale encoding.
        with open(catalog_path, 'a', newline='', encoding='utf-8') as f:
            if needs_newline:
                f.write('\n')
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writerow(row)
        # Reload so later iterations see the current catalog columns.
        cat = pd.read_csv(catalog_path)
        print('Row added.')
    else:
        print('Skipped.')
