In [5]:
import requests
import pandas as pd
from tqdm.notebook import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed

def get_cutout(outfile, pos, size=30, low=False, dr3=False, verbose=False, auth=None, timeout=60):
    """
    Get a cutout at position pos with size size arcmin. Save the FITS file to outfile.

    Parameters
    ----------
    outfile : path of the file the FITS payload is written to.
    pos : value sent as the ``pos`` query parameter.
    size : value sent as the ``size`` query parameter (arcmin).
    low : if True request the ``-low-cutout.fits`` page, else ``-cutout.fits``.
    dr3 : if True request the ``dr3`` pages, else the ``dr2`` pages.
    verbose : if True print the request being made and the status code received.
    auth : forwarded unchanged to ``requests.get``.
    timeout : seconds forwarded to ``requests.get`` so a stalled server cannot
        block the calling thread indefinitely.

    Raises
    ------
    RuntimeError
        If the status code is not 200, or the response is not declared as
        ``application/fits``.
    """
    base = 'dr3' if dr3 else 'dr2'
    url = 'https://lofar-surveys.org/'
    page = base + '-low-cutout.fits' if low else base + '-cutout.fits'
    full_url = url + page

    if verbose:
        print(f"Trying {full_url} with params={{'pos': {pos}, 'size': {size}}}")

    # stream=True lets the status/headers be checked before the body is fetched;
    # the `with` block guarantees the connection is released on every path,
    # including the error paths where the body is never read.
    with requests.get(full_url, params={'pos': pos, 'size': size}, auth=auth,
                      stream=True, timeout=timeout) as response:
        if verbose:
            print(f"Received response code {response.status_code}")
        if response.status_code != 200:
            raise RuntimeError(f"Status code {response.status_code} returned")

        # Compare only the media type: ignore case and any "; param=value" suffix.
        content_type = (response.headers.get('content-type') or '').split(';')[0].strip().lower()
        if content_type != 'application/fits':
            raise RuntimeError("Server did not return a FITS file, possibly no coverage for this area")

        # Read the body before touching the filesystem so a failed download
        # does not leave an empty output file behind.
        payload = response.content

    with open(outfile, 'wb') as f:
        f.write(payload)

def download_worker(index, row, size, low, dr3, verbose, auth):
    """
    Worker function to download a single cutout and return status.

    Parameters
    ----------
    index : row label; used to name the output file ``cutout_{index}.fits``.
    row : mapping with 'RA (degrees)' and 'Dec (degrees)' entries.
    size, low, dr3, verbose, auth : forwarded to ``get_cutout``.

    Returns
    -------
    tuple
        ``(index, link, None)`` on success, where ``link`` is the request URL
        including the ``pos`` and ``size`` query for this row, or
        ``(index, None, message)`` on failure.
    """
    from urllib.parse import urlencode  # local import keeps this block self-contained

    ra = row['RA (degrees)']
    dec = row['Dec (degrees)']
    # A blank spreadsheet cell would otherwise be sent to the server as "nan".
    if pd.isna(ra) or pd.isna(dec):
        return index, None, "Missing RA/Dec value"

    pos = f"{ra},{dec}"
    outfile = f"cutout_{index}.fits"
    base = 'dr3' if dr3 else 'dr2'
    url = 'https://lofar-surveys.org/'
    page = base + '-low-cutout.fits' if low else base + '-cutout.fits'
    full_url = url + page
    # Record the complete request URL so the link identifies this row's cutout
    # rather than the bare endpoint shared by every row.
    link = f"{full_url}?{urlencode({'pos': pos, 'size': size}, safe=',')}"

    try:
        get_cutout(outfile, pos, size=size, low=low, dr3=dr3, verbose=verbose, auth=auth)
        return index, link, None  # Success
    except Exception as e:
        # Deliberate best-effort: one failed row must not abort the whole batch.
        return index, None, str(e)  # Failure


In [6]:
def process_excel_multithreaded(input_file, output_file, size=30, low=False, dr3=False, verbose=False, auth=None, max_workers=5):
    """
    Read RA and Dec from an Excel file, download FITS files in parallel, and update the file with links.

    Parameters
    ----------
    input_file : Excel file to read; must contain 'RA (degrees)' and
        'Dec (degrees)' columns.
    output_file : Excel file written with an added 'Download Link' column.
    size, low, dr3, verbose, auth : forwarded to ``download_worker`` for every row.
    max_workers : number of threads in the pool.

    Raises
    ------
    ValueError
        If either coordinate column is missing from the input file.
    """
    df = pd.read_excel(input_file)
    required_columns = ('RA (degrees)', 'Dec (degrees)')
    if any(column not in df.columns for column in required_columns):
        # The message must spell the column names exactly as they are looked up.
        raise ValueError("The input Excel file must have 'RA (degrees)' and 'Dec (degrees)' columns.")

    # Rows whose download fails keep this empty default.
    df['Download Link'] = ''

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(download_worker, index, row, size, low, dr3, verbose, auth)
            for index, row in df.iterrows()
        ]

        # Results arrive in completion order; each carries its own row index.
        for future in tqdm(as_completed(futures), total=len(futures), desc="Downloading files"):
            index, link, error = future.result()
            if link:
                df.at[index, 'Download Link'] = link
            elif error:
                print(f"Failed to download row {index}: {error}")

    # Save updated DataFrame back to Excel
    df.to_excel(output_file, index=False)
    print(f"Updated Excel file saved to {output_file}")


In [7]:
# Workbook read by process_excel_multithreaded; it must contain the
# 'RA (degrees)' and 'Dec (degrees)' columns.
input_file = "coordinates.xlsx"  # Path to input Excel file
# Workbook written by process_excel_multithreaded: the input rows plus a
# 'Download Link' column.
output_file = "output_file.xlsx"  # Path to save updated Excel file


In [None]:
# Request size=30 (arcmin) cutouts from the '-low-cutout.fits' page with 10
# worker threads; dr3 keeps its default of False, so the 'dr2' pages are used.
process_excel_multithreaded(input_file, output_file, size=30, low=True, max_workers=10)


Downloading files:   0%|          | 0/22000 [00:00<?, ?it/s]

Failed to download row 4: Server did not return a FITS file, possibly no coverage for this area
Failed to download row 9: Server did not return a FITS file, possibly no coverage for this area
Failed to download row 3: Server did not return a FITS file, possibly no coverage for this area
Failed to download row 5: Server did not return a FITS file, possibly no coverage for this area
Failed to download row 8: Server did not return a FITS file, possibly no coverage for this area
Failed to download row 1: Server did not return a FITS file, possibly no coverage for this area
Failed to download row 2: Server did not return a FITS file, possibly no coverage for this area
Failed to download row 6: Server did not return a FITS file, possibly no coverage for this area
Failed to download row 10: Server did not return a FITS file, possibly no coverage for this area
Failed to download row 12: Server did not return a FITS file, possibly no coverage for this area
Failed to download row 11: Server did 