In [None]:
# Tasic 2018 (study accession SRP150473): fetch the ENA file report as TSV.
# Requested columns, in order: run_accession, fastq_ftp, fastq_md5.
wget \
    --output-document=tasic2018_metadata.tsv \
    "https://www.ebi.ac.uk/ena/portal/api/filereport?accession=SRP150473&result=read_run&fields=run_accession,fastq_ftp,fastq_md5&format=tsv&download=true"

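## Simple Download

Each of the following sections is an alternative way to generate `download_commands.sh`; every one overwrites the file, so run only the variant you want before the "Execute" step.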

In [None]:
# Build download_commands.sh: one resumable `wget -c` command per FASTQ file.
#
# The metadata report has three tab-separated columns
# (run_accession, fastq_ftp, fastq_md5). All three must be named in `read`:
# with only two variables, the last one receives the rest of the line, so the
# MD5 column would be appended to the URL list and emitted as bogus URLs.
# `tail -n +2` skips the header row.
tail -n +2 tasic2018_metadata.tsv | while IFS=$'\t' read -r run_acc fastq_urls md5s; do
    # A run can have several FASTQ files (e.g. paired-end), separated by ';'.
    # md5s is intentionally unused in this variant.
    IFS=';' read -ra urls <<< "$fastq_urls"
    for url in "${urls[@]}"; do
        echo "wget -c 'ftp://$url'"
    done
done > download_commands.sh

## Checks for existing files before adding them to the download list

In [None]:
# Build download_commands.sh, skipping files that already exist locally
# (in the current directory) and are non-empty.
#
# The metadata report has three tab-separated columns
# (run_accession, fastq_ftp, fastq_md5). All three must be named in `read`:
# with only two variables, the last one receives the rest of the line, so the
# MD5 column would leak into the URL list, corrupting both the generated
# commands and the filenames used for the existence check.
# `tail -n +2` skips the header row.
tail -n +2 tasic2018_metadata.tsv | while IFS=$'\t' read -r run_acc fastq_urls md5s; do
    # A run can have several FASTQ files (e.g. paired-end), separated by ';'.
    # md5s is intentionally unused in this variant.
    IFS=';' read -ra urls <<< "$fastq_urls"
    for url in "${urls[@]}"; do
        # Local filename is the last path component of the URL
        filename=$(basename "$url")
        # -s is true only for an existing, non-empty file
        if [ ! -s "$filename" ]; then
            echo "wget -c 'ftp://$url'"
        else
            # Diagnostics go to stderr so they do not end up in the script
            echo "# Skipping $filename - already exists" >&2
        fi
    done
done > download_commands.sh

## Add an MD5 check if you want to verify existing files

In [None]:
# Generate download_commands.sh, verifying any file already present against
# the MD5 listed in the metadata report. Files whose checksum matches are
# skipped; missing or mismatching files get a `wget -c` command.
# Progress messages are written to stderr so they stay out of the script.
tail -n +2 tasic2018_metadata.tsv | while IFS=$'\t' read -r run_acc fastq_urls md5s; do
    # URLs and checksums are parallel ';'-separated lists
    IFS=';' read -ra url_list <<< "$fastq_urls"
    IFS=';' read -ra md5_list <<< "$md5s"

    for idx in "${!url_list[@]}"; do
        remote_path="${url_list[$idx]}"
        expected_md5="${md5_list[$idx]}"
        local_file=$(basename "$remote_path")

        if [[ -f "$local_file" ]]; then
            # md5sum prints "<hash>  <name>"; keep only the hash
            actual_md5=$(md5sum "$local_file" | cut -d' ' -f1)
            if [[ "$actual_md5" == "$expected_md5" ]]; then
                echo "# Skipping $local_file - already exists with correct MD5" >&2
                continue
            fi
            # NOTE(review): `wget -c` resumes a partial file, but it may refuse
            # to re-fetch a file that is already full-size yet corrupt - confirm
            # whether such files should be deleted before running the script.
            echo "# $local_file exists but MD5 mismatch - will redownload" >&2
        fi
        echo "wget -c 'ftp://$remote_path'"
    done
done > download_commands.sh

## Use HTTPS instead of FTP, which is generally more reliable

In [None]:
# Build download_commands.sh using HTTPS URLs instead of FTP.
#
# The metadata report has three tab-separated columns
# (run_accession, fastq_ftp, fastq_md5). All three must be named in `read`:
# with only two variables, the last one receives the rest of the line, so the
# MD5 column would leak into the URL list. `tail -n +2` skips the header row.
tail -n +2 tasic2018_metadata.tsv | while IFS=$'\t' read -r run_acc fastq_urls md5s; do
    # A run can have several FASTQ files (e.g. paired-end), separated by ';'.
    # md5s is intentionally unused in this variant.
    IFS=';' read -ra urls <<< "$fastq_urls"
    for url in "${urls[@]}"; do
        # The other variants use 'ftp://$url', i.e. the field already starts
        # with the host. Strip that host if present before prepending the HTTPS
        # base, otherwise the host would appear twice in the URL.
        remote_path="${url#ftp.sra.ebi.ac.uk/}"
        https_url="https://ftp.sra.ebi.ac.uk/${remote_path}"
        echo "wget -c '$https_url'"
    done
done > download_commands.sh

## Execute

In [None]:
# Mark the generated script as executable.
chmod +x download_commands.sh

# Feed the script to parallel on stdin: each line is run as its own job,
# with up to 32 jobs running at the same time.
parallel --jobs 32 < download_commands.sh