In [None]:
import requests
import importlib
import pandas as pd
from pathlib import Path
from collections import Counter

import brew_omics
importlib.reload(brew_omics)

# --- ENTEx metadata tables from the ENCODE portal ---------------------------
# Each fetch_encode_metadata call yields a (table, path) pair. The first
# argument is a pickle path under data/entex/ and the second is the portal URL.
# NOTE(review): the helper appears to reuse the pickle as a cache and to report
# value counts for the `check_ids` columns -- confirm against brew_omics.

# Experiment report (released experiments tagged ENTEx).
experiment_report_url = "https://www.encodeproject.org/report.tsv?type=Experiment&status=released&internal_tags=ENTEx&limit=all"
report_expt, report_expt_path = brew_omics.fetch_encode_metadata(
    "data/entex/report_expt.pkl",
    experiment_report_url,
    data_type='Experiment',
    check_ids=['Accession'],
    sep='\t',
    skip_rows=1,
)

# Biosample report (released biosamples tagged ENTEx).
biosample_report_url = "https://www.encodeproject.org/report.tsv?type=Biosample&status=released&internal_tags=ENTEx&limit=all"
report_sam, report_sam_path = brew_omics.fetch_encode_metadata(
    "data/entex/report_sam.pkl",
    biosample_report_url,
    data_type='Biosample',
    check_ids=['Accession'],
    sep='\t',
    skip_rows=1,
)

# File-level table, requested through the /metadata/ endpoint with an
# Experiment query; unlike the two reports above it is read with skip_rows=0.
file_metadata_url = "https://www.encodeproject.org/metadata/?type=Experiment&status=released&internal_tags=ENTEx&limit=all"
metadata_file, metadata_file_path = brew_omics.fetch_encode_metadata(
    "data/entex/metadata_file.pkl",
    file_metadata_url,
    data_type='File',
    check_ids=['ID', 'Dataset'],
    sep='\t',
    skip_rows=0,
)

# NOTE: the request below failed; possibly the internal_tags filter is not supported for type=File (unconfirmed).
# report_file, report_file_path = brew_omics.fetch_encode_metadata(
#     "data/entex/report_file.pkl",
#     "https://www.encodeproject.org/report.tsv?type=File&status=released&internal_tags=ENTEx&limit=all",
#     check_ids=['ID', 'Dataset'],
#     skip_rows=1, sep='\t',
#     data_type='File')

Loaded cached ENCODE Experiment metadata with shape: (1586, 40)
Experiment metadata column 'Accession' value counts: Counter({'ENCSR940FNL': 1, 'ENCSR780CNW': 1, 'ENCSR642DSR': 1, 'ENCSR172LVU': 1, 'ENCSR860GPM': 1, 'ENCSR237GVU': 1, 'ENCSR011HZJ': 1, 'ENCSR181ATL': 1, 'ENCSR743GCE': 1, 'ENCSR525JJM': 1, 'ENCSR360UUM': 1, 'ENCSR153AXQ': 1, 'ENCSR276OPI': 1, 'ENCSR522MTS': 1, 'ENCSR753HPM': 1, 'ENCSR595MTV': 1, 'ENCSR965BLU': 1, 'ENCSR948TOS': 1, 'ENCSR701GIE': 1, 'ENCSR764OHK': 1, 'ENCSR545WFH': 1, 'ENCSR189JIH': 1, 'ENCSR274FJN': 1, 'ENCSR123ZFD': 1, 'ENCSR557QRO': 1, 'ENCSR326YHP': 1, 'ENCSR043XTM': 1, 'ENCSR258OVY': 1, 'ENCSR720WEB': 1, 'ENCSR080JPX': 1, 'ENCSR156WTZ': 1, 'ENCSR514EZP': 1, 'ENCSR589DBF': 1, 'ENCSR621MTP': 1, 'ENCSR074WIB': 1, 'ENCSR714FDJ': 1, 'ENCSR429VWL': 1, 'ENCSR948DNK': 1, 'ENCSR152HLZ': 1, 'ENCSR868ZEI': 1, 'ENCSR757EGB': 1, 'ENCSR652WJE': 1, 'ENCSR178FVP': 1, 'ENCSR012KZW': 1, 'ENCSR845BEG': 1, 'ENCSR803MVC': 1, 'ENCSR585KBH': 1, 'ENCSR799KLF': 1, 'ENCSR702O

In [None]:
# Build the ENTEx subset of the master ENCODE file report.
#
# Inputs : data/encode/report_file_biosample.pkl (master file report with
#          biosample information) and `report_expt` from the metadata cell.
# Outputs: report_file_biosample_entex / files_with_biosample, each written to
#          data/entex/ as both pickle and CSV.

# Load the master file report and index it by file accession. The 'Accession'
# column is kept (drop=False) and the index is named 'Index' so the two do not
# clash. The chained form avoids mutating the loaded frame in place.
report_file_biosample = (
    pd.read_pickle("data/encode/report_file_biosample.pkl")
    .set_index('Accession', drop=False)
    .rename_axis('Index')
)
file_accessions_in_report = set(report_file_biosample['Accession'])

# Column 'Files' in report_expt holds, per experiment, a comma-separated list
# of file paths; the accession is the last path component. Surrounding
# whitespace and empty tokens (e.g. from a trailing comma) are discarded.
file_accessions = []
for file_list in report_expt['Files'].dropna():
    for file_ref in file_list.split(','):
        accession = file_ref.strip().strip('/').split('/')[-1]
        if accession:
            file_accessions.append(accession)

print(f"A total of file accessions found in experiments: {len(file_accessions)}")
print(file_accessions[:5])

# De-duplicate while preserving order: a repeated label in .loc[...] would
# produce repeated rows and a non-unique index in the extracted table.
unique_file_accessions = list(dict.fromkeys(file_accessions))
n_duplicates = len(file_accessions) - len(unique_file_accessions)
if n_duplicates:
    print(f"Ignoring {n_duplicates} duplicated file accessions.")

# Check that every file referenced by an experiment exists in the master report.
missing_files = [f for f in unique_file_accessions if f not in file_accessions_in_report]
if len(missing_files) == 0:
    print("All files in experiments are present in report_file_biosample.")
else:
    print(f"Missing files in report_file_biosample: {missing_files}")
    print(f"Skipping {len(missing_files)} missing file accessions during extraction.")

# Extract the rows for the accessions that are actually present. Selecting a
# missing label with .loc would raise KeyError, so missing ones are left out
# (they were reported above).
present_file_accessions = [f for f in unique_file_accessions if f in file_accessions_in_report]
report_file_biosample_entex = report_file_biosample.loc[present_file_accessions]
print(f"Extracted report_file_biosample_entex shape: {report_file_biosample_entex.shape}")

# Save the extracted table. The index duplicates the 'Accession' column, so it
# is not written to the CSV.
report_file_biosample_entex.to_pickle("data/entex/report_file_biosample.pkl")
report_file_biosample_entex.to_csv("data/entex/report_file_biosample.csv", index=False)
print("Saved extracted report_file_biosample_entex to data/entex/report_file_biosample.pkl/csv")

# Save the subset of files that carry a biosample accession for future use.
files_with_biosample = report_file_biosample_entex[report_file_biosample_entex['Biosample accession'].notna()]
files_with_biosample.to_pickle("data/entex/files_with_biosample.pkl")
files_with_biosample.to_csv("data/entex/files_with_biosample.csv", index=False)
print("Saved files_with_biosample to data/entex/files_with_biosample.pkl/csv")

A total of file accessions found in experiments: 37291
['ENCFF854PQD', 'ENCFF851TIV', 'ENCFF565HBG', 'ENCFF542ZZT', 'ENCFF064KQN']
All files in experiments are present in report_file_biosample.
Extracted report_file_biosample_entex shape: (37291, 90)
Saved extracted report_file_biosample_entex to data/entex/report_file_biosample_entex.pkl/csv


In [38]:
# Persist the files that are linked to a biosample (pickle + CSV) so that
# later steps can reload this subset directly.
# NOTE(review): the preceding cell ends with these same statements; one of the
# two copies is probably redundant.
has_biosample = report_file_biosample_entex['Biosample accession'].notna()
files_with_biosample = report_file_biosample_entex.loc[has_biosample]
files_with_biosample.to_pickle("data/entex/files_with_biosample.pkl")
files_with_biosample.to_csv("data/entex/files_with_biosample.csv", index=False)
print("Saved files_with_biosample to data/entex/files_with_biosample.pkl/csv")

Saved files_with_biosample to data/entex/files_with_biosample.pkl/csv


In [39]:
# Download the selected file types for every ENTEx biosample and record which
# files were saved.

# Reload once so that local edits to the helper module are picked up.
importlib.reload(brew_omics)

# Root directory handed to the download helper as `dataset_path`.
dataset_path = Path("data/entex/dataset")

# Assay name -> file types to download for that assay.
assay_file_types = {
    'RNA-seq': ['bam'],
    'DNase-seq': ['bam'],
}

# Index the file table by accession (keeping the 'Accession' column) and name
# the index 'Index'. Rebinding the name instead of using inplace=True keeps the
# cell safe to re-run and leaves any other reference to the old frame untouched.
report_file_biosample_entex = (
    report_file_biosample_entex
    .set_index('Accession', drop=False)
    .rename_axis('Index')
)

# Pre-computed groupings passed to the helper. groupby() leaves out rows whose
# key is missing, so files without a biosample accession are not in any
# biosample group.
files_groupby_biosample = report_file_biosample_entex.groupby('Biosample accession')
files_groupby_dataset = report_file_biosample_entex.groupby('Dataset')

saved_file_accessions = brew_omics.download_encode_files_groupby_biosample(
    report_file_biosample_entex,
    biosample_accessions=list(files_groupby_biosample.groups.keys()),
    assay_file_types=assay_file_types,
    dataset_path=dataset_path,
    files_groupby_biosample=files_groupby_biosample,
    files_groupby_dataset=files_groupby_dataset,
)

print(f"Downloaded a total of {len(saved_file_accessions)} files for selected biosamples and assay types.")

# Keep the metadata rows of the files the helper reported as saved and persist
# them. The index duplicates 'Accession', so it is not written to the CSV.
final_files_downloaded = report_file_biosample_entex[report_file_biosample_entex['Accession'].isin(saved_file_accessions)]
final_files_downloaded.to_pickle("data/entex/final_files_downloaded.pkl")
final_files_downloaded.to_csv("data/entex/final_files_downloaded.csv", index=False)
print("Saved final downloaded files metadata to data/entex/final_files_downloaded.pkl/csv")

Biosamples:   4%|▍         | 28/669 [00:00<00:02, 276.98it/s]

Biosample: ENCBS002CZJ; saved metadata to data/entex/dataset/ENCBS002CZJ.csv
    Assay: RNA-seq, Datasets: []
    Assay: DNase-seq, Datasets: []
Biosample: ENCBS004OVW; saved metadata to data/entex/dataset/ENCBS004OVW.csv
    Assay: RNA-seq, Datasets: []
    Assay: DNase-seq, Datasets: []
Biosample: ENCBS005MNT; saved metadata to data/entex/dataset/ENCBS005MNT.csv
    Assay: RNA-seq, Datasets: []
    Assay: DNase-seq, Datasets: []
Biosample: ENCBS005NEU; saved metadata to data/entex/dataset/ENCBS005NEU.csv
    Assay: RNA-seq, Datasets: []
    Assay: DNase-seq, Datasets: []
Biosample: ENCBS006KAW; saved metadata to data/entex/dataset/ENCBS006KAW.csv
    Assay: RNA-seq, Datasets: ['/experiments/ENCSR754WLW/', '/experiments/ENCSR264COY/']
    Dataset /experiments/ENCSR754WLW/ has 27 files.
        Excluding 3 'transcriptome alignments' bam files for RNA-seq assay.
    Downloading 3 files of type 'bam' for assay 'RNA-seq' in biosample 'ENCBS006KAW'
    File accession: ENCFF306MRG
        C

Biosamples:  13%|█▎        | 84/669 [00:00<00:02, 275.30it/s]

Biosample: ENCBS085LTC; saved metadata to data/entex/dataset/ENCBS085LTC.csv
    Assay: RNA-seq, Datasets: ['/experiments/ENCSR471RUK/', '/experiments/ENCSR418FVT/']
    Dataset /experiments/ENCSR471RUK/ has 27 files.
        Excluding 3 'transcriptome alignments' bam files for RNA-seq assay.
    Downloading 3 files of type 'bam' for assay 'RNA-seq' in biosample 'ENCBS085LTC'
    File accession: ENCFF178ZZZ
        Checking derived from accession: /files/ENCFF582ILA/ for file accession: ENCFF178ZZZ
        File accession: ENCFF582ILA is directly from biosample: ENCBS085LTC
        Downloading ENCFF178ZZZ from https://www.encodeproject.org/files/ENCFF178ZZZ/@@download/ENCFF178ZZZ.bam to data/entex/dataset/ENCBS085LTC/RNA-seq/ENCFF178ZZZ.bam
    File accession: ENCFF879VGW
        Checking derived from accession: /files/ENCFF582ILA/ for file accession: ENCFF879VGW
        File accession: ENCFF582ILA is directly from biosample: ENCBS085LTC
        Downloading ENCFF879VGW from https://www.

Biosamples:  17%|█▋        | 112/669 [00:00<00:02, 249.24it/s]

Biosample: ENCBS158OEU; saved metadata to data/entex/dataset/ENCBS158OEU.csv
    Assay: RNA-seq, Datasets: ['/experiments/ENCSR812JEU/', '/experiments/ENCSR146LBD/']
    Dataset /experiments/ENCSR812JEU/ has 13 files.
    Downloading 2 files of type 'bam' for assay 'RNA-seq' in biosample 'ENCBS158OEU'
    File accession: ENCFF923MIC
        Checking derived from accession: /files/ENCFF310THB/ for file accession: ENCFF923MIC
        File accession: ENCFF310THB is directly from biosample: ENCBS158OEU
        Downloading ENCFF923MIC from https://www.encodeproject.org/files/ENCFF923MIC/@@download/ENCFF923MIC.bam to data/entex/dataset/ENCBS158OEU/RNA-seq/ENCFF923MIC.bam
    File accession: ENCFF973CHL
        Checking derived from accession: /files/ENCFF310THB/ for file accession: ENCFF973CHL
        File accession: ENCFF310THB is directly from biosample: ENCBS158OEU
        Downloading ENCFF973CHL from https://www.encodeproject.org/files/ENCFF973CHL/@@download/ENCFF973CHL.bam to data/entex

Biosamples:  24%|██▍       | 162/669 [00:00<00:02, 184.42it/s]

Biosample: ENCBS173VHR; saved metadata to data/entex/dataset/ENCBS173VHR.csv
    Assay: RNA-seq, Datasets: []
    Assay: DNase-seq, Datasets: []
Biosample: ENCBS174FYT; saved metadata to data/entex/dataset/ENCBS174FYT.csv
    Assay: RNA-seq, Datasets: []
    Assay: DNase-seq, Datasets: []
Biosample: ENCBS178APY; saved metadata to data/entex/dataset/ENCBS178APY.csv
    Assay: RNA-seq, Datasets: ['/experiments/ENCSR491VSN/']
    Dataset /experiments/ENCSR491VSN/ has 26 files.
    Downloading 4 files of type 'bam' for assay 'RNA-seq' in biosample 'ENCBS178APY'
    File accession: ENCFF943JGL
        Checking derived from accession: /files/GRCh38_no_alt_analysis_set_GCA_000001405.15/ for file accession: ENCFF943JGL
        File accession: GRCh38_no_alt_analysis_set_GCA_000001405.15 not found in report_file_biosample index.
        Checking derived from accession: /files/ENCFF881FED/ for file accession: ENCFF943JGL
        File accession: ENCFF881FED has non-string Derived from: nan.
      

Biosamples:  32%|███▏      | 213/669 [00:00<00:02, 213.59it/s]

Biosample: ENCBS243UXH; saved metadata to data/entex/dataset/ENCBS243UXH.csv
    Assay: RNA-seq, Datasets: []
    Assay: DNase-seq, Datasets: []
Biosample: ENCBS243YQD; saved metadata to data/entex/dataset/ENCBS243YQD.csv
    Assay: RNA-seq, Datasets: []
    Assay: DNase-seq, Datasets: []
Biosample: ENCBS246ZPO; saved metadata to data/entex/dataset/ENCBS246ZPO.csv
    Assay: RNA-seq, Datasets: []
    Assay: DNase-seq, Datasets: []
Biosample: ENCBS248LIK; saved metadata to data/entex/dataset/ENCBS248LIK.csv
    Assay: RNA-seq, Datasets: []
    Assay: DNase-seq, Datasets: []
Biosample: ENCBS251FWW; saved metadata to data/entex/dataset/ENCBS251FWW.csv
    Assay: RNA-seq, Datasets: []
    Assay: DNase-seq, Datasets: ['/experiments/ENCSR278SKG/']
    Dataset /experiments/ENCSR278SKG/ has 40 files.
    Downloading 6 files of type 'bam' for assay 'DNase-seq' in biosample 'ENCBS251FWW'
    File accession: ENCFF928MMI
        Checking derived from accession: /files/ENCFF496RSW/ for file accessi

Biosamples:  40%|████      | 269/669 [00:01<00:01, 243.07it/s]

    Assay: RNA-seq, Datasets: ['/experiments/ENCSR391VGU/']
    Dataset /experiments/ENCSR391VGU/ has 27 files.
        Excluding 3 'transcriptome alignments' bam files for RNA-seq assay.
    Downloading 3 files of type 'bam' for assay 'RNA-seq' in biosample 'ENCBS323ATG'
    File accession: ENCFF522ZKF
        Checking derived from accession: /files/ENCFF429YOS/ for file accession: ENCFF522ZKF
        File accession: ENCFF429YOS is directly from biosample: ENCBS323ATG
        Downloading ENCFF522ZKF from https://www.encodeproject.org/files/ENCFF522ZKF/@@download/ENCFF522ZKF.bam to data/entex/dataset/ENCBS323ATG/RNA-seq/ENCFF522ZKF.bam
    File accession: ENCFF231YMH
        Checking derived from accession: /files/ENCFF429YOS/ for file accession: ENCFF231YMH
        File accession: ENCFF429YOS is directly from biosample: ENCBS323ATG
        Downloading ENCFF231YMH from https://www.encodeproject.org/files/ENCFF231YMH/@@download/ENCFF231YMH.bam to data/entex/dataset/ENCBS323ATG/RNA-seq/E

Biosamples:  48%|████▊     | 324/669 [00:01<00:01, 256.55it/s]

Biosample: ENCBS404OGY; saved metadata to data/entex/dataset/ENCBS404OGY.csv
    Assay: RNA-seq, Datasets: []
    Assay: DNase-seq, Datasets: []
Biosample: ENCBS405TFR; saved metadata to data/entex/dataset/ENCBS405TFR.csv
    Assay: RNA-seq, Datasets: []
    Assay: DNase-seq, Datasets: ['/experiments/ENCSR828FVZ/']
    Dataset /experiments/ENCSR828FVZ/ has 38 files.
    Downloading 4 files of type 'bam' for assay 'DNase-seq' in biosample 'ENCBS405TFR'
    File accession: ENCFF984FKS
        Checking derived from accession: /files/ENCFF505UUU/ for file accession: ENCFF984FKS
        Checking derived from accession: /files/ENCFF496RSW/ for file accession: ENCFF505UUU
        File accession: ENCFF496RSW not found in report_file_biosample index.
        Checking derived from accession: /files/ENCFF448XAA/ for file accession: ENCFF505UUU
        File accession: ENCFF448XAA is directly from biosample: ENCBS405TFR
        Downloading ENCFF984FKS from https://www.encodeproject.org/files/ENCFF9

Biosamples:  57%|█████▋    | 379/669 [00:01<00:01, 249.21it/s]

Biosample: ENCBS486GAC; saved metadata to data/entex/dataset/ENCBS486GAC.csv
    Assay: RNA-seq, Datasets: []
    Assay: DNase-seq, Datasets: []
Biosample: ENCBS488XGK; saved metadata to data/entex/dataset/ENCBS488XGK.csv
    Assay: RNA-seq, Datasets: ['/experiments/ENCSR219YHR/']
    Dataset /experiments/ENCSR219YHR/ has 26 files.
    Downloading 4 files of type 'bam' for assay 'RNA-seq' in biosample 'ENCBS488XGK'
    File accession: ENCFF229NDR
        Checking derived from accession: /files/GRCh38_no_alt_analysis_set_GCA_000001405.15/ for file accession: ENCFF229NDR
        File accession: GRCh38_no_alt_analysis_set_GCA_000001405.15 not found in report_file_biosample index.
        Checking derived from accession: /files/ENCFF799SRR/ for file accession: ENCFF229NDR
        File accession: ENCFF799SRR is directly from biosample: ENCBS488XGK
        Downloading ENCFF229NDR from https://www.encodeproject.org/files/ENCFF229NDR/@@download/ENCFF229NDR.bam to data/entex/dataset/ENCBS488XGK

Biosamples:  64%|██████▍   | 430/669 [00:01<00:00, 242.16it/s]

    Assay: RNA-seq, Datasets: ['/experiments/ENCSR383PLJ/', '/experiments/ENCSR098BUF/']
    Dataset /experiments/ENCSR383PLJ/ has 13 files.
    Downloading 2 files of type 'bam' for assay 'RNA-seq' in biosample 'ENCBS571LWZ'
    File accession: ENCFF525NKJ
        Checking derived from accession: /files/ENCFF657CXK/ for file accession: ENCFF525NKJ
        File accession: ENCFF657CXK is directly from biosample: ENCBS571LWZ
        Downloading ENCFF525NKJ from https://www.encodeproject.org/files/ENCFF525NKJ/@@download/ENCFF525NKJ.bam to data/entex/dataset/ENCBS571LWZ/RNA-seq/ENCFF525NKJ.bam
    File accession: ENCFF938CRE
        Checking derived from accession: /files/ENCFF657CXK/ for file accession: ENCFF938CRE
        File accession: ENCFF657CXK is directly from biosample: ENCBS571LWZ
        Downloading ENCFF938CRE from https://www.encodeproject.org/files/ENCFF938CRE/@@download/ENCFF938CRE.bam to data/entex/dataset/ENCBS571LWZ/RNA-seq/ENCFF938CRE.bam
    Dataset /experiments/ENCSR09

Biosamples:  72%|███████▏  | 484/669 [00:02<00:00, 254.24it/s]

Biosample: ENCBS643MHO; saved metadata to data/entex/dataset/ENCBS643MHO.csv
    Assay: RNA-seq, Datasets: []
    Assay: DNase-seq, Datasets: []
Biosample: ENCBS644JJL; saved metadata to data/entex/dataset/ENCBS644JJL.csv
    Assay: RNA-seq, Datasets: []
    Assay: DNase-seq, Datasets: []
Biosample: ENCBS644RPG; saved metadata to data/entex/dataset/ENCBS644RPG.csv
    Assay: RNA-seq, Datasets: []
    Assay: DNase-seq, Datasets: ['/experiments/ENCSR791BHE/']
    Dataset /experiments/ENCSR791BHE/ has 40 files.
    Downloading 4 files of type 'bam' for assay 'DNase-seq' in biosample 'ENCBS644RPG'
    File accession: ENCFF895NHV
        Checking derived from accession: /files/ENCFF759GQX/ for file accession: ENCFF895NHV
        Checking derived from accession: /files/ENCFF496RSW/ for file accession: ENCFF759GQX
        File accession: ENCFF496RSW not found in report_file_biosample index.
        Checking derived from accession: /files/ENCFF806BGH/ for file accession: ENCFF759GQX
        Fi

Biosamples:  76%|███████▌  | 510/669 [00:02<00:00, 252.28it/s]

Biosample: ENCBS711XVO; saved metadata to data/entex/dataset/ENCBS711XVO.csv
    Assay: RNA-seq, Datasets: ['/experiments/ENCSR425RGZ/', '/experiments/ENCSR961FIG/']
    Dataset /experiments/ENCSR425RGZ/ has 27 files.
        Excluding 3 'transcriptome alignments' bam files for RNA-seq assay.
    Downloading 3 files of type 'bam' for assay 'RNA-seq' in biosample 'ENCBS711XVO'
    File accession: ENCFF470XVM
        Checking derived from accession: /files/ENCFF223QXV/ for file accession: ENCFF470XVM
        File accession: ENCFF223QXV is directly from biosample: ENCBS711XVO
        Downloading ENCFF470XVM from https://www.encodeproject.org/files/ENCFF470XVM/@@download/ENCFF470XVM.bam to data/entex/dataset/ENCBS711XVO/RNA-seq/ENCFF470XVM.bam
    File accession: ENCFF967XNR
        Checking derived from accession: /files/ENCFF223QXV/ for file accession: ENCFF967XNR
        File accession: ENCFF223QXV is directly from biosample: ENCBS711XVO
        Downloading ENCFF967XNR from https://www.

Biosamples:  85%|████████▍ | 568/669 [00:02<00:00, 271.01it/s]

Biosample: ENCBS801NKM; saved metadata to data/entex/dataset/ENCBS801NKM.csv
    Assay: RNA-seq, Datasets: []
    Assay: DNase-seq, Datasets: []
Biosample: ENCBS802IML; saved metadata to data/entex/dataset/ENCBS802IML.csv
    Assay: RNA-seq, Datasets: []
    Assay: DNase-seq, Datasets: []
Biosample: ENCBS805AZN; saved metadata to data/entex/dataset/ENCBS805AZN.csv
    Assay: RNA-seq, Datasets: []
    Assay: DNase-seq, Datasets: []
Biosample: ENCBS806WZK; saved metadata to data/entex/dataset/ENCBS806WZK.csv
    Assay: RNA-seq, Datasets: []
    Assay: DNase-seq, Datasets: []
Biosample: ENCBS812XPB; saved metadata to data/entex/dataset/ENCBS812XPB.csv
    Assay: RNA-seq, Datasets: []
    Assay: DNase-seq, Datasets: []
Biosample: ENCBS812ZNH; saved metadata to data/entex/dataset/ENCBS812ZNH.csv
    Assay: RNA-seq, Datasets: ['/experiments/ENCSR485WBR/', '/experiments/ENCSR269YSX/']
    Dataset /experiments/ENCSR485WBR/ has 27 files.
        Excluding 3 'transcriptome alignments' bam files 

Biosamples:  97%|█████████▋| 652/669 [00:02<00:00, 269.31it/s]

Biosample: ENCBS892ESJ; saved metadata to data/entex/dataset/ENCBS892ESJ.csv
    Assay: RNA-seq, Datasets: []
    Assay: DNase-seq, Datasets: []
Biosample: ENCBS894VJJ; saved metadata to data/entex/dataset/ENCBS894VJJ.csv
    Assay: RNA-seq, Datasets: ['/experiments/ENCSR812AKX/', '/experiments/ENCSR968DMR/']
    Dataset /experiments/ENCSR812AKX/ has 27 files.
        Excluding 3 'transcriptome alignments' bam files for RNA-seq assay.
    Downloading 3 files of type 'bam' for assay 'RNA-seq' in biosample 'ENCBS894VJJ'
    File accession: ENCFF087LVI
        Checking derived from accession: /files/ENCFF663VCC/ for file accession: ENCFF087LVI
        File accession: ENCFF663VCC is directly from biosample: ENCBS894VJJ
        Downloading ENCFF087LVI from https://www.encodeproject.org/files/ENCFF087LVI/@@download/ENCFF087LVI.bam to data/entex/dataset/ENCBS894VJJ/RNA-seq/ENCFF087LVI.bam
    File accession: ENCFF772DXL
        Checking derived from accession: /files/ENCFF663VCC/ for file acc

Biosamples: 100%|██████████| 669/669 [00:02<00:00, 243.34it/s]


Biosample: ENCBS974IWX; saved metadata to data/entex/dataset/ENCBS974IWX.csv
    Assay: RNA-seq, Datasets: []
    Assay: DNase-seq, Datasets: ['/experiments/ENCSR164WOF/']
    Dataset /experiments/ENCSR164WOF/ has 40 files.
    Downloading 6 files of type 'bam' for assay 'DNase-seq' in biosample 'ENCBS974IWX'
    File accession: ENCFF909FQD
        Checking derived from accession: /files/ENCFF762MJQ/ for file accession: ENCFF909FQD
        File accession: ENCFF762MJQ not found in report_file_biosample index.
        Checking derived from accession: /files/ENCFF258TNQ/ for file accession: ENCFF909FQD
        Checking derived from accession: /files/ENCFF418NRL/ for file accession: ENCFF258TNQ
        File accession: ENCFF418NRL is directly from biosample: ENCBS974IWX
        Downloading ENCFF909FQD from https://www.encodeproject.org/files/ENCFF909FQD/@@download/ENCFF909FQD.bam to data/entex/dataset/ENCBS974IWX/DNase-seq/ENCFF909FQD.bam
    File accession: ENCFF628FMV
        Checking der