In [None]:
from src.datasets.download_cached_files import download_zenodo_files

# Run the project's download helper before any of the data wrappers are used.
# NOTE(review): presumably fetches the cached dataset files from Zenodo —
# confirm against src.datasets.download_cached_files.
download_zenodo_files()

In [None]:
from src.dataloader.data_wrapper import (
    RealClinVar, OligogenicDataWrapper, MAVEDataWrapper,
    GWASDataWrapper, ClinVarDataWrapper, GeneKoDataWrapper,
    CellPassportDataWrapper, eQTLDataWrapper, sQTLDataWrapper
)

# Settings shared by every wrapper loaded in this cell.
NUM_RECORDS = 1000
ALL_RECORDS = False
SEQ_LEN = 20

# Wrapper classes are exercised one after another in exactly this order.
# GWASDataWrapper is imported by this cell but is not loaded here.
WRAPPER_CLASSES = (
    RealClinVar,
    OligogenicDataWrapper,
    ClinVarDataWrapper,
    GeneKoDataWrapper,
    CellPassportDataWrapper,
    eQTLDataWrapper,
    sQTLDataWrapper,
    MAVEDataWrapper,
)

# Build each wrapper with the shared settings, fetch its data and print it.
# `data_loader` and `data` are rebound on every pass, so once the loop ends
# they refer to the last wrapper in the tuple and its data.
for wrapper_cls in WRAPPER_CLASSES:
    data_loader = wrapper_cls(num_records=NUM_RECORDS, all_records=ALL_RECORDS)
    data = data_loader.get_data(Seq_length=SEQ_LEN)
    print(data)

In [None]:
# Exploring the data

from src.dataloader.data_wrapper import (
    RealClinVar, OligogenicDataWrapper, MAVEDataWrapper,
    ClinVarDataWrapper, GeneKoDataWrapper, CellPassportDataWrapper,
    eQTLDataWrapper, sQTLDataWrapper
)

# Passed as Seq_length to every get_data() call in the show(...) invocations below.
SEQ_LEN = 20
# Upper bound on how many leading entries show() prints for each dataset.
SAMPLES = 3

def show(name, loader_fn):
    """Print a short preview of one dataset.

    Calls ``loader_fn`` with no arguments, prints the length of what it
    returns, then prints the first ``SAMPLES`` entries of that result, one
    per line.

    Args:
        name: Label prefixed to the summary line and to any error line.
        loader_fn: Zero-argument callable returning an object that supports
            ``len()`` and slicing.

    Any ``Exception`` raised while loading or printing is caught and reported
    as a single ``ERROR`` line, so one failing dataset does not stop the
    remaining previews.
    """
    try:
        records = loader_fn()
        total = len(records)
        print(f"{name}: {total} records")
        preview = records[:SAMPLES]
        for entry in preview:
            print("  ", entry)
    except Exception as err:
        print(f"{name}: ERROR {err}")

# One (label, wrapper class, num_records) entry per dataset, in display order.
PREVIEW_SPECS = (
    ("RealClinVar", RealClinVar, 200),
    ("Oligogenic", OligogenicDataWrapper, 200),
    ("ClinVar", ClinVarDataWrapper, 200),
    ("GeneKo", GeneKoDataWrapper, 200),
    ("CellPassport", CellPassportDataWrapper, 50),
    ("eQTL", eQTLDataWrapper, 50),
    ("sQTL", sQTLDataWrapper, 50),
    ("MAVE", MAVEDataWrapper, 50),
)

# show() invokes the loader immediately, inside its own try block, so the
# lambda reading the current loop variables is safe: it never outlives the
# iteration that created it.
for label, wrapper_cls, record_count in PREVIEW_SPECS:
    show(
        label,
        lambda: wrapper_cls(
            num_records=record_count, all_records=False
        ).get_data(Seq_length=SEQ_LEN),
    )
