# Tutorial 1: Create a Metadata Schema & Workflow 

The metadata schema presented in this demo is specific to metadata downloaded for the raw RNA-seq CCLE samples, obtained from the SRA project and cBioPortal.

Additionally, we have developed two generic NGS metadata schemas, for bulk RNA-sequencing and whole exome sequencing, to serve as a jumping-off point for tailoring to in-house protocols and experiments. For more information on workflows, or to check out the generic NGS metadata templates, please refer to the Quilt Open package [examples/ngs-metadata-schemas](https://open.quiltdata.com/b/quilt-example/packages/examples/ngs-metadata-schemas).

# config.yml

`./demo_data/workflows/config.yml`

```yaml
version:
  base: "1"
  catalog: "1"
is_workflow_required: true
workflows:
  sra-raw-data:
    name: Upload raw data obtained from the Sequence Read Archive (SRA)
    metadata_schema: sra-raw-data
    handle_pattern: ^ccle/20[0-9]{6}_PRJNA[0-9]{6}_SRR[0-9]{7}$
    is_message_required: true
schemas:
  sra-raw-data:
    url: s3://quilt-example-bucket/ccle/workflows/sra-raw-data.schema.json
```

# sra-raw-data.json

`./demo_data/workflows/sra-raw-data.json`

```json

{
    "$schema": "http://json-schema.org/draft-07/schema#",
    "$id": "sra-raw-data",
    "title": "SRA Raw Data",
    "Description": "Metadata required for public data obtained from the Sequence Read Archive (SRA)",
    "type": "object",
    "additionalProperties": false,
    "required": [
        "Run",
        "AssayType",
        "AvgSpotLen",
        "BiomaterialProvider",
        "BioProject",
        "BioSample",
        "BioSampleModel",
        "CellLine",
        "CenterName",
        "Consent",
        "Disease",
        "DiseaseStage",
        "Ethnicity",
        "Experiment",
        "FlowCellID",
        "Instrument",
        "Isolate",
        "LibraryName",
        "LibraryLayout",
        "LibrarySelection",
        "LibrarySource",
        "Organism",
        "Platform",
        "ReleaseDate",
        "SampleName",
        "SampleType",
        "Sex",
        "SRAStudy",
        "Tissue",
        "CreateDate",
        "Version"
    ],
    "properties": {
        "Run": {
            "description": "SRA run ID for sample",
            "type": "string",
            "minLength": 10,
            "maxLength": 10,
            "pattern": "^SRR[0-9]{7}$"
        },
        "Age": {
            "description": "Age of patient sample was collected from in years",
            "type": "number",
            "exclusiveMinimum": 0
        },
        "AssayType": {
            "description": "NGS assay performed to generate data",
            "type": "string",
            "enum": [
                "RNA-Seq",
                "WES",
                "WGS",
                "CRISPR"
            ]
        },
        "AssemblyName": {
            "description": "GenBank assembly accession for reference genome",
            "type": "string",
            "pattern": "^GCA"
        },
        "AvgSpotLen": {
            "description": "Average number of base pairs in a single spot (aka read length)",
            "type": "integer",
            "enum": [
                198,
                202
            ]
        },
        "Bases": {
            "description": "Total number of nucleotide bases in a sequencing dataset for a given sample",
            "type": "integer",
            "exclusiveMinimum": 0
        },
        "BiomaterialProvider": {
            "description": "Name of provider supplying input sample material",
            "type": "string"
        },
        "BioProject": {
            "description": "SRA term. Research project or study and encompasses various types of data, including sequencing data, associated with that project",
            "type": "string",
            "minLength": 11,
            "maxLength": 11,
            "pattern": "^PRJNA[0-9]{6}$"
        },
        "BioSample": {
            "description": "SRA term. Biological sample that has been used in an experiment or study. Each BioSample record contains information about the biological source, such as the organism, tissue, and individual characteristics.",
            "type": "string",
            "minLength": 12,
            "maxLength": 12,
            "pattern": "^SAMN[0-9]{8}$"
        },
        "BioSampleModel": {
            "description": "SRA term. Model or schema used to define structure information of a BioSample record. In the case of tissue samples, this often refers to the species.",
            "type": "string",
            "enum": [
                "Human",
                "Mouse"
            ]
        },
        "Bytes": {
            "description": "Size of the file in bytes when downloaded from SRA database",
            "type": "integer",
            "exclusiveMinimum": 0
        },
        "CellLine": {
            "description": "Name of cell line profiled",
            "type": "string"
        },
        "CenterName": {
            "description": "Name of center profiling cell line",
            "type": "string",
            "enum": [
                "BROAD INSTITUTE"
            ]
        },
        "Consent": {
            "description": "Level of consent to publish cell line data",
            "const": "public"
        },
        "DataStoreFileType": {
            "description": "SRA file format or storage structure used in the SRA data storage system",
            "type": "string"
        },
        "DataStoreProvider": {
            "description": "Provider used to host SRA data",
            "type": "string"
        },
        "DataStoreRegion": {
            "description": "Region SRA data is hosted in",
            "type": "string"
        },
        "Disease": {
            "description": "Name of disease/cancer type cell line sample was generated from",
            "type": "string"
        },
        "DiseaseStage": {
            "description": "Stage of tumor development",
            "anyOf": [
                {
                    "type": "string",
                    "enum": [
                        "primary",
                        "metastasis",
                        "benign_neoplasia"
                    ]
                },
                {
                    "type": "null"
                }
            ]
        },
        "Ethnicity": {
            "description": "Descent of patient cell line was derived from",
            "type": "string",
            "enum": [
                "Caucasian",
                "Asian",
                "African_american"
            ]
        },
        "Experiment": {
            "description": "SRA refers to a specific set of conditions under which sequencing data was generated. It includes details such as the sequencing platform used, library preparation methods, and other experimental parameters.",
            "type": "string",
            "minLength": 10,
            "maxLength": 10,
            "pattern": "^SRX[0-9]{7}$"
        },
        "FlowCellID": {
            "description": "Flow cell ID for sequencing run sample was profiled on",
            "type": "string",
            "pattern": "20[0-9]{6}_PRJNA[0-9]{6}_SRR[0-9]{7}$"
        },
        "Instrument": {
            "description": "Machine sequencing was performed on",
            "type": "string",
            "enum": [
                "Illumina HiSeq 2500",
                "Illumina NovaSeq 6000",
                "Illumina NovaSeq X",
                "Illumina MiSeq"
            ]
        },
        "Isolate": {
            "description": "Source of assay input material eg, substrate nucleic acid extraction performed on",
            "type": "string",
            "enum": [
                "cell line",
                "tissue",
                "whole blood",
                "buffy coat",
                "plasma"
            ]
        },
        "LibraryName": {
            "description": "Name of sequencing library",
            "type": "string",
            "pattern": "^(RNASeq|WES|WGS|CRISPR)"
        },
        "LibraryLayout": {
            "description": "Whether a library contains single ended or paired reads",
            "type": "string",
            "enum": [
                "PAIRED",
                "SINGLE"
            ]
        },
        "LibrarySelection": {
            "description": "Type of nucleic acid fragments (DNA, RNA ...) selected and isolated for sequencing",
            "type": "string",
            "enum": [
                "cDNA"
            ]
        },
        "LibrarySource": {
            "description": "Original nucleic acid source for library",
            "type": "string",
            "enum": [
                "GENOMIC",
                "TRANSCRIPTOMIC"
            ]
        },
        "Organism": {
            "description": "Organism sample was derived from, in latin",
            "type": "string",
            "enum": [
                "Homo sapiens",
                "Mus musculus"
            ]
        },
        "Platform": {
            "description": "Platform used for sequencing",
            "type": "string",
            "enum": [
                "ILLUMINA",
                "NANOPORE",
                "PACBIO"
            ]
        },
        "ReleaseDate": {
            "description": "Date data was released on SRA database",
            "type": "string",
            "format": "date-time"
        },
        "SampleName": {
            "description": "Unique name of sample profiles",
            "type": "string"
        },
        "SampleType": {
            "description": "Type of sample profiled",
            "type": "string",
            "enum": [
                "buffy coat",
                "cell culture",
                "plasma",
                "tissue",
                "whole blood"
            ]
        },
        "Sex": {
            "description": "Sex of patient sample originated from",
            "type": "string",
            "enum": [
                "Unknown",
                "female",
                "male"
            ]
        },
        "SRAStudy": {
            "description": "Study ID from Sequence Read Archive (SRA) database associated with sample",
            "type": "string",
            "minLength": 9,
            "maxLength": 9,
            "pattern": "^SRP[0-9]{6}$"
        },
        "Tissue": {
            "description": "Organ sample was derived from",
            "type": "string",
            "enum": [
                "autonomic_ganglia",
                "biliary_tract",
                "bone",
                "breast",
                "central_nervous_system",
                "cervix",
                "endometrium",
                "haematopoietic_and_lymphoid_tissue",
                "kidney",
                "large_intestine",
                "liver",
                "lung",
                "oesophagus",
                "ovary",
                "pancreas",
                "pleura",
                "prostate",
                "salivary_gland",
                "skin",
                "small_intestine",
                "soft_tissue",
                "stomach",
                "thyroid",
                "upper_aerodigestive_tract",
                "urinary_tract"
            ]
        },
        "CreateDate": {
            "description": "Date data record was created in SRA",
            "type": "string",
            "format": "date-time"
        },
        "CreateDateBatch": {
            "description": "Simplified create_date field to represent data generation batches, custom field in format YYYY-MM-DD",
            "type": "string",
            "format": "date"
        },
        "Version": {
            "decription": "Version of data on SRA",
            "type": "number",
            "exclusiveMinimum": 0
        },
        "StudyID": {
            "description": "Non-SRA Study ID for project",
            "type": "string",
            "const": "ccle_broad_2019"
        },
        "PatientID": {
            "description": "ID for patient sample was acquired from. Not required to be unique, may have obtained multiple samples from the same patient",
            "type": "string"
        },
        "AnnotationSource": {
            "description": "Cancer cell line initiative that annotated sample",
            "type": "string",
            "enum": [
                "ACHILLES",
                "CCLE",
                "collaborator",
                "COSMIC"
            ]
        },
        "CancerType": {
            "description": "High-level cancer type cell line was derived from",
            "type": "string",
            "enum": [
                "B-Lymphoblastic Leukemia/Lymphoma",
                "Bladder Cancer",
                "Bone Cancer",
                "Breast Cancer",
                "CNS Cancer",
                "Cervical Cancer",
                "Cervical Cancer, NOS",
                "Colorectal Cancer",
                "Embryonal Tumor",
                "Endometrial Cancer",
                "Esophagogastric Cancer",
                "Glioma",
                "Head and Neck Cancer",
                "Hepatobiliary Cancer",
                "Hodgkin Lymphoma",
                "Kidney Cancer, NOS",
                "Leukemia",
                "Lung Cancer, NOS",
                "Mature B-Cell Neoplasms",
                "Mature T and NK Neoplasms",
                "Melanoma",
                "Mesothelioma",
                "Myeloproliferative Neoplasms",
                "Non-Small Cell Lung Cancer",
                "Ovarian Cancer",
                "Ovarian/Fallopian Tube Cancer, NOS",
                "Pancreatic Cancer",
                "Peripheral Nervous System",
                "Prostate Cancer",
                "Prostate Cancer, NOS",
                "Renal Cell Carcinoma",
                "Rhabdoid Cancer",
                "Salivary Gland Cancer",
                "Sex Cord Stromal Tumor",
                "Small Bowel Cancer",
                "Small Cell Lung Cancer",
                "Soft Tissue Sarcoma",
                "T-Lymphoblastic Leukemia/Lymphoma",
                "Thyroid Cancer",
                "Thyroid Cancer, NOS",
                "Uterine Sarcoma"
            ]
        },
        "CancerTypeDetailed": {
            "description": "Detailed description of cancer type cell line was derived from",
            "type": "string",
            "enum": [
                "Acute Myeloid Leukemia",
                "Adenosquamous Carcinoma of the Stomach",
                "Adult T-Cell Leukemia/Lymphoma",
                "Alveolar Rhabdomyosarcoma",
                "Anaplastic Astrocytoma",
                "Anaplastic Large Cell Lymphoma",
                "Anaplastic Thyroid Cancer",
                "Astrocytoma",
                "Atypical Teratoid/Rhabdoid Tumor",
                "B-Lymphoblastic Leukemia/Lymphoma",
                "Bladder Squamous Cell Carcinoma",
                "Bladder Urothelial Carcinoma",
                "Breast Invasive Ductal Carcinoma",
                "Brenner Tumor",
                "Burkitt Lymphoma",
                "Cervical Squamous Cell Carcinoma",
                "Cervix",
                "Chondrosarcoma",
                "Chronic Lymphocytic Leukemia/Small Lymphocytic Lymphoma",
                "Chronic Myelogenous Leukemia",
                "Clear Cell Ovarian Cancer",
                "Colorectal Adenocarcinoma",
                "Cutaneous Melanoma",
                "Diffuse Large B-Cell Lymphoma, NOS",
                "Diffuse Type Stomach Adenocarcinoma",
                "Embryonal Rhabdomyosarcoma",
                "Endometrial Carcinoma",
                "Endometrial Stromal Sarcoma",
                "Endometrioid Ovarian Cancer",
                "Esophageal Adenocarcinoma",
                "Esophageal Squamous Cell Carcinoma",
                "Essential Thrombocythemia",
                "Ewing Sarcoma",
                "Extrahepatic Cholangiocarcinoma",
                "Fibroblastic Osteosarcoma",
                "Fibrosarcoma",
                "Follicular Thyroid Cancer",
                "Gallbladder Adenocarcinoma, NOS",
                "Giant Cell Tumor of Bone",
                "Glioblastoma",
                "Glioblastoma Multiforme",
                "Gliosarcoma",
                "Granulosa Cell Tumor",
                "Head and Neck Squamous Cell Carcinoma",
                "Hepatoblastoma",
                "Hepatocellular Carcinoma",
                "High-Grade Glioma, NOS",
                "High-Grade Serous Ovarian Cancer",
                "Hodgkin Lymphoma",
                "Hypopharynx Squamous Cell Carcinoma",
                "Intestinal Type Stomach Adenocarcinoma",
                "Intrahepatic Cholangiocarcinoma",
                "Invasive Breast Carcinoma",
                "Kidney",
                "Large Cell Lung Carcinoma",
                "Larynx Squamous Cell Carcinoma",
                "Leiomyosarcoma",
                "Low-Grade Serous Ovarian Cancer",
                "Lung",
                "Lung Adenocarcinoma",
                "Lung Adenosquamous Carcinoma",
                "Lung Carcinoid",
                "Lung Squamous Cell Carcinoma",
                "Mantle Cell Lymphoma",
                "Mature B-Cell Neoplasms",
                "Mature T and NK Neoplasms",
                "Medulloblastoma",
                "Meningioma",
                "Metaplastic Breast Cancer",
                "Mixed Ovarian Carcinoma",
                "Mucinous Ovarian Cancer",
                "Mucinous Stomach Adenocarcinoma",
                "Mucoepidermoid Carcinoma",
                "Mucoepidermoid Carcinoma of the Lung",
                "Mycosis Fungoides",
                "Neuroblastoma",
                "Non-Small Cell Lung Cancer",
                "Oligodendroglioma",
                "Oral Cavity Squamous Cell Carcinoma",
                "Oropharynx Squamous Cell Carcinoma",
                "Osteosarcoma",
                "Ovarian Epithelial Tumor",
                "Ovary/Fallopian Tube",
                "Pancreatic Adenocarcinoma",
                "Papillary Thyroid Cancer",
                "Peripheral T-Cell lymphoma, NOS",
                "Plasma Cell Myeloma",
                "Pleural Mesothelioma",
                "Prostate",
                "Prostate Adenocarcinoma",
                "Prostate Small Cell Carcinoma",
                "Renal Cell Carcinoma",
                "Renal Clear Cell Carcinoma",
                "Rhabdoid Cancer",
                "Rhabdomyosarcoma",
                "Serous Ovarian Cancer",
                "Signet Ring Cell Carcinoma of the Stomach",
                "Small Cell Carcinoma of the Stomach",
                "Small Cell Lung Cancer",
                "Small Intestinal Carcinoma",
                "Stomach Adenocarcinoma",
                "T-Lymphoblastic Leukemia/Lymphoma",
                "Thyroid",
                "Tubular Stomach Adenocarcinoma",
                "Undifferentiated Pleomorphic Sarcoma/Malignant Fibrous Histiocytoma/High-Grade Spindle Cell Sarcoma",
                "Undifferentiated Stomach Adenocarcinoma",
                "Uterine Carcinosarcoma/Uterine Malignant Mixed Mullerian Tumor",
                "Uterine Endometrioid Carcinoma",
                "Uterine Sarcoma/Mesenchymal"
            ]
        },
        "CellLineSource": {
            "description": "Origin source of cell line, often name of institution that derived or provided cell line to the study",
            "type": "string",
            "enum": [
                "ACADEMIC",
                "ACDC LAB",
                "ATCC",
                "Academic Lab",
                "Acd Lab",
                "DSMZ",
                "EACC",
                "ECAAC",
                "ECACC",
                "GNF",
                "Garraway Lab",
                "HPACC",
                "HSRRB",
                "HSSRB",
                "ICLC",
                "JCRB",
                "JHSF",
                "KCLB",
                "NCI/DCTD",
                "NIBRI",
                "NIBRI/ATCC",
                "PT-6ZZI",
                "RIKEN",
                "Riken",
                "WISTAR"
            ]
        },
        "Characteristics": {
            "description": "Free form description of cell line characteristics, especially as they pertain to growth patterns and morphology",
            "type": "string"
        },
        "DepMapID": {
            "description": "Cell line ID on the Dependency Map (DepMap) portal profiling cancer vulnerabilities of cell lines",
            "type": "string",
            "minLength": 10,
            "maxLength": 10,
            "pattern": "^ACH-[0-9]{6}$"
        },
        "DiseaseOntology": {
            "description": "Disease ontology, similar to detailed cancer type, as defined by cbioportal",
            "enum": [
                "B-cell non-Hodgkin lymphoma",
                "B-prolymphocytic_leukemia",
                "Brain Cancer",
                "Breast Cancer",
                "Burkitts lymphoma",
                "CD30+_anaplastic_large_cell_lymphoma",
                "Colon Cancer",
                "Down syndrome; acute megakaryoblastic leukaemia",
                "Endometrial Cancer",
                "Esophageal Cancer",
                "Ewing family tumor",
                "Ewings_sarcoma",
                "Kidney Cancer",
                "Leukemia",
                "Lung Cancer",
                "Multiple Myeloma",
                "Ovarian Cancer",
                "Pancreatic Cancer",
                "Skin Cancer",
                "T-ALL",
                "TALL",
                "acute lymphoblastic leukemia",
                "acute myelogenous leukemia M5a",
                "acute_monocytic_leukemia",
                "anaplastic_large_cell_lymphoma",
                "b_cell_non_hodgkin_lymphoma",
                "bladder_cancer",
                "bone_cancer",
                "brain_cancer",
                "breast_adenocarcinoma",
                "breast_cancer",
                "bronchioalveolar_carcinoma; non-small_cell lung_cancer",
                "chondrosarcoma",
                "clear_cell_carcinoma",
                "colon_cancer",
                "colon_carcinoma",
                "colon_carcinoma and melanoma",
                "colorectal_cancer",
                "cutaneous T cell lymphoma",
                "diffuse large cell lymphoma non-cleaved type",
                "diffuse_histiocytic_lymphoma",
                "ductal_carcinoma",
                "endometrioid_carcinoma",
                "erythroleukemia",
                "fibrosarcoma_cancer",
                "gastric_cancer",
                "germinal center B-cell-like (GCB) DLBCL",
                "head_neck_cancer",
                "hepatocellular_cancer",
                "histiocytic_lymphoma",
                "intermediately differentiated adenocarcinoma",
                "invasive_ductal_carcinoma",
                "kidney_cancer",
                "large_cell_lung_cancer",
                "leukaemia",
                "liver_cancer",
                "lung_cancer",
                "lymphoma",
                "malignant_melanoma",
                "malignant_papillary_serous_adenocarcinoma",
                "mantle_cell_lymphoma",
                "medulla_carcinoma",
                "medulloblastoma",
                "melanoma",
                "metaplastic_breast_carcinoma",
                "moderately differentiated tubular adenocarcinoma",
                "multiple_myeloma",
                "mycosis_fungoides",
                "myeloma",
                "neuroblastoma",
                "neuroepithelioma",
                "non-Hodgkins lymphoma",
                "non_small_cell_lung_cancer",
                "oesophageal squamous cell carcinoma",
                "osteosarcoma",
                "ovarian_cancer",
                "ovarian_carcinoma",
                "pancreatic_cancer",
                "papillary_adenocarcinoma",
                "plasma_cell_leukemia",
                "pleomorphic hepatocellular carcinoma",
                "poorly_differentiated_gastric carcinoma",
                "prostate_cancer",
                "rectum; adenocarcinoma; well differentiated",
                "recurrent_endometrial_carcinoma",
                "renal_cell_carcinoma",
                "renal_leiomyoblastoma",
                "rhabdoid_tumour",
                "rhabdomyosarcoma",
                "sezary_syndrome",
                "skin_cancer",
                "small_cell_carcinoma",
                "thyroid_squamous_cell_carcinoma",
                "transitional_cell_carcinoma",
                "undifferentiated B lymphoma",
                "well differentiated endometrial adenocarcinoma",
                "well differentiated invasive eosphageal squamous cell carcinoma"
            ]
        },
        "DoublingTimeHrs": {
            "description": "Approximate time, in hours, for cell line population to double",
            "type": "number",
            "exclusiveMinimum": 0
        },
        "FractionGenomeAltered": {
            "description": "Percentage of genome that has been affected by copy number gains or losses, as calculated by cBioportal",
            "type": "number"
        },
        "FreezingMedium": {
            "description": "Medium used to crysopreserve cell lines, often refers to percentage DMSO added to cell culture medium",
            "type": "string",
            "enum": [
                "5% DMSO",
                "10% DMSO",
                "5% DMSO-7.5% DMSO",
                "0.05% DMSO"
            ]
        },
        "GenomeDoublings": {
            "description": "Number of duplications of the entire set of chromosomes within a cell for a given cell line",
            "type": "integer",
            "enum": [
                0.0,
                1.0,
                2.0
            ]
        },
        "GeographicDistribution": {
            "description": "Location of cell line derivation",
            "type": "string",
            "enum": [
                "japan",
                "korea",
                "europe",
                "canada",
                "france",
                "sweden",
                "chinese",
                "taiwan",
                "argentina"
            ]
        },
        "GrowthMedium": {
            "description": "Cell culture or growth media, is an umbrella term that encompasses any gel or liquid created to support cellular growth in the lab",
            "type": "string"
        },
        "Histology": {
            "description": "Histology of tumor cell line was derived from, as defined by microscopic examination of excised tissue from biopsy or resection",
            "type": "string",
            "enum": [
                "Carcinoid-Endocrine_Tumour",
                "Carcinoma",
                "Chondrosarcoma",
                "Ewings_Sarcoma-Peripheral_Primitive_Neuroectodermal_Tumour",
                "Fibrosarcoma",
                "Giant_Cell_Tumour",
                "Glioma",
                "Haematopoietic_Neoplasm",
                "Leiomyosarcoma",
                "Lymphoid_Neoplasm",
                "Malignant_Fibrous_Histiocytoma-Pleomorphic_Sarcoma",
                "Malignant_Melanoma",
                "Meningioma",
                "Mesothelioma",
                "Neuroblastoma",
                "Osteosarcoma",
                "Other",
                "Primitive_Neuroectodermal_Tumour-Medulloblastoma",
                "Rhabdoid_Tumour",
                "Rhabdomyosarcoma",
                "Sarcoma",
                "Sex_Cord-Stromal_Tumour"
            ]
        },
        "HistologySubtype1": {
            "description": "More detailed histological information on tumor cell line was derived from. NS=not specified",
            "type": "string",
            "enum": [
                "B_cell_lymphoma_unspecified",
                "Brenner_tumour",
                "Burkitt_lymphoma",
                "Hodgkin_lymphoma",
                "NS",
                "acute_lymphoblastic_B_cell_leukaemia",
                "acute_lymphoblastic_T_cell_leukaemia",
                "acute_myeloid_leukaemia",
                "adenocarcinoma",
                "adult_T_cell_lymphoma-leukaemia",
                "alveolar",
                "anaplastic_carcinoma",
                "anaplastic_large_cell_lymphoma",
                "astrocytoma",
                "astrocytoma_Grade_III",
                "astrocytoma_Grade_III-IV",
                "astrocytoma_Grade_IV",
                "barrett_associated_adenocarcinoma",
                "blast_phase_chronic_myeloid_leukaemia",
                "bronchioloalveolar_adenocarcinoma",
                "carcinosarcoma-malignant_mesodermal_mixed_tumour",
                "chronic_lymphocytic_leukaemia-small_lymphocytic_lymphoma",
                "chronic_myeloid_leukaemia",
                "clear_cell_carcinoma",
                "clear_cell_renal_cell_carcinoma",
                "dedifferentiated",
                "diffuse_adenocarcinoma",
                "diffuse_large_B_cell_lymphoma",
                "ductal_carcinoma",
                "embryonal",
                "endometrioid_carcinoma",
                "essential_thrombocythaemia",
                "follicular_carcinoma",
                "gliosarcoma",
                "granulosa_cell_tumour",
                "hepatoblastoma",
                "hepatocellular_carcinoma",
                "immortalized_embryonic_fibroblast",
                "immortalized_epithelial",
                "intestinal_adenocarcinoma",
                "large_cell_carcinoma",
                "mantle_cell_lymphoma",
                "medullary_carcinoma",
                "metaplasia",
                "metaplastic_carcinoma",
                "mixed_adenosquamous_carcinoma",
                "mixed_carcinoma",
                "mucinous_carcinoma",
                "mucoepidermoid_carcinoma",
                "mycosis_fungoides-Sezary_syndrome",
                "non_small_cell_carcinoma",
                "oligodendroglioma",
                "papillary_carcinoma",
                "papilloma",
                "peripheral_T_cell_lymphoma_unspecified",
                "plasma_cell_myeloma",
                "renal_cell_carcinoma",
                "serous_carcinoma",
                "signet_ring_adenocarcinoma",
                "small_cell_adenocarcinoma",
                "small_cell_carcinoma",
                "squamous_cell_carcinoma",
                "transitional_cell_carcinoma",
                "tubular_adenocarcinoma",
                "undifferentiated_adenocarcinoma",
                "undifferentiated_carcinoma"
            ]
        },
        "HistologySubtype2": {
            "description": "Even more detailed histological information on tumor cell line was derived from. NS=not specified",
            "type": "string",
            "enum": [
                "L2",
                "M0",
                "M2",
                "M3",
                "M4",
                "M5",
                "M5a",
                "M6",
                "M7",
                "NS",
                "Ph_positive",
                "anaplastic",
                "glioblastoma_multiforme",
                "medullary",
                "papillary",
                "papillary_transitional_cell_carcinoma",
                "squamous_cell_carcinoma"
            ]
        },
        "LifeStage": {
            "description": "Whether cell line was dervied from an adult or pediatric tumor",
            "type": "string",
            "enum": [
                "pediatric",
                "adult"
            ]
        },
        "Lineage": {
            "description": "High-level description of cancer lineage corresponding to cel line",
            "type": "string",
            "enum": [
                "bile_duct",
                "blood",
                "bone",
                "breast",
                "central_nervous_system",
                "cervix",
                "colorectal",
                "engineered_breast",
                "engineered_central_nervous_system",
                "engineered_kidney",
                "engineered_lung",
                "engineered_ovary",
                "engineered_prostate",
                "esophagus",
                "fibroblast",
                "gastric",
                "kidney",
                "liver",
                "lung",
                "lymphocyte",
                "ovary",
                "pancreas",
                "peripheral_nervous_system",
                "plasma_cell",
                "prostate",
                "skin",
                "soft_tissue",
                "thyroid",
                "upper_aerodigestive",
                "urinary_tract",
                "uterus"
            ]
        },
        "LineageMolecularSubtype": {
            "description": "Molecular subtype of tumor cell line was derived from",
            "type": "string",
            "enum": [
                "EWS_ERG",
                "EWS_FLI",
                "HER2_amp",
                "MSI",
                "MYCN_amp",
                "MYC_amp",
                "MYC_exp",
                "basal_A",
                "basal_B",
                "luminal",
                "luminal_HER2_amp",
                "non_MYC"
            ]
        },
        "LineageSubtype": {
            "description": "Lineage subtype of tumor cell line was derived from, more detailed than Lineage",
            "type": "string",
            "enum": [
                "ALL",
                "AML",
                "ATL",
                "ATRT",
                "CLL",
                "CML",
                "Ewing_sarcoma",
                "MMMT",
                "NSCLC",
                "SCLC",
                "bladder_carcinoma",
                "breast_adenocarcinoma",
                "breast_carcinoma",
                "breast_ductal_carcinoma",
                "brenner_tumor",
                "caecum_adenocarcinoma",
                "cervical_carcinoma",
                "cholangiocarcinoma",
                "chondrosarcoma",
                "clear_cell_carcinoma",
                "colorectal_adenocarcinoma",
                "duodenal_adenocarcinoma",
                "endocrine",
                "endometrial_adenocarcinoma",
                "endometrial_adenosquamous",
                "endometrial_stromal_sarcoma",
                "esophagus_adenocarcinoma",
                "esophagus_squamous",
                "exocrine",
                "fibroblast_bone",
                "fibroblast_breast",
                "fibroblast_colorectal",
                "fibroblast_lung",
                "fibroblast_lymphocyte",
                "fibroblast_skin",
                "fibroblast_soft_tissue",
                "fibroblast_upper_aerodigestive",
                "fibroblast_urinary_tract",
                "fibrosarcoma",
                "gallbladder_adenocarcinoma",
                "gastric_adenocarcinoma",
                "gastric_small_cell",
                "glioma",
                "hepatoblastoma",
                "hepatocellular_carcinoma",
                "hodgkin_lymphoma",
                "leiomyosarcoma",
                "lung_carcinoid",
                "lymphoma_unspecified",
                "malignant_rhabdoid_tumor",
                "medulloblastoma",
                "melanoma",
                "meningioma",
                "mesothelioma",
                "mullerian_carcinoma",
                "multiple_myeloma",
                "neuroblastoma",
                "non_hodgkin_lymphoma",
                "osteosarcoma",
                "ovary_adenocarcinoma",
                "ovary_carcinoma",
                "pleomorphic_sarcoma",
                "prostate_adenocarcinoma",
                "prostate_small_cell",
                "renal_cell_carcinoma",
                "rhabdomyosarcoma",
                "thyroid_carcinoma",
                "thyroid_sarcoma",
                "thyroid_squamous",
                "upper_aerodigestive_squamous",
                "uterine_sarcoma"
            ]
        },
        "LineageSubSubtype": {
            "decription": "Lineage sub-subtype of tumor cell line was derived from, more detailed than bath Lineage SubType & Lineage",
            "type": "string",
            "enum": [
                "DLBCL",
                "ERneg_HER2neg",
                "ERneg_HER2pos",
                "ERpos_HER2neg",
                "ERpos_HER2pos",
                "M2",
                "M3",
                "M4",
                "M5",
                "M6",
                "M7",
                "NSCLC_adenocarcinoma",
                "NSCLC_adenosquamous",
                "NSCLC_large_cell",
                "NSCLC_mucoepidermoid",
                "NSCLC_squamous",
                "alveolar",
                "amelanotic",
                "anaplastic",
                "astrocytoma",
                "b_cell",
                "b_cell_burkitt",
                "b_cell_mantle_cell",
                "basaloid",
                "bladder_squamous",
                "bladder_transitional_cell",
                "blast_crisis",
                "clear_cell",
                "diffuse_gastric",
                "embryonal",
                "endometrioid",
                "exocrine_adenocarcinoma",
                "exocrine_adenosquamous",
                "extrahepatic",
                "follicular",
                "glioblastoma",
                "hbs_antigen_carrier",
                "high_grade_serous",
                "hypopharyngeal",
                "intrahepatic",
                "laryngeal",
                "low_grade_serous",
                "med_group_3",
                "mixed_endometrioid_clear_cell",
                "mixed_serous_clear_cell",
                "mucinous",
                "oligodendroglioma",
                "oral",
                "papillary",
                "pharynx",
                "plasmacytoma",
                "renal_leiomyoblastoma",
                "salivary_gland",
                "serous",
                "signet_ring_cell",
                "somatostatinoma",
                "t_cell",
                "t_cell_ALCL",
                "t_cell_cutaneous",
                "tongue",
                "transitional_cell",
                "tubular"
            ]
        },
        "MutationCount": {
            "description": "Total  number of non-synonymous mutations in cell line as reported by cBioportal",
            "type": "integer"
        },
        "MutationRate": {
            "description": "Mutations per megabase of the genome",
            "type": "number"
        },
        "CellLineNickName": {
            "description": "Shortened name for cell line",
            "type": "string"
        },
        "OncotreeCode": {
            "description": "OncoTree is an open-source ontology that was developed at Memorial Sloan Kettering Cancer Center (MSK) for standardizing cancer type diagnosis from a clinical perspective by assigning each diagnosis a unique OncoTree code.",
            "type": "string",
            "enum": [
                "AASTR",
                "ALCL",
                "AML",
                "ARMS",
                "ASTR",
                "ATLL",
                "ATRT",
                "BL",
                "BLCA",
                "BLL",
                "BLSC",
                "BRCA",
                "BTOV",
                "CCOV",
                "CCRCC",
                "CERVIX",
                "CESC",
                "CHS",
                "CLLSLL",
                "CML",
                "COADREAD",
                "DLBCLNOS",
                "DSTAD",
                "EHCH",
                "EOV",
                "ERMS",
                "ES",
                "ESCA",
                "ESCC",
                "ESS",
                "ET",
                "FIBS",
                "FIOS",
                "GB",
                "GBAD",
                "GBM",
                "GCTB",
                "GRCT",
                "GSARC",
                "HCC",
                "HGGNOS",
                "HGSOC",
                "HL",
                "HNSC",
                "HPHSC",
                "IDC",
                "IHCH",
                "ISTAD",
                "KIDNEY",
                "LCLC",
                "LGSOC",
                "LIHB",
                "LMS",
                "LUAD",
                "LUAS",
                "LUCA",
                "LUMEC",
                "LUNG",
                "LUSC",
                "LXSC",
                "MBC",
                "MBL",
                "MBN",
                "MCL",
                "MFH",
                "MNG",
                "MOV",
                "MRT",
                "MSTAD",
                "MTNN",
                "MUCC",
                "MXOV",
                "MYCF",
                "NBL",
                "NSCLC",
                "OCSC",
                "ODG",
                "OPHSC",
                "OS",
                "OVARY",
                "OVT",
                "PAAD",
                "PCM",
                "PLMESO",
                "PRAD",
                "PROSTATE",
                "PRSCC",
                "PTCL",
                "RCC",
                "RMS",
                "SCLC",
                "SIC",
                "SKCM",
                "SOC",
                "SSRCC",
                "STAD",
                "STAS",
                "STSC",
                "THAP",
                "THFO",
                "THPA",
                "THYROID",
                "TLL",
                "TSTAD",
                "UCEC",
                "UCS",
                "UEC",
                "USARC",
                "USTAD"
            ]
        },
        "PathologistAnnotation": {
            "description": "Cancer type of tumor cell line was derived from according to pathologist",
            "type": "string",
            "enum": [
                "Biliary_Tract:Carcinoma",
                "Bladder:Carcinoma",
                "Bone:Sarcoma_Chondro",
                "Bone:Sarcoma_Ewing",
                "Bone:Sarcoma_Osteo",
                "Breast:Carcinoma",
                "CNS:Glioma",
                "CNS:Glioma_HighGrade",
                "CNS:Medulloblastoma",
                "Colorectal:Carcinoma",
                "Endocrine",
                "Endometrium:Carcinoma",
                "Endometrium:Others",
                "Gastric:Carcinoma",
                "Kidney:Carcinoma",
                "Leukemia:ALL",
                "Leukemia:AML",
                "Leukemia:CLL",
                "Leukemia:CML",
                "Leukemia:MPD",
                "Liver:Carcinoma_Adeno",
                "Liver:HCC",
                "Liver:Hepatoblastoma",
                "Lung:Mesothelioma",
                "Lung:NSCLC_Adeno",
                "Lung:NSCLC_Large_Cell",
                "Lung:NSCLC_Others",
                "Lung:NSCLC_Squamous",
                "Lung:Others",
                "Lung:SCLC",
                "Lymphoma:Hodgkin",
                "Lymphoma:Multiple_Myeloma",
                "Lymphoma:NH_B_cell",
                "Lymphoma:NH_T_cell",
                "Oesophagus:Benign",
                "Oesophagus:Carcinoma",
                "Ovary:Carcinoma",
                "Ovary:Germline",
                "PNET:Neuroblastoma",
                "Pancreas:Carcinoma",
                "Prostate:Carcinoma",
                "Salivary_Gland:Carcinoma",
                "Skin:Melanoma",
                "Soft_Tissue:Sarcoma_Fibro",
                "Soft_Tissue:Sarcoma_FibrousHistiocytoma",
                "Soft_Tissue:Sarcoma_Leiomyo",
                "Soft_Tissue:Sarcoma_Others",
                "Soft_Tissue:Sarcoma_Rhabdoid",
                "Thyroid:Carcinoma",
                "Upper_Aerodigestive_Tract:Carcinoma"
            ]
        },
        "Ploidy": {
            "description": "Ploidy is the number of complete sets of chromosomes in a cell, baseline unaltered ploisy in humans is 2",
            "type": "number"
        },
        "PrimaryTumorSite": {
            "description": "Location of primary tumor in patient cell line was derived from. If the cell line is derived from a metastasis, the primary tumor location may differ from the anatomical location the cell line source material was collected from.",
            "type": "string",
            "enum": [
                "Autonomic_Ganglia",
                "Biliary_Tract",
                "Bone",
                "Breast",
                "Central_Nervous_System",
                "Endometrium",
                "Haematopoietic_And_Lymphoid_Tissue",
                "Kidney",
                "Large_Intestine",
                "Liver",
                "Lung",
                "Oesophagus",
                "Ovary",
                "Pancreas",
                "Pleura",
                "Prostate",
                "Salivary_Gland",
                "Skin",
                "Small_Intestine",
                "Soft_Tissue",
                "Stomach",
                "Thyroid",
                "Upper_Aerodigestive_Tract",
                "Urinary_Tract"
            ]
        },
        "Proteomics10PlexID": {
            "description": "Cell line ID for Proteomics 10-Plex assay to align datasets by sample",
            "type": "number"
        },
        "ProteomicsTMTLabel": {
            "description": "Tandem Mass Tag (TMT) used in proteomics of cell line in paired dataset",
            "type": "string"
        },
        "Purity": {
            "description": "Proportion of cells in sample of malignant origin, a value of 1 denotes 100% tumor cells in sample",
            "type": "number"
        },
        "SiteOfFinding": {
            "description": "Location of tumor cell line was derived from",
            "type": "string",
            "enum": [
                "abdomen",
                "abdominal_wall",
                "acetabulum",
                "adrenal",
                "ascites",
                "axillary_node",
                "bone",
                "bone_marrow",
                "brain",
                "breast",
                "central_nervous_system",
                "cerebrospinal_fluid",
                "cervix",
                "connective_tissue_trunk",
                "liver",
                "lung",
                "lymph_node",
                "muscle",
                "omentum",
                "pelvic_wall",
                "pericardial_effusion",
                "peritoneum",
                "pleura",
                "skin",
                "soft_tissue",
                "spleen",
                "subcutaneous_tissue",
                "supra_orbital_area",
                "vertebra"
            ]
        },
        "SiteSubtype1": {
            "description": "Detailed site location of tumor cell line was derived from",
            "type": "string",
            "enum": [
                "NS",
                "bile_duct",
                "bladder",
                "brain",
                "bronchus",
                "caecum",
                "cerebellum",
                "colon",
                "duodenum",
                "femur",
                "fibrous_tissue_and_uncertain_origin",
                "frontal_lobe",
                "head_neck",
                "humerus",
                "larynx",
                "left_upper_lobe",
                "lower_third",
                "lymph_node",
                "meninges",
                "middle_third",
                "mouth",
                "nan",
                "parietal_lobe",
                "pelvis",
                "pharynx",
                "rectum",
                "skin",
                "smooth_muscle",
                "spleen",
                "striated_muscle",
                "submaxillary",
                "temporal_lobe",
                "tongue",
                "upper_leg",
                "upper_third",
                "ureter"
            ]
        },
        "SiteSubtype2": {
            "description": "Detailed site location of tumor cell line was derived from, more detailed than Site Subtype 1",
            "type": "string",
            "enum": [
                "NS",
                "abdomen",
                "gingiva",
                "glottis",
                "hypopharynx",
                "kidney",
                "nan",
                "ovary",
                "right",
                "sigmoid",
                "subglottis",
                "supraglottis",
                "thyroid",
                "tongue",
                "uterus",
                "vulva"
            ]
        },
        "Subtype": {
            "description": "Tumor subtype cell line was derived from",
            "type": "string",
            "enum": [
                "ATL",
                "Acute Lymphoblastic Leukemia (ALL), B-cell",
                "Acute Lymphoblastic Leukemia (ALL), T-cell",
                "Acute Myelogenous Leukemia (AML)",
                "Acute Myelogenous Leukemia (AML), M2 (Myeloblastic)",
                "Acute Myelogenous Leukemia (AML), M3 (Promyelocytic)",
                "Acute Myelogenous Leukemia (AML), M4 (Myelomonocytic)",
                "Acute Myelogenous Leukemia (AML), M5 (Eosinophilic/Monocytic)",
                "Acute Myelogenous Leukemia (AML), M6 (Erythroleukemia)",
                "Acute Myelogenous Leukemia (AML), M7 (Megakaryoblastic)",
                "Adenocarcinoma",
                "Adenocarcinoma, clear cell",
                "Adenocarcinoma, endometrioid",
                "Adenocarcinoma, high grade serous",
                "Adenocarcinoma, low grade serous",
                "Adenocarcinoma, mixed",
                "Adenocarcinoma, mucinous",
                "Adenocarcinoma, serous",
                "Adenocarcinoma, signet ring cell",
                "Adenocarcinoma, tubular",
                "Astrocytoma",
                "Atypical Teratoid Rhabdoid Tumor (ATRT)",
                "B-cell",
                "B-cell, Hodgkins",
                "B-cell, Non-Hodgkins",
                "B-cell, Non-Hodgkins, Burkitts",
                "B-cell, Non-Hodgkins, Mantle Cell",
                "Bladder",
                "Bone",
                "Breast",
                "Breast Ductal Carcinoma",
                "Caecum Adenocarcinoma",
                "Carcinoid",
                "Carcinoma",
                "Carcinoma, anaplastic",
                "Carcinoma, brenner",
                "Carcinoma, follicular",
                "Carcinoma, papillary",
                "Cholangiocarcinoma, extrahepatic",
                "Cholangiocarcinoma, intrahepatic",
                "Chondrosarcoma",
                "Chronic Lymphoblastic Leukemia (CLL), B-cell",
                "Chronic Myelogenous Leukemia (CML)",
                "Chronic Myelogenous Leukemia (CML), blast crisis",
                "Clear Cell Carcinoma",
                "Colorectal",
                "Cystadenocarcinoma, clear cell",
                "Cystadenocarcinoma, endometrioid",
                "Cystadenocarcinoma, high grade serous",
                "Cystadenocarcinoma, mucinous",
                "Diffuse Large B-cell Lymphoma (DLBCL)",
                "Ductal Adenocarcinoma, exocrine",
                "Ductal Adenosquamous Carcinoma",
                "Ductal Adenosquamous Carcinoma, exocrine",
                "Duodenal Adenocarcinoma",
                "Endometrial Adenocarcinoma",
                "Endometrial Adenosquamous Carcinoma",
                "Endometrial Stromal Sarcoma",
                "Ewings Sarcoma",
                "Fibrosarcoma",
                "Glioblastoma",
                "Glioma",
                "Hepatoblastoma",
                "Hepatocellular Carcinoma",
                "Hepatocellular Carcinoma, HBs-antigen carrier",
                "Leiomyosarcoma",
                "Lung",
                "Lymphoma",
                "Malignant Mixed Mullerian Tumor (MMMT)",
                "Malignant Rhabdoid Tumor",
                "Medulloblastoma",
                "Melanoma",
                "Melanoma, amelanotic",
                "Meningioma",
                "Mesothelioma",
                "Mullerian Carcinoma",
                "Multiple Myeloma",
                "Multiple Myeloma, plasmacytoma",
                "Myeloma",
                "Non-Small Cell Lung Cancer (NSCLC), Adenocarcinoma",
                "Non-Small Cell Lung Cancer (NSCLC), Adenosquamous Carcinoma",
                "Non-Small Cell Lung Cancer (NSCLC), Large Cell Carcinoma",
                "Non-Small Cell Lung Cancer (NSCLC), Mucoepidermoid Carcinoma",
                "Non-Small Cell Lung Cancer (NSCLC), Squamous Cell Carcinoma",
                "Non-Small Cell Lung Cancer (NSCLC), unspecified",
                "Oligodendroglioma",
                "Osteosarcoma",
                "Pleomorphic Sarcoma",
                "Renal Carcinoma, clear cell",
                "Renal Carcinoma, transitional cell",
                "Renal Cell Carcinoma",
                "Renal Leiomyoblastoma",
                "Rhabdomyosarcoma",
                "Rhabdomyosarcoma, alveolar",
                "Rhabdomyosarcoma, embryonal",
                "Skin",
                "Small Cell Carcinoma",
                "Small Cell Lung Cancer (SCLC)",
                "Soft tissue",
                "Somatostatinoma",
                "Squamous Cell Carcinoma",
                "Squamous Cell Carcinoma, basaloid",
                "Squamous Cell Carcinoma, hypopharyngeal",
                "Squamous Cell Carcinoma, laryngeal",
                "Squamous Cell Carcinoma, oral",
                "Squamous Cell Carcinoma, pharynx",
                "Squamous Cell Carcinoma, salivary gland",
                "Squamous Cell Carcinoma, tongue",
                "T-cell",
                "T-cell, Non-Hodgkins, Anaplastic Large Cell (ALCL)",
                "T-cell, Non-Hodgkins, Cutaneous",
                "Thyroid Sarcoma",
                "Transitional Cell Carcinoma",
                "Upper aerodigestive",
                "Uterine Sarcoma"
            ]
        },
        "Supplements": {
            "description": "Additivies to cell line growth medium",
            "type": "string"
        },
        "TMBNonSynonymous": {
            "description": "Tumor Mutation Burden (TMB), number of nonsyn mutations per MB of DNA, as calculated by cBioportal",
            "type": "number"
        }
    }
}

```