In [53]:
import pandas as pd
from typing import Any
import json 

# The datasource table lives in a shared Google Sheet; this is its CSV export endpoint.
SPREADSHEET_CSV_URL = 'https://docs.google.com/spreadsheets/d/1G3zq-aJg3uKBz3VZUvGV8yYVrECtpQ9SJoGX6X3B-ms/export?format=csv'

# Load the sheet and keep only the rows that have a value in the `output` column.
sheet = pd.read_csv(SPREADSHEET_CSV_URL, sep=',')
df = sheet[sheet['output'].notna()]
print(df.head())

# Build {datasourceId: {'output': ..., 'niceName': ..., 'description': ...}}.
# NOTE: the variable name is misspelled ("datasoruce"); it is kept as-is because
# the cell that renders the JSON refers to the mapping by this exact name.
datasoruce_mapping = (
    df.set_index('datasourceId')
    .loc[:, ['output', 'niceName', 'description']]
    .to_dict(orient='index')
)

datasoruce_mapping

         datasourceId                    source configuration  \
0   cancer_biomarkers  cancerbiomarkers.json.gz            OK   
1  cancer_gene_census            cosmic.json.gz           OK    
2              chembl            chembl.json.gz            OK   
3             clingen           clingen.json.gz            OK   
4              crispr            crispr.json.gz            OK   

  Direction of effect  run time (s) isScoreOK    valid old valid invalid  \
0                 NaN          47.0        OK    1,300     1,300      49   
1                  OK          55.0        OK   91,572    91,572  11,300   
2                  OK          27.0        OK  573,103   573,103  18,243   
3                 NaN          18.0        OK    3,047     3,047     292   
4                 NaN          52.0        OK      517       517       0   

  old invalid comment                       output  \
0          49     NaN   evidence_cancer_biomarkers   
1      11,300     NaN  evidence_cancer_gene_

{'cancer_biomarkers': {'output': 'evidence_cancer_biomarkers',
  'niceName': 'Cancer biomarker evidence',
  'description': 'Disease-target evidence based on cancer biomarkers from the Cancer Genome Interpreter'},
 'cancer_gene_census': {'output': 'evidence_cancer_gene_census',
  'niceName': 'Cancer Gene Census evidence',
  'description': 'Disease-target evidence based on Cancer Gene Census provided by COSMIC'},
 'chembl': {'output': 'evidence_chembl',
  'niceName': 'ChEMBL evidence',
  'description': 'Disease-target evidence based on drug indications, clinical trials and mechanism of action provided by ChEMBL'},
 'clingen': {'output': 'evidence_clingen',
  'niceName': 'ClinGen evidence',
  'description': 'Disease-target evidence from Gene–Disease Validity Curation of ClinGen'},
 'crispr': {'output': 'evidence_crispr',
  'niceName': 'ProjectScore evidence',
  'description': 'Diease-target evidence from unified analysis of whole-genome CRISPR/Cas9 screens from ProjectScore project'},
 'c

In [56]:
def process_dataset(
    output: str,
    nice_name: str,
    description: str,
    tags: tuple[str, ...] = ("Target-Disease",),
) -> dict[str, Any]:
    """Build the descriptor record for a single dataset.

    Args:
        output: Dataset identifier. Used verbatim as ``id`` and as the prefix
            of the single key path ``"<output>/id"``.
        nice_name: Human-readable name of the dataset.
        description: Free-text description of the dataset.
        tags: Labels attached to the dataset. Defaults to the single tag
            ``"Target-Disease"``, so existing three-argument calls produce
            exactly the same record as before. Pass a tuple or list of
            strings; a bare string would be split into characters.

    Returns:
        A JSON-serialisable dict with the keys ``id``, ``nice_name``, ``key``,
        ``description`` and ``tags``.
    """
    return {
        "id": output,
        "nice_name": nice_name,
        "key": [f"{output}/id"],
        "description": description,
        # Copy into a fresh list so each record owns its tags and mutating one
        # record never leaks into the default or into other records.
        "tags": list(tags),
    }

# Render one descriptor per datasource as a pretty-printed JSON array.
dataset_descriptors = [
    process_dataset(entry['output'], entry['niceName'], entry['description'])
    for entry in datasoruce_mapping.values()
]

# ensure_ascii=False keeps non-ASCII characters in the descriptions (for example
# the en dash in "Gene–Disease") readable instead of emitting \uXXXX escapes.
# The result is still valid JSON.
print(json.dumps(dataset_descriptors, indent=2, ensure_ascii=False))

[
  {
    "id": "evidence_cancer_biomarkers",
    "nice_name": "Cancer biomarker evidence",
    "key": [
      "evidence_cancer_biomarkers/id"
    ],
    "description": "Disease-target evidence based on cancer biomarkers from the Cancer Genome Interpreter",
    "tags": [
      "Target-Disease"
    ]
  },
  {
    "id": "evidence_cancer_gene_census",
    "nice_name": "Cancer Gene Census evidence",
    "key": [
      "evidence_cancer_gene_census/id"
    ],
    "description": "Disease-target evidence based on Cancer Gene Census provided by COSMIC",
    "tags": [
      "Target-Disease"
    ]
  },
  {
    "id": "evidence_chembl",
    "nice_name": "ChEMBL evidence",
    "key": [
      "evidence_chembl/id"
    ],
    "description": "Disease-target evidence based on drug indications, clinical trials and mechanism of action provided by ChEMBL",
    "tags": [
      "Target-Disease"
    ]
  },
  {
    "id": "evidence_clingen",
    "nice_name": "ClinGen evidence",
    "key": [
      "evidence_cling