In [10]:
import pandas as pd
from jsonapi_client import Session, Modifier

atlanteco_endpoint = 'super-studies/atlanteco/flagship-studies'
with Session("https://www.ebi.ac.uk/metagenomics/api/v1") as mgnify:
    studies = map(lambda r: r.json, mgnify.iterate(atlanteco_endpoint))
    studies = pd.json_normalize(studies)
studies[:5]

Unnamed: 0,type,id,attributes.accession,attributes.bioproject,attributes.samples-count,attributes.is-private,attributes.last-update,attributes.secondary-accession,attributes.centre-name,attributes.study-abstract,attributes.study-name,attributes.data-origination,relationships.biomes.data
0,studies,MGYS00002392,MGYS00002392,PRJEB6610,1073,False,2024-04-15T20:15:45,ERP006157,GSC,Analysis of 18S DNA in Tara Oceans Protists si...,Amplicon sequencing of Tara Oceans DNA samples...,SUBMITTED,"[{'id': 'root:Environmental:Aquatic:Marine', '..."
1,studies,MGYS00006613,MGYS00006613,PRJEB40759,58,False,2024-03-01T18:29:37,ERP124426,Ocean Sampling Day Consortium,Ocean Sampling Day was initiated by the EU-fun...,18S rRNA amplicon sequencing from the Ocean Sa...,SUBMITTED,"[{'id': 'root:Environmental:Aquatic:Marine', '..."
2,studies,MGYS00006612,MGYS00006612,PRJEB40763,48,False,2024-03-01T18:14:18,ERP124432,Ocean Sampling Day Consortium,Ocean Sampling Day was initiated by the EU-fun...,18S rRNA amplicon sequencing from the Ocean Sa...,SUBMITTED,"[{'id': 'root:Environmental:Aquatic:Marine', '..."
3,studies,MGYS00006611,MGYS00006611,PRJEB55999,63,False,2024-03-01T18:01:09,ERP140920,Ocean Sampling Day Consortium,Ocean Sampling Day was initiated by the EU-fun...,18S rRNA amplicon sequencing from the Ocean Sa...,SUBMITTED,"[{'id': 'root:Environmental:Aquatic:Marine', '..."
4,studies,MGYS00006610,MGYS00006610,PRJEB56005,50,False,2024-03-01T17:44:36,ERP140926,Ocean Sampling Day Consortium,Ocean Sampling Day was initiated by the EU-fun...,18S rRNA amplicon sequencing from the Ocean Sa...,SUBMITTED,"[{'id': 'root:Environmental:Aquatic:Marine', '..."


In [11]:
studies_samples = []

with Session("https://www.ebi.ac.uk/metagenomics/api/v1") as mgnify:
    for idx, study in studies[:6].iterrows():
        print(f"fetching {study.id} samples")
        samples = map(lambda r: r.json, mgnify.iterate(f'studies/{study.id}/samples?page_size=1000'))
        samples = pd.json_normalize(samples)
        samples = pd.DataFrame(data={
            'accession': samples['id'],
            'sample_id': samples['id'],
            'study': study.id, 
            'lon': samples['attributes.longitude'],
            'lat': samples['attributes.latitude'],
            'color': "#FF0000",
        })
        samples.set_index('accession', inplace=True)
        studies_samples.append(samples)
studies_samples = pd.concat(studies_samples)

fetching MGYS00002392 samples
fetching MGYS00006613 samples
fetching MGYS00006612 samples
fetching MGYS00006611 samples
fetching MGYS00006610 samples
fetching MGYS00006609 samples


In [19]:
print(f"fetched {len(studies_samples)} samples")

studies_samples[studies_samples.lon.isna()]

fetched 1485 samples


Unnamed: 0_level_0,sample_id,study,lon,lat,color
accession,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ERS5150844,ERS5150844,MGYS00006613,,,#FF0000
ERS13444812,ERS13444812,MGYS00006611,,,#FF0000
ERS13444813,ERS13444813,MGYS00006611,,,#FF0000
ERS13444961,ERS13444961,MGYS00006610,,,#FF0000
ERS13444962,ERS13444962,MGYS00006610,,,#FF0000


In [21]:
import leafmap.foliumap as leafmap

m = leafmap.Map(center=(0, 0), zoom=2)
m.add_points_from_xy(
    studies_samples.dropna(),
    x='lon', 
    y='lat', 
    popup=["study", "sample_id"], 
    color_column='color',
    add_legend=False
)
m

In [5]:
analyses = []
with Session("https://www.ebi.ac.uk/metagenomics/api/v1") as mgnify:
    for idx, sample in studies_samples[:10].iterrows():
        print(f"processing {sample.sample_id}")
        filtering = Modifier(f"pipeline_version=5.0&sample_accession={sample.sample_id}&experiment_type=assembly")
        analysis = map(lambda r: r.json, mgnify.iterate('analyses', filter=filtering))
        analysis = pd.json_normalize(analysis)
        analyses.append(analysis)
analyses = pd.concat(analyses)
analyses[:5]

processing ERS1308453
processing ERS1308521
processing ERS1308921
processing ERS1309516
processing ERS1309432
processing ERS1309458
processing ERS1309668
processing ERS506046
processing ERS489065
processing ERS490105


Unnamed: 0,type,id,attributes.analysis-status,attributes.experiment-type,attributes.analysis-summary,attributes.accession,attributes.pipeline-version,attributes.is-private,attributes.last-update,attributes.complete-time,attributes.instrument-platform,attributes.instrument-model,relationships.assembly.data.id,relationships.assembly.data.type,relationships.study.data.id,relationships.study.data.type,relationships.sample.data.id,relationships.sample.data.type
0,analysis-jobs,MGYA00607591,completed,assembly,"[{'key': 'Submitted nucleotide sequences', 'va...",MGYA00607591,5.0,False,2024-01-29T15:29:19.757516,2022-08-26T16:36:56,ILLUMINA,Illumina HiSeq 2500,ERZ7462356,assemblies,MGYS00006058,studies,ERS1308453,samples
0,analysis-jobs,MGYA00607600,completed,assembly,"[{'key': 'Submitted nucleotide sequences', 'va...",MGYA00607600,5.0,False,2024-01-29T15:29:19.757516,2022-08-26T23:33:27,ILLUMINA,Illumina HiSeq 2000,ERZ7512791,assemblies,MGYS00006058,studies,ERS1308521,samples
0,analysis-jobs,MGYA00607626,completed,assembly,"[{'key': 'Submitted nucleotide sequences', 'va...",MGYA00607626,5.0,False,2024-01-29T15:29:19.757516,2022-08-27T18:25:22,ILLUMINA,Illumina HiSeq 2000,ERZ7462167,assemblies,MGYS00006058,studies,ERS1308921,samples
0,analysis-jobs,MGYA00607624,completed,assembly,"[{'key': 'Submitted nucleotide sequences', 'va...",MGYA00607624,5.0,False,2024-01-29T15:29:19.757516,2022-08-27T17:24:19,ILLUMINA,Illumina HiSeq 2000,ERZ7462236,assemblies,MGYS00006058,studies,ERS1309516,samples
0,analysis-jobs,MGYA00607602,completed,assembly,"[{'key': 'Submitted nucleotide sequences', 'va...",MGYA00607602,5.0,False,2024-01-29T15:29:19.757516,2022-08-27T01:59:36,ILLUMINA,Illumina HiSeq 2000,ERZ7463532,assemblies,MGYS00006058,studies,ERS1309432,samples


In [6]:
identifier = "go-terms"
go_term = 'GO:0015878'
go_data = []
with Session("https://www.ebi.ac.uk/metagenomics/api/v1") as mgnify:
    for idx, mgya in analyses.iterrows():
        print(f"processing {mgya.id}")
        analysis_identifier = map(lambda r: r.json, mgnify.iterate(f'analyses/{mgya.id}/{identifier}'))
        analysis_identifier = pd.json_normalize(analysis_identifier)
        go_data.append("#0000FF" if go_term in list(analysis_identifier.id) else "#FF0000")
analyses.insert(2, identifier, go_data, True)

processing MGYA00607591
processing MGYA00607600
processing MGYA00607626
processing MGYA00607624


KeyboardInterrupt: 

In [None]:
df = analyses.join(studies_samples.set_index('sample_id'), on='relationships.sample.data.id')
df2 = df[[identifier, 'lon', 'lat', 'study', 'attributes.accession', 'relationships.study.data.id', 'relationships.sample.data.id', 'relationships.assembly.data.id']].copy()
df2 = df2.set_index("study")
df2 = df2.rename(columns={"attributes.accession": "analysis_ID", 
                          'relationships.study.data.id': "study_ID",
                          'relationships.sample.data.id': "sample_ID", 
                          'relationships.assembly.data.id': "assembly_ID"
                         })
m = leafmap.Map(center=(0, 0), zoom=2)
m.add_points_from_xy(df2, 
                     x='lon', 
                     y='lat', 
                     popup=["study_ID", "sample_ID", "assembly_ID", "analysis_ID"],
                    color_column=identifier, add_legend=False)
m

In [7]:
import leafmap.foliumap as leafmap

In [8]:
m = leafmap.Map()
m.add_basemap("HYBRID")
m