In [1]:
#%pip install pyspark

In [2]:
#%pip install xarray

In [3]:
#%pip install netCDF4

In [4]:
#%pip install esgf-pyclient==0.3.1

In [5]:
from pyspark.sql import SparkSession
import xarray as xr
import os
#os.environ["SPARK_LOCAL_DIR"] = "C:/tmp/spark"
os.environ["SPARK_LOCAL_DIR"] = "C:/Users/thais/spark-temp"

In [2]:
spark = SparkSession.builder.master("local").appName("Test").getOrCreate()
df = spark.createDataFrame([(1, "Hello"), (2, "World")], ["id", "message"])
df.show()

+---+-------+
| id|message|
+---+-------+
|  1|  Hello|
|  2|  World|
+---+-------+



### listagem de variáveis do CMPI6

In [1]:
import sys

# apontar para a versão local da lib
sys.path.insert(0, './dreqPy')

In [2]:
from dreqPy import dreq

In [3]:
# inicializando a biblioteca
dq = dreq.loadDreq()

In [4]:
# listando variáveis relacionadas à temperatura
temperature_vars = [var for var in dq.coll["var"].items if "temperature" in var.title.lower()]

for v in temperature_vars:
    print(v.label, "-", v.title, "-", v.sn)

bigthetao - Sea Water Conservative Temperature - sea_water_conservative_temperature
bigthetaoga - Global Average Sea Water Conservative Temperature - sea_water_conservative_temperature
edt - Eddy Diffusivity Coefficient for Temperature - atmosphere_heat_diffusivity
hfevapds - Temperature Flux Due to Evaporation Expressed as Heat Flux out of Sea Water - temperature_flux_due_to_evaporation_expressed_as_heat_flux_out_of_sea_water
hfrainds - Temperature Flux Due to Rainfall Expressed as Heat Flux into Sea Water - temperature_flux_due_to_rainfall_expressed_as_heat_flux_into_sea_water
hfrunoffds - Temperature Flux Due to Runoff Expressed as Heat Flux into Sea Water - temperature_flux_due_to_runoff_expressed_as_heat_flux_into_sea_water
litempbotfl - Basal Temperature of Floating Ice Shelf - land_ice_basal_temperature
litempbotgr - Basal Temperature of Grounded Ice Sheet - land_ice_basal_temperature
litemptop - Temperature at Top of Ice Sheet Model - temperature_at_top_of_ice_sheet_model
litem

### acesso ao ESGF

In [6]:
from collections import Counter, defaultdict
import logging
from pyesgf.search import SearchConnection

In [7]:
os.environ["ESGF_PYCLIENT_NO_FACETS_STAR_WARNING"] = "1"

In [8]:
logging.getLogger('pyesgf.search').setLevel(logging.ERROR)

In [9]:
# variáveis relevantes de acordo com GCOS
variaveis = ["tas", "tasmax", "tasmin", "ps", "pr", "hur", "rsds", "rlut", "uas", "vas", "tos", "sos", "sic"]

In [13]:
variaveis = ["tas", "tasmin", "tasmax", "tos", "ps", "pr"]  # exemplo com 6 variáveis
#variaveis = ["tas"]
#models = ["EC-Earth3", "CanESM5", "MPI-ESM1-2-LR", "IPSL-CM5A2-INCA"]
modelos_por_variavel = defaultdict(set)

conn = SearchConnection("https://esgf-node.llnl.gov/esg-search", distrib=False, timeout=300)
#conn = SearchConnection("https://esgf-node.ornl.gov/esg-search", distrib=False)
#conn = SearchConnection("https://eagle.alcf.anl.gov/esg-search", distrib=False)

for var in variaveis:
    ctx = conn.new_context(
        project="CMIP6",
        experiment_id=["historical", "ssp245", "ssp585"],
        variable_id=var,
        frequency="mon",
        #source_id="MIROC6",
        replica=False,
        latest=True
    )

    results = list(ctx.search(ignore_facet_check=True))
    for d in results:
        modelo = d.json.get("source_id", [None])[0]
        if modelo:
            modelos_por_variavel[var].add(modelo)

# Exibindo os resultados
for var, modelos in modelos_por_variavel.items():
    print(f"{var}: {sorted(modelos)}")

tas: ['ACCESS-CM2', 'ACCESS-ESM1-5', 'AWI-CM-1-1-MR', 'BCC-CSM2-MR', 'CAMS-CSM1-0', 'CAS-ESM2-0', 'CESM2', 'CESM2-FV2', 'CESM2-WACCM', 'CIESM', 'CMCC-CM2-SR5', 'CMCC-ESM2', 'CNRM-CM6-1', 'CNRM-CM6-1-HR', 'CNRM-ESM2-1', 'CanESM5', 'CanESM5-1', 'CanESM5-CanOE', 'E3SM-1-0', 'E3SM-1-1', 'E3SM-1-1-ECA', 'EC-Earth3', 'EC-Earth3-CC', 'EC-Earth3-Veg', 'EC-Earth3-Veg-LR', 'FGOALS-f3-L', 'FGOALS-g3', 'FIO-ESM-2-0', 'GFDL-ESM4', 'GISS-E2-1-G', 'GISS-E2-1-G-CC', 'GISS-E2-1-H', 'GISS-E2-2-G', 'HadGEM3-GC31-LL', 'HadGEM3-GC31-MM', 'IITM-ESM', 'INM-CM4-8', 'INM-CM5-0', 'IPSL-CM6A-LR', 'KACE-1-0-G', 'KIOST-ESM', 'MCM-UA-1-0', 'MIROC-ES2H', 'MIROC-ES2L', 'MIROC6', 'MPI-ESM1-2-HR', 'MPI-ESM1-2-LR', 'MRI-ESM2-0', 'NESM3', 'NorESM2-LM', 'NorESM2-MM', 'TaiESM1', 'UKESM1-0-LL']
tasmin: ['ACCESS-CM2', 'ACCESS-ESM1-5', 'AWI-CM-1-1-MR', 'BCC-CSM2-MR', 'CAS-ESM2-0', 'CESM2', 'CESM2-WACCM', 'CIESM', 'CMCC-CM2-SR5', 'CMCC-ESM2', 'CNRM-CM6-1', 'CNRM-CM6-1-HR', 'CNRM-ESM2-1', 'CanESM5', 'CanESM5-1', 'CanESM5-CanOE'

In [10]:
# obtendo informações dos datasets para as variáveis

variaveis = ["tas", "tasmin", "tasmax", "tos", "ps", "pr"]  # exemplo com 6 variáveis
#variaveis = ["tas"]
models = "EC-Earth3" # ["EC-Earth3", "CanESM5", "MPI-ESM1-2-LR", "IPSL-CM5A2-INCA"]
experiments = ["historical", "ssp245", "ssp585"]
modelos_por_variavel = defaultdict(set)
infos_datasets = {}

conn = SearchConnection("https://esgf-node.llnl.gov/esg-search", distrib=False, timeout=300)
#conn = SearchConnection("https://esgf-node.ornl.gov/esg-search", distrib=False)
#conn = SearchConnection("https://eagle.alcf.anl.gov/esg-search", distrib=False)

for var in variaveis:
    ctx = conn.new_context(
        project="CMIP6",
        experiment_id="historical",
        variable_id=var,
        frequency="mon",
        source_id=models,
        replica=False,
        latest=True
    )

    results = list(ctx.search(ignore_facet_check=True))
    infos_datasets[var] = results

    for d in results:
        modelo = d.json.get("source_id", [None])[0]
        
        if modelo:
            modelos_por_variavel[var].add(modelo)

# Exibindo os resultados
for var, modelos in modelos_por_variavel.items():
    print(f"{var}: {sorted(modelos)}")

tas: ['EC-Earth3']
tasmin: ['EC-Earth3']
tasmax: ['EC-Earth3']
tos: ['EC-Earth3']
ps: ['EC-Earth3']
pr: ['EC-Earth3']


In [23]:
agrupamento = defaultdict(set)

for var in variaveis:
    print(f"quantidadede de DatasetResults da variável {var}: {len(infos_datasets[var])}")

    for d in infos_datasets["tas"]:
        experimento = d.json.get("experiment_id", ["-"])[0]
        membro = d.json.get("member_id", ["-"])[0]
        agrupamento[experimento].add(membro)

    for exp, membros in agrupamento.items():
        print(f"{exp}: {len(membros)} membros\n-----\n")

quantidadede de DatasetResults da variável tas: 230
historical: 74 membros
-----

quantidadede de DatasetResults da variável tasmin: 230
historical: 74 membros
-----

quantidadede de DatasetResults da variável tasmax: 230
historical: 74 membros
-----

quantidadede de DatasetResults da variável tos: 315
historical: 74 membros
-----

quantidadede de DatasetResults da variável ps: 60
historical: 74 membros
-----

quantidadede de DatasetResults da variável pr: 228
historical: 74 membros
-----



In [24]:
# identificando os datasets mais recentes de cada variável
from datetime import datetime

dataset_mais_recente_por_variavel = {}

for var in variaveis:
    datasets = infos_datasets[var]

    # filtrando apenas datasets com creation_date válido
    datasets_validos = [
        d for d in datasets if d.json.get("creation_date") is not None
    ]

    if not datasets_validos:
        print(f"[{var}] Nenhum dataset com creation_date disponível.")
        continue

    # selecionando o dataset mais recente
    dataset_mais_recente = max(
        datasets_validos,
        key=lambda d: datetime.fromisoformat(d.json["creation_date"].replace("Z", ""))
    )

    dataset_mais_recente_por_variavel[var] = dataset_mais_recente

    print(f"[{var}] Dataset mais recente:")
    print(f"  ID.............: {dataset_mais_recente.dataset_id}")
    print(f"  Experimento....: {dataset_mais_recente.json.get('experiment_id', ['?'])[0]}")
    print(f"  Membro.........: {dataset_mais_recente.json.get('member_id', ['?'])[0]}")
    print(f"  Data criação...: {dataset_mais_recente.json.get('creation_date')}")
    print(f"  Número arquivos: {dataset_mais_recente.json.get('number_of_files')}")
    print("")

[tas] Dataset mais recente:
  ID.............: CMIP6.CMIP.EC-Earth-Consortium.EC-Earth3.historical.r5i1p1f1.Amon.tas.gr.v20201001|eagle.alcf.anl.gov
  Experimento....: historical
  Membro.........: r5i1p1f1
  Data criação...: 2020-09-27T04:59:24Z
  Número arquivos: 165

[tasmin] Dataset mais recente:
  ID.............: CMIP6.CMIP.EC-Earth-Consortium.EC-Earth3.historical.r5i1p1f1.Amon.tasmin.gr.v20201001|eagle.alcf.anl.gov
  Experimento....: historical
  Membro.........: r5i1p1f1
  Data criação...: 2020-09-28T22:49:32Z
  Número arquivos: 165

[tasmax] Dataset mais recente:
  ID.............: CMIP6.CMIP.EC-Earth-Consortium.EC-Earth3.historical.r5i1p1f1.Amon.tasmax.gr.v20201001|eagle.alcf.anl.gov
  Experimento....: historical
  Membro.........: r5i1p1f1
  Data criação...: 2020-09-25T23:51:27Z
  Número arquivos: 165

[tos] Dataset mais recente:
  ID.............: CMIP6.CMIP.EC-Earth-Consortium.EC-Earth3.historical.r5i1p1f1.Omon.tos.gn.v20201001|eagle.alcf.anl.gov
  Experimento....: histori

In [25]:
download_dir = r"C:\Users\thais\OneDrive\Documentos\#POLI\2024_4\Trabalho de Conclusão de Curso\cmip6_ingestion\datasets\ec_earth3"  # ajuste para o diretório desejado
os.makedirs(download_dir, exist_ok=True)

for var, dataset in dataset_mais_recente_por_variavel.items():
    print(f"\n[{var}] Localizando arquivos...")
    files = list(dataset.file_context().search())
    
    print(f"  Total de arquivos encontrados: {len(files)}")

    # Apenas arquivos HTTP disponíveis
    files_http = [f for f in files if "HTTPServer" in f.json.get("access", [])]

    if not files_http:
        print(f"  Nenhum arquivo com acesso HTTP disponível para {var}")
        continue

    for f in files_http:
        url = f.download_url()
        filename = os.path.join(download_dir, var, os.path.basename(url))
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        
        # Comando para baixar com wget
        cmd = f"wget -nc -O {filename} {url}"
        print(f"  Baixando: {url}")
        os.system(cmd)



[tas] Localizando arquivos...


HTTPError: 422 Client Error: Unprocessable Content for url: https://esgf-node.ornl.gov/esgf-1-5-bridge?format=application%2Fsolr%2Bjson&limit=0&distrib=false&type=File&dataset_id=CMIP6.CMIP.EC-Earth-Consortium.EC-Earth3.historical.r5i1p1f1.Amon.tas.gr.v20201001%7Ceagle.alcf.anl.gov&facets=%2A

In [19]:
dataset_mais_recente_por_variavel["tas"].json

{'mip_era': ['CMIP6'],
 'activity_drs': ['ScenarioMIP'],
 'institution_id': ['EC-Earth-Consortium'],
 'source_id': ['EC-Earth3'],
 'experiment_id': ['ssp585'],
 'member_id': ['r1i1p1f1'],
 'table_id': ['Amon'],
 'variable_id': ['tas'],
 'grid_label': ['gr'],
 'frequency': ['mon'],
 'realm': ['atmos'],
 'product': ['model-output'],
 'nominal_resolution': ['100 km'],
 'source_type': ['AOGCM'],
 'grid': ['T255L91'],
 'creation_date': '2019-09-27T17:17:08Z',
 'variant_label': ['r1i1p1f1'],
 'sub_experiment_id': ['none'],
 'further_info_url': ['https://furtherinfo.es-doc.org/CMIP6.EC-Earth-Consortium.EC-Earth3.ssp585.none.r1i1p1f1'],
 'activity_id': ['ScenarioMIP'],
 'data_specs_version': ['01.00.31'],
 'title': 'CMIP6.ScenarioMIP.EC-Earth-Consortium.EC-Earth3.ssp585.r1i1p1f1.Amon.tas.gr',
 'experiment_title': ['update of RCP8.5 based on SSP5'],
 'data_node': 'eagle.alcf.anl.gov',
 'index_node': 'us-index',
 'master_id': 'CMIP6.ScenarioMIP.EC-Earth-Consortium.EC-Earth3.ssp585.r1i1p1f1.Amon.

In [29]:
# Conexão apenas para arquivos
conn_files = SearchConnection("https://esgf-data.dkrz.de/esg-search", distrib=True)
#conn_files = SearchConnection("https://esgf-node.llnl.gov/esg-search", distrib=False, timeout=300) #
#conn_files = SearchConnection("https://eagle.alcf.anl.gov/esg-search", distrib=False, timeout=300)

for var, dataset in dataset_mais_recente_por_variavel.items():
    #dataset_id = dataset.dataset_id
    dataset_id = dataset.dataset_id.split('|')[0]
    print(f"\n[{var}] Buscando arquivos para: {dataset_id}")

    # Criar contexto no nó de download com o mesmo dataset_id
    ctx = conn_files.new_context(project="CMIP6", dataset_id=dataset_id, type="File")

    try:
        files = list(ctx.search())
    except Exception as e:
        print(f"  Erro ao buscar arquivos: {e}")
        continue

    print(f"  Total de arquivos encontrados: {len(files)}")

    files_http = [f for f in files if "HTTPServer" in f.json.get("access", [])]

    if not files_http:
        print(f"  Nenhum arquivo com acesso HTTP disponível para {var}")
        continue

    for f in files_http:
        url = f.download_url()
        filename = os.path.join(download_dir, var, os.path.basename(url))
        os.makedirs(os.path.dirname(filename), exist_ok=True)

        cmd = f"wget -nc -O \"{filename}\" {url}"
        print(f"  Baixando: {url}")
        os.system(cmd)



[tas] Buscando arquivos para: CMIP6.CMIP.EC-Earth-Consortium.EC-Earth3.historical.r5i1p1f1.Amon.tas.gr.v20201001



-------------------------------------------------------------------------------

This behavior is kept for backward-compatibility, but ESGF indexes might not
successfully perform a distributed search when this option is used, so some
results may be missing.  For full results, it is recommended to pass a list of
facets of interest when instantiating a context object.  For example,

      ctx = conn.new_context(facets='project,experiment_id')

Only the facets that you specify will be present in the facets_counts dictionary.

or explicitly use  conn.new_context(facets='*')

-------------------------------------------------------------------------------


  Total de arquivos encontrados: 0
  Nenhum arquivo com acesso HTTP disponível para tas

[tasmin] Buscando arquivos para: CMIP6.CMIP.EC-Earth-Consortium.EC-Earth3.historical.r5i1p1f1.Amon.tasmin.gr.v20201001



-------------------------------------------------------------------------------

This behavior is kept for backward-compatibility, but ESGF indexes might not
successfully perform a distributed search when this option is used, so some
results may be missing.  For full results, it is recommended to pass a list of
facets of interest when instantiating a context object.  For example,

      ctx = conn.new_context(facets='project,experiment_id')

Only the facets that you specify will be present in the facets_counts dictionary.

or explicitly use  conn.new_context(facets='*')

-------------------------------------------------------------------------------


  Total de arquivos encontrados: 0
  Nenhum arquivo com acesso HTTP disponível para tasmin

[tasmax] Buscando arquivos para: CMIP6.CMIP.EC-Earth-Consortium.EC-Earth3.historical.r5i1p1f1.Amon.tasmax.gr.v20201001



-------------------------------------------------------------------------------

This behavior is kept for backward-compatibility, but ESGF indexes might not
successfully perform a distributed search when this option is used, so some
results may be missing.  For full results, it is recommended to pass a list of
facets of interest when instantiating a context object.  For example,

      ctx = conn.new_context(facets='project,experiment_id')

Only the facets that you specify will be present in the facets_counts dictionary.

or explicitly use  conn.new_context(facets='*')

-------------------------------------------------------------------------------


  Total de arquivos encontrados: 0
  Nenhum arquivo com acesso HTTP disponível para tasmax

[tos] Buscando arquivos para: CMIP6.CMIP.EC-Earth-Consortium.EC-Earth3.historical.r5i1p1f1.Omon.tos.gn.v20201001



-------------------------------------------------------------------------------

This behavior is kept for backward-compatibility, but ESGF indexes might not
successfully perform a distributed search when this option is used, so some
results may be missing.  For full results, it is recommended to pass a list of
facets of interest when instantiating a context object.  For example,

      ctx = conn.new_context(facets='project,experiment_id')

Only the facets that you specify will be present in the facets_counts dictionary.

or explicitly use  conn.new_context(facets='*')

-------------------------------------------------------------------------------


  Total de arquivos encontrados: 0
  Nenhum arquivo com acesso HTTP disponível para tos

[ps] Buscando arquivos para: CMIP6.CMIP.EC-Earth-Consortium.EC-Earth3.historical.r5i1p1f1.Emon.ps.gr.v20201001



-------------------------------------------------------------------------------

This behavior is kept for backward-compatibility, but ESGF indexes might not
successfully perform a distributed search when this option is used, so some
results may be missing.  For full results, it is recommended to pass a list of
facets of interest when instantiating a context object.  For example,

      ctx = conn.new_context(facets='project,experiment_id')

Only the facets that you specify will be present in the facets_counts dictionary.

or explicitly use  conn.new_context(facets='*')

-------------------------------------------------------------------------------


  Total de arquivos encontrados: 0
  Nenhum arquivo com acesso HTTP disponível para ps

[pr] Buscando arquivos para: CMIP6.CMIP.EC-Earth-Consortium.EC-Earth3.historical.r5i1p1f1.Amon.pr.gr.v20201001



-------------------------------------------------------------------------------

This behavior is kept for backward-compatibility, but ESGF indexes might not
successfully perform a distributed search when this option is used, so some
results may be missing.  For full results, it is recommended to pass a list of
facets of interest when instantiating a context object.  For example,

      ctx = conn.new_context(facets='project,experiment_id')

Only the facets that you specify will be present in the facets_counts dictionary.

or explicitly use  conn.new_context(facets='*')

-------------------------------------------------------------------------------


  Total de arquivos encontrados: 0
  Nenhum arquivo com acesso HTTP disponível para pr


In [31]:
dataset_mais_recente_por_variavel["tas"].json

{'mip_era': ['CMIP6'],
 'activity_drs': ['CMIP'],
 'institution_id': ['EC-Earth-Consortium'],
 'source_id': ['EC-Earth3'],
 'experiment_id': ['historical'],
 'member_id': ['r5i1p1f1'],
 'table_id': ['Amon'],
 'variable_id': ['tas'],
 'grid_label': ['gr'],
 'frequency': ['mon'],
 'realm': ['atmos'],
 'product': ['model-output'],
 'nominal_resolution': ['100 km'],
 'source_type': ['AOGCM'],
 'grid': ['T255L91'],
 'creation_date': '2020-09-27T04:59:24Z',
 'variant_label': ['r5i1p1f1'],
 'sub_experiment_id': ['none'],
 'further_info_url': ['https://furtherinfo.es-doc.org/CMIP6.EC-Earth-Consortium.EC-Earth3.historical.none.r5i1p1f1'],
 'activity_id': ['CMIP'],
 'data_specs_version': ['01.00.32'],
 'title': 'CMIP6.CMIP.EC-Earth-Consortium.EC-Earth3.historical.r5i1p1f1.Amon.tas.gr',
 'experiment_title': ['all-forcing simulation of the recent past'],
 'data_node': 'eagle.alcf.anl.gov',
 'index_node': 'us-index',
 'master_id': 'CMIP6.CMIP.EC-Earth-Consortium.EC-Earth3.historical.r5i1p1f1.Amon.t

#### tas | UKESM1-0-LL | tutorial

In [12]:
conn = SearchConnection('https://esgf-data.dkrz.de/esg-search', distrib=True)
ctx = conn.new_context(
    project='CMIP6',
    source_id='UKESM1-0-LL',
    experiment_id='historical',
    variable='tas',
    frequency='mon',
    variant_label='r1i1p1f2',
    data_node='esgf-data3.ceda.ac.uk')
ctx.hit_count

0

In [11]:
result = ctx.search()[0]
result.dataset_id

IndexError: list index out of range

In [14]:
import os
if 'HOME' not in os.environ:
    os.environ['HOME'] = os.environ.get('USERPROFILE', 'C:\\Users\\thais')

from pyesgf.logon import LogonManager

lm = LogonManager()
lm.logoff()
print("Logado?", lm.is_logged_on())

myproxy_host = 'esgf-data.dkrz.de'
lm.logon(username=None, password=None, hostname=myproxy_host)  # Isso abrirá um prompt interativo
print("Logado após autenticação?", lm.is_logged_on())

Logado? False
Enter myproxy username: 

TimeoutError: [WinError 10060] Uma tentativa de conexão falhou porque o componente conectado não respondeu
corretamente após um período de tempo ou a conexão estabelecida falhou
porque o host conectado não respondeu

#### tas | EC-Earth3

In [61]:
def parse_dataset_id(dataset_id):
    # Exemplo: CMIP6.CMIP.EC-Earth-Consortium.EC-Earth3.historical.r5i1p1f1.Amon.pr.gr.v20201001
    parts = dataset_id.split('.')
    return {
        "project": parts[0],
        "activity_id": parts[1],          # CMIP
        "institution_id": parts[2],       # EC-Earth-Consortium
        "source_id": parts[3],            # EC-Earth3
        "experiment_id": parts[4],        # historical
        "member_id": parts[5],            # r5i1p1f1
        "table_id": parts[6],             # Amon
        "variable_id": parts[7],          # pr
        "grid_label": parts[8],           # gr
        "version": parts[9][1:]           # remove 'v' antes do número da versão
    }


dataset_id = "CMIP6.CMIP.EC-Earth-Consortium.EC-Earth3.historical.r5i1p1f1.Amon.pr.gr.v20201001"
facets = parse_dataset_id(dataset_id)

ctx_files = conn_files.new_context(
    project=facets["project"],
    source_id=facets["source_id"],
    experiment_id=facets["experiment_id"],
    member_id=facets["member_id"],
    table_id=facets["table_id"],
    variable_id=facets["variable_id"],
    grid_label=facets["grid_label"],
    version=facets["version"],
    type="File"
)

files = list(ctx_files.search())

if not files:
    print("Nenhum arquivo encontrado.")
else:
    for f in files:
        print(f.download_url(), f.json.get("data_node", "n/a"))


HTTPError: 422 Client Error: Unprocessable Content for url: https://esgf-node.ornl.gov/esgf-1-5-bridge?format=application%2Fsolr%2Bjson&limit=0&distrib=true&type=Dataset&project=CMIP6&source_id=EC-Earth3&experiment_id=historical&member_id=r5i1p1f1&table_id=Amon&variable_id=pr&grid_label=gr&version=20201001&facets=%2A

In [62]:
ctx_files = conn_files.new_context(
    project="CMIP6",
    source_id="EC-Earth3",
    experiment_id="historical",
    variable_id="pr",
    table_id="Amon",
    type="File",
    distrib=False
)

files = list(ctx_files.search())

# Filtra pela versão e outros critérios se quiser
desired_version = "20201001"
files_filtered = [f for f in files if f.json.get("version") == desired_version]

print(f"Arquivos encontrados: {len(files_filtered)}")
for f in files_filtered:
    print(f.download_url(), f.json.get("data_node", "n/a"))


HTTPError: 422 Client Error: Unprocessable Content for url: https://esgf-node.ornl.gov/esgf-1-5-bridge?format=application%2Fsolr%2Bjson&limit=0&distrib=true&type=Dataset&project=CMIP6&source_id=EC-Earth3&experiment_id=historical&variable_id=pr&table_id=Amon&facets=%2A

In [21]:
infos_datasets

{'tas': <pyesgf.search.results.DatasetResult at 0x15bb28d6790>,
 'tasmin': <pyesgf.search.results.DatasetResult at 0x15bb257a7d0>,
 'tasmax': <pyesgf.search.results.DatasetResult at 0x15bb26ea850>,
 'tos': <pyesgf.search.results.DatasetResult at 0x15bb2855f10>,
 'ps': <pyesgf.search.results.DatasetResult at 0x15bb287db90>,
 'pr': <pyesgf.search.results.DatasetResult at 0x15bb285b5d0>}

In [17]:
node = "eagle.alcf.anl.gov"
conn_files = SearchConnection("https://esgf-node.llnl.gov/esg-search", distrib=True)

ctx = conn_files.new_context(
    project="CMIP6",
    source_id="EC-Earth3",
    experiment_id="ssp585",
    variable_id="pr",
    frequency="mon",
    variant_label="r4i1p1f1",
    data_node=node
)
ctx.hit_count

HTTPError: 422 Client Error: Unprocessable Content for url: https://esgf-node.ornl.gov/esgf-1-5-bridge?format=application%2Fsolr%2Bjson&limit=0&distrib=true&type=Dataset&project=CMIP6&source_id=EC-Earth3&experiment_id=ssp585&variable_id=pr&frequency=mon&variant_label=r4i1p1f1&data_node=eagle.alcf.anl.gov&facets=%2A

In [None]:
# pr
conn = SearchConnection("https://esgf-data.dkrz.de/esg-search", distrib=False)

ctx = conn.new_context(
    project="CMIP6",
    source_id="EC-Earth3",
    #experiment_id="ssp585",
    experiment_id="ssp585",
    variable_id="pr",
    table_id="Amon",
    frequency="mon",
    variant_label="r4i1p1f1"
)

print(f"datasets encontrados: {ctx.hit_count}")

datasets encontrados: 1


In [15]:
# tas
conn = SearchConnection("https://esgf-data.dkrz.de/esg-search", distrib=False)

ctx = conn.new_context(
    project="CMIP6",
    source_id="EC-Earth3",
    #experiment_id="ssp585",
    experiment_id="historical",
    variable_id="tas",
    table_id="Amon",
    frequency="mon",
    variant_label="r110i1p1f1"
)

print(f"datasets encontrados: {ctx.hit_count}")

datasets encontrados: 1


In [16]:
infos_datasets["tas"][0].json

NameError: name 'infos_datasets' is not defined

In [17]:
result = ctx.search()[0]
result.dataset_id

files = result.file_context().search()
for file in files:
    print(file.opendap_url)

http://esgf3.dkrz.de/thredds/dodsC/cmip6/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r110i1p1f1/Amon/tas/gr/v20200412/tas_Amon_EC-Earth3_historical_r110i1p1f1_gr_197001-197012.nc
http://esgf3.dkrz.de/thredds/dodsC/cmip6/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r110i1p1f1/Amon/tas/gr/v20200412/tas_Amon_EC-Earth3_historical_r110i1p1f1_gr_197101-197112.nc
http://esgf3.dkrz.de/thredds/dodsC/cmip6/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r110i1p1f1/Amon/tas/gr/v20200412/tas_Amon_EC-Earth3_historical_r110i1p1f1_gr_197201-197212.nc
http://esgf3.dkrz.de/thredds/dodsC/cmip6/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r110i1p1f1/Amon/tas/gr/v20200412/tas_Amon_EC-Earth3_historical_r110i1p1f1_gr_197301-197312.nc
http://esgf3.dkrz.de/thredds/dodsC/cmip6/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r110i1p1f1/Amon/tas/gr/v20200412/tas_Amon_EC-Earth3_historical_r110i1p1f1_gr_197401-197412.nc
http://esgf3.dkrz.de/thredds/dodsC/cmip6/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r110i1p1

In [20]:
import socket

def test_connection(host, port):
    try:
        with socket.create_connection((host, port), timeout=10):
            print(f"Conexão bem-sucedida com {host}:{port}")
    except Exception as e:
        print(f"Falha na conexão com {host}:{port} → {e}")

test_connection("esgf-data.dkrz.de", 7512)
test_connection("esgf-node.llnl.gov", 7512)

Falha na conexão com esgf-data.dkrz.de:7512 → timed out
Falha na conexão com esgf-node.llnl.gov:7512 → timed out


In [23]:
first_file = files[0]
print("Download URL:", first_file.download_url)

Download URL: http://esgf3.dkrz.de/thredds/fileServer/cmip6/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r110i1p1f1/Amon/tas/gr/v20200412/tas_Amon_EC-Earth3_historical_r110i1p1f1_gr_197001-197012.nc


In [24]:
import requests

url = first_file.download_url
response = requests.get(url)

with open('tas_example.nc', 'wb') as f:
    f.write(response.content)

In [None]:
import xarray as xr

ds = xr.open_dataset("tas_example.nc")
print(ds)

OSError: [Errno -101] NetCDF: HDF error: '\\\\wsl.localhost\\Ubuntu\\home\\thais\\climate-ingestion\\climate_data_processing\\tas_example.nc'

In [25]:
import xarray as xr

ds = xr.open_dataset(first_file.opendap_url)
print(ds)

OSError: [Errno -90] NetCDF: file not found: 'http://esgf3.dkrz.de/thredds/dodsC/cmip6/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r110i1p1f1/Amon/tas/gr/v20200412/tas_Amon_EC-Earth3_historical_r110i1p1f1_gr_197001-197012.nc'

In [22]:
result.json

{'id': 'CMIP6.CMIP.EC-Earth-Consortium.EC-Earth3.historical.r110i1p1f1.Amon.tas.gr.v20200412|esgf3.dkrz.de',
 'version': '20200412',
 'access': ['HTTPServer', 'GridFTP', 'OPENDAP', 'Globus'],
 'activity_drs': ['CMIP'],
 'activity_id': ['CMIP'],
 'cf_standard_name': ['air_temperature'],
 'citation_url': ['http://cera-www.dkrz.de/WDCC/meta/CMIP6/CMIP6.CMIP.EC-Earth-Consortium.EC-Earth3.historical.r110i1p1f1.Amon.tas.gr.v20200412.json'],
 'data_node': 'esgf3.dkrz.de',
 'data_specs_version': ['01.00.30'],
 'dataset_id_template_': ['%(mip_era)s.%(activity_drs)s.%(institution_id)s.%(source_id)s.%(experiment_id)s.%(member_id)s.%(table_id)s.%(variable_id)s.%(grid_label)s'],
 'datetime_start': '1970-01-16T12:00:00Z',
 'datetime_stop': '2014-12-16T12:00:00Z',
 'directory_format_template_': ['%(root)s/%(mip_era)s/%(activity_drs)s/%(institution_id)s/%(source_id)s/%(experiment_id)s/%(member_id)s/%(table_id)s/%(variable_id)s/%(grid_label)s/%(version)s'],
 'east_degrees': 359.29688,
 'experiment_id':

In [17]:
ds = xr.open_dataset(files[10].opendap_url, chunks={"time": 120})
print(ds)

OSError: [Errno -90] NetCDF: file not found: 'http://esgf3.dkrz.de/thredds/dodsC/cmip6/CMIP/EC-Earth-Consortium/EC-Earth3/historical/r110i1p1f1/Amon/tas/gr/v20200412/tas_Amon_EC-Earth3_historical_r110i1p1f1_gr_198001-198012.nc'

In [None]:
# Conexão ao ESGF LLNL
conn = SearchConnection("https://esgf-node.llnl.gov/esg-search", distrib=False)

# Modelos e variáveis de interesse
modelos = ["EC-Earth3", "CanESM5", "MPI-ESM1-2-LR"]
variaveis = ["tas", "tasmin", "tasmax", "tos", "ps", "pr"]
experimentos = ["historical", "ssp245", "ssp585"]

# Dicionário para armazenar os resultados
resultados = defaultdict(lambda: defaultdict(set))

# Loop por variável e modelo
for var in variaveis:
    for modelo in modelos:
        ctx = conn.new_context(
            project="CMIP6",
            experiment_id=experimentos,
            variable_id=var,
            source_id=modelo,
            frequency="mon",
            replica=False,
            latest=True
        )
        results = list(ctx.search(ignore_facet_check=True))
        for d in results:
            experimento = d.json.get("experiment_id", [None])[0]
            if experimento:
                resultados[modelo][experimento].add(var)

# Exibição dos resultados
for modelo, exps in resultados.items():
    print(f"\nModelo: {modelo}")
    for exp, vars in exps.items():
        print(f"  Experimento: {exp} → Variáveis: {sorted(vars)}")


Modelo: EC-Earth3
  Experimento: ssp585 → Variáveis: ['pr', 'ps', 'tas', 'tasmax', 'tasmin', 'tos']

Modelo: CanESM5
  Experimento: ssp585 → Variáveis: ['pr', 'ps', 'tas', 'tasmax', 'tasmin', 'tos']

Modelo: MPI-ESM1-2-LR
  Experimento: ssp585 → Variáveis: ['pr', 'ps', 'tas', 'tasmax', 'tasmin', 'tos']
