# Prepare Inputs

This notebook fetches the external data and updates the copies stored in this repository.

1. Fetch the Google Sheet containing the list of products and descriptions
   - Store the data in `input/overview.csv`
2. Fetch the VirES `product_types.json` file containing metadata for the variables in each product

In [None]:
import io
import json
import os

import cdflib
import pandas as pd
import pooch
import requests

## Fetch Google Sheet (products overview)

In [None]:
# Published spreadsheet URL; the "output=xlsx" query asks for an Excel workbook
DOC_URL = "https://docs.google.com/spreadsheets/d/e/2PACX-1vStz17Gi-O3tJjWcT_F0zYj4eCVuiiaU9ewpKTLlu_qRak-Cd0NHG3oQa0lcVFmWC2TFK3ecZHvdPxT/pub?output=xlsx"
# Absolute path of the local CSV copy, resolved against the current working directory
CSV_PATH = os.path.abspath("input/overview.csv")

In [None]:
def load_google_sheet(url=DOC_URL, timeout=60):
    """Download the published spreadsheet and return its "Overview" sheet.

    Only the "Overview" sheet is read; any other sheets in the workbook
    are ignored.

    Parameters
    ----------
    url : str
        Address of the spreadsheet, served as an xlsx workbook.
    timeout : float
        Seconds to wait for the server before giving up.

    Returns
    -------
    pandas.DataFrame
        The "Overview" sheet indexed by its "Name" column, with missing
        cells replaced by "-".

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    requests.Timeout
        If the server does not answer within ``timeout`` seconds.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here rather than handing an HTML error page to the Excel parser.
    response.raise_for_status()
    # read_excel expects a path or file-like object; raw bytes are deprecated.
    workbook = io.BytesIO(response.content)
    # header=1: column names are taken from the second row of the sheet.
    overview = pd.read_excel(workbook, sheet_name="Overview", header=1)
    return overview.set_index("Name").fillna("-")

In [None]:
# Download the products overview table (requires network access)
overview = load_google_sheet(DOC_URL)

In [None]:
# Preview the first rows of the downloaded table
overview.head()

In [None]:
# Create the target directory if needed so the export works on a fresh checkout
os.makedirs(os.path.dirname(CSV_PATH), exist_ok=True)
overview.to_csv(CSV_PATH)

## Fetch VirES `product_types.json` (tables of metadata for variables in products)

In [None]:
# Raw file taken from the `staging` branch of the ESA-VirES/VirES-Server repository
PRODUCT_TYPES_URL = "https://raw.githubusercontent.com/ESA-VirES/VirES-Server/staging/vires/vires/data/product_types.json"

In [None]:
def load_product_types_json(url=PRODUCT_TYPES_URL, timeout=60):
    """Download the product type definitions and index them by name.

    The downloaded JSON document is iterated as a sequence of entries,
    each of which must provide a "name" key.

    Parameters
    ----------
    url : str
        Address of the JSON file.
    timeout : float
        Seconds to wait for the server before giving up.

    Returns
    -------
    dict
        Maps each entry's "name" value to the entry itself. If several
        entries share a name, the last one wins.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    requests.Timeout
        If the server does not answer within ``timeout`` seconds.
    KeyError
        If an entry has no "name" key.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here rather than trying to parse an error page as JSON.
    response.raise_for_status()
    product_types = json.loads(response.content)
    return {entry["name"]: entry for entry in product_types}

In [None]:
# Download the product type metadata, keyed by product type name (requires network access)
product_metadata = load_product_types_json(PRODUCT_TYPES_URL)

In [None]:
# Translation table: product name as used in the json file -> name used in our csv.
# Entries that are commented out are not part of the mapping.
names_short_to_long = {
    "MODx_SC_1B": "SW_MODx_SC_1B",
    "MAGx_LR_1B": "SW_MAGx_LR_1B",
    "MAGx_HR_1B": "SW_MAGx_HR_1B",
    "EFIx_LP_1B": "SW_EFIx_LP_1B",
    "IBIxTMS_2F": "SW_IBIxTMS_2F",
    "EEFxTMS_2F": "SW_EEFxTMS_2F",
    "FACxTMS_2F": "SW_FACxTMS_2F",
    "TECxTMS_2F": "SW_TECxTMS_2F",
    "IPDxIRR_2F": "SW_IPDxIRR_2F",
    # "": "SW_AUX_IMF_2_",
    # "AEJxLPL_2F": "SW_AEJxLPL_2F",
    # "AEJxPBL_2F": "SW_AEJxPBL_2F",
    # "AEJxLPS_2F": "SW_AEJxLPS_2F",
    # "AEJxPBS_2F": "SW_AEJxPBS_2F",
    # "AOBxFAC_2F": "SW_AOBxFAC_2F",
    # "MITx_LP_2F": "SW_MITx_LP_2F",
    # "MITxTEC_2F": "SW_MITxTEC_2F",
    # "PPIxFAC_2F": "SW_PPIxFAC_2F",
    # "": "OMNI_HR_1min",
    # "AUX_OBSS2_": "SW_AUX_OBSx2_",
    # "AUX_OBSM2_": "SW_AUX_OBSx2_",
    # "AUX_OBSH2_": "SW_AUX_OBSH2_",
    # "VOBS_1M_2_": "SW_VOBS_xM_2_",
    # "VOBS_4M_2_": "SW_VOBS_xM_2_",
    # "": "GRACE_x_MAG",
    # "": "GFx_FGM_ACAL",
    # "": "CS_MAG",
}

In [None]:
# Each collection can contain subcollections: list them next to the collection name
for name, content in product_metadata.items():
    print(f"{name}: {list(content['datasets'])}")