# Clean & Reformat Data


In [1]:
import os
import json
import pandas as pd
import numpy as np
from pathlib import Path
from pprint import pprint
from netCDF4._netCDF4 import Dataset

# Raise pandas' display limits for this notebook session.
for _option, _value in (
    ("display.max_rows", 500),
    ("display.max_columns", 500),
    ("display.width", 1000),
):
    pd.set_option(_option, _value)

In [2]:
# Directory holding the raw NetCDF inputs, relative to the working directory.
original_data_dir = "data/raw"
# os.listdir returns entries in arbitrary order and includes every file in the
# directory (hidden OS/editor files, checkpoints, ...). Keep only the ".nc"
# inputs and sort them so downstream processing is deterministic and does not
# trip over names that do not follow the "<confidence>.<scenario>.nc" pattern.
datapaths = sorted(
    name for name in os.listdir(original_data_dir) if name.endswith(".nc")
)

In [3]:
# Build one descriptor dict per raw file. Each file name is split on "." and
# its first two parts are read as the confidence tag and the SSP scenario tag.
datainfo_list = []

for p in datapaths:
    sp = p.split(".")
    # Expand the abbreviated "med" tag to "medium"; other tags pass through.
    confidence = sp[0].replace("med", "medium")
    temp = {
        "confidence": confidence,
        "scenario": sp[1].replace("ssp", ""),
        "wiki": "https://www.wikiwand.com/en/Shared_Socioeconomic_Pathways",
        "doi": "https://doi.org/10.5281/zenodo.5914710",
        "download": "https://zenodo.org/records/5914710/files/ar6.zip?download=1",
        "local_datapath": f"./{original_data_dir}/{p}",
        # Path of the same file inside the upstream archive.
        "original_datapath": (
            f"ar6/global/confidence_output_files/{confidence}_confidence/"
            f"{sp[1]}/total_{sp[1]}_{confidence}_confidence_values.nc"
        ),
    }
    datainfo_list.append(temp)

In [6]:
def parse_ssp(datainfo: dict) -> dict:
    """Convert one raw NetCDF file into a JSON-serialisable dict.

    Args:
        datainfo: Descriptor with the keys ``local_datapath``, ``confidence``,
            ``scenario``, ``wiki``, ``doi`` and ``download``.

    Returns:
        A dict with the keys ``confidence``, ``scenario``, ``metadata``
        (description, reference links, download links) and ``data``
        (``quantiles``, ``years``, ``sea_level_change_mm``), containing only
        plain Python types.

    Raises:
        KeyError: If ``datainfo`` lacks one of the required keys.
    """
    # Read everything inside a context manager so the file handle is released
    # even when a variable is missing or a conversion fails.
    with Dataset(datainfo["local_datapath"]) as data:
        description = data.description
        years = [int(y) for y in data["years"][:].data]
        # Rounded to 4 decimals to drop float noise in the serialised output.
        quantiles = [round(float(q), 4) for q in data["quantiles"][:].data]
        # Reverse the axes and keep index 0 of what was the last axis
        # (presumably the single global location -- TODO confirm), then
        # convert to nested lists of Python ints.
        sea_level_change_mm = (
            np.array(data["sea_level_change"]).T[0].astype(int).tolist()
        )

    confidence = datainfo["confidence"]
    scenario = datainfo["scenario"]

    # Make the generic description specific to this confidence level/scenario.
    description = description.replace("confidence", f"{confidence} confidence")
    description = description.replace("AR6", f"SSP {scenario} AR6")

    repo_data_url = (
        "https://github.com/sunkcosts/ar6_slp_global_confidence_data/raw/main/data"
    )
    # Published file names use the three-letter confidence abbreviation.
    short_confidence = confidence[:3]

    return {
        "confidence": confidence,
        "scenario": scenario,
        "metadata": {
            "description": description,
            "reference": {
                "wiki": datainfo["wiki"],
                "doi": datainfo["doi"],
            },
            "download": {
                "set": datainfo["download"],
                "raw": f"{repo_data_url}/raw/{short_confidence}.ssp{scenario}.nc",
                "ssp": f"{repo_data_url}/ssp/{short_confidence}.{scenario}.ssp",
            },
        },
        "data": {
            "quantiles": quantiles,
            "years": years,
            "sea_level_change_mm": sea_level_change_mm,
        },
    }

In [7]:
# Serialise every parsed dataset to ./data/ssp/<conf>.<scenario>.ssp as JSON.
output_dir = "./data/ssp"
# open(..., "w") does not create missing parent directories, so make sure the
# target directory exists before writing.
os.makedirs(output_dir, exist_ok=True)

for di in datainfo_list:
    out = parse_ssp(di)
    # The with-block closes the file on exit; no explicit close() is needed.
    with open(f"{output_dir}/{di['confidence'][:3]}.{di['scenario']}.ssp", "w") as f:
        json.dump(out, f, indent=4)