# Recent Shocks Vulnerability Indicator

Notebook environment to migrate EM-DAT csv file to CF compliant zarr

In [1]:
# Optional; code formatter, installed as jupyter lab extension
#%load_ext lab_black
# Optional; code formatter, installed as jupyter notebook extension
%load_ext nb_black

<IPython.core.display.Javascript object>

### Configure OS independent paths

In [2]:
#%pip install tqdm

<IPython.core.display.Javascript object>

In [3]:
# Import standard packages
import os
import pathlib

import sys
import numpy as np
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr
import math
from tqdm import tqdm 

from coclicodata.drive_config import p_drive
from coclicodata.etl.cf_compliancy_checker import check_compliancy, save_compliancy

# Define (local and) remote drives
gca_data_dir = r"P:\11209197-018-global-coastal-atlas\MSc_students\ClenmarRowe\Data\All_Datasets\Orig_Datasets"


# Workaround to the Windows OS (10) udunits error after installation of cfchecker: https://github.com/SciTools/iris/issues/404
os.environ["UDUNITS2_XML_PATH"] = str(
    pathlib.Path().home().joinpath(  # change to the udunits2.xml file dir in your Python installation
        r"AppData\Local\miniconda3\pkgs\udunits2-2.2.28-h892ecd3_0\Library\share\udunits\udunits2.xml" ###Changed from anaconda to miniconda  - added the new package udunits file
    )
)

<IPython.core.display.Javascript object>

In [4]:
# Project paths & files (manual input)
dataset_dir = pathlib.Path().joinpath(gca_data_dir,"03_Vulnerability","Recent_Shocks")
dataset_dir_path = dataset_dir.joinpath("Recent_Shocks_original.nc")
CF_dir = dataset_dir.joinpath("CF")  # directory to save output CF check files
template_path= pathlib.Path().joinpath(gca_data_dir,r"04_Auxillary_files\Arjen_Vector_Template")
dataset_dir_path

WindowsPath('P:/11209197-018-global-coastal-atlas/MSc_students/ClenmarRowe/Data/All_Datasets/Orig_Datasets/03_Vulnerability/Recent_Shocks/Recent_Shocks_original.nc')

<IPython.core.display.Javascript object>

In [5]:
df_template=pd.read_csv(template_path)
df_template

Unnamed: 0,transect_id,country_id,continent,country_name,Start_lon,Start_lat,Intersect_lon,Intersect_lat,End_lon,End_lat
0,BOX_028_183_0,CHL,South America,Chile,-74.386310,-50.377659,-74.390966,-50.382558,-74.395623,-50.387456
1,BOX_028_183_1,CHL,South America,Chile,-74.382469,-50.379144,-74.387125,-50.384042,-74.391782,-50.388940
2,BOX_028_183_2,CHL,South America,Chile,-74.378628,-50.380629,-74.383284,-50.385527,-74.387941,-50.390425
3,BOX_028_183_3,CHL,South America,Chile,-74.373950,-50.382583,-74.379517,-50.387079,-74.385083,-50.391574
4,BOX_028_183_4,CHL,South America,Chile,-74.370425,-50.384358,-74.375991,-50.388853,-74.381558,-50.393348
...,...,...,...,...,...,...,...,...,...,...
1739821,BOX_211_067_149,RUS,Europe,Russia,39.929937,64.701462,39.935198,64.698350,39.940460,64.695238
1739822,BOX_211_067_150,RUS,Europe,Russia,39.933577,64.702586,39.938839,64.699474,39.944100,64.696363
1739823,BOX_211_067_151,RUS,Europe,Russia,39.935546,64.703502,39.942003,64.700833,39.948460,64.698164
1739824,BOX_211_067_152,RUS,Europe,Russia,39.937050,64.704370,39.944697,64.702356,39.952343,64.700341


<IPython.core.display.Javascript object>

In [6]:
country_temp=df_template.columns[3]
unique_temp=df_template[country_temp].unique()
unique_temp

array(['Chile', 'unknown', 'Argentina', 'Falkland Islands',
       'South Georgia and South Sandwich Islands',
       'French Southern and Antarctic Lands', 'New Zealand', 'Uruguay',
       'Brazil', 'Saint Helena', 'South Africa', 'Australia', 'Tonga',
       'Fiji', 'French Polynesia', 'Cook Islands', 'Peru', 'Namibia',
       'Mozambique', 'Madagascar', 'France', 'Mauritius', 'New Caledonia',
       'Vanuatu', 'Samoa', 'American Samoa', 'Wallis and Futuna',
       'Kiribati', 'Ecuador', 'Gabon', 'Angola', 'Republic of Congo',
       'Democratic Republic of the Congo', 'United Republic of Tanzania',
       'Kenya', 'Comoros', 'Somalia', 'Seychelles',
       'British Indian Ocean Territory', 'Maldives', 'Indonesia',
       'East Timor', 'Papua New Guinea', 'Solomon Islands', 'Mexico',
       'Guatemala', 'El Salvador', 'Panama', 'Colombia', 'Belize',
       'Honduras', 'Costa Rica', 'Nicaragua', 'Venezuela', 'Guyana',
       'Trinidad and Tobago', 'Saint Kitts and Nevis', 'Suriname',


<IPython.core.display.Javascript object>

In [7]:
# Data from other dataset input here
df_RS=pd.read_csv(dataset_dir.joinpath("public_emdat_custom_request_2024-01-28.csv"),sep=";")
df_RS

Unnamed: 0,DisNo.,Historic,Classification Key,Disaster Group,Disaster Subgroup,Disaster Type,Disaster Subtype,External IDs,Event Name,ISO,...,Reconstruction Costs ('000 US$),"Reconstruction Costs, Adjusted ('000 US$)",Insured Damage ('000 US$),"Insured Damage, Adjusted ('000 US$)",Total Damage ('000 US$),"Total Damage, Adjusted ('000 US$)",CPI,Admin Units,Entry Date,Last Update
0,1999-9388-DJI,No,nat-cli-dro-dro,Natural,Climatological,Drought,Drought,,,DJI,...,,,,,,,6050357889,"[{""adm1_code"":1093,""adm1_name"":""Ali Sabieh""},{...",2006-03-01,2023-09-25
1,1999-9388-SDN,No,nat-cli-dro-dro,Natural,Climatological,Drought,Drought,,,SDN,...,,,,,,,5884064849,"[{""adm1_code"":2757,""adm1_name"":""Northern Darfu...",2006-03-08,2023-09-25
2,1999-9388-SOM,No,nat-cli-dro-dro,Natural,Climatological,Drought,Drought,,,SOM,...,,,,,,,5884064849,"[{""adm1_code"":2691,""adm1_name"":""Bay""},{""adm1_c...",2006-03-08,2023-09-25
3,2000-0002-AGO,No,nat-hyd-flo-riv,Natural,Hydrological,Flood,Riverine flood,,,AGO,...,,,,,10000,16995.0,5884064849,"[{""adm2_code"":4214,""adm2_name"":""Baia Farta""},{...",2005-02-03,2023-09-25
4,2000-0003-BGD,No,nat-met-ext-col,Natural,Meteorological,Extreme temperature,Cold wave,,,BGD,...,,,,,,,5884064849,"[{""adm1_code"":575,""adm1_name"":""Barisal""},{""adm...",2003-07-01,2023-09-25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9867,2023-9234-ESP,No,nat-cli-dro-dro,Natural,Climatological,Drought,Drought,,,ESP,...,,,300000,,2400000,,,"[{""adm1_code"":2716,""adm1_name"":""Andalucía""},{""...",2023-04-19,2024-01-03
9868,2023-9494-IDN,No,nat-cli-dro-dro,Natural,Climatological,Drought,Drought,GLIDE:DR-2023-000154,,IDN,...,,,,,,,,"[{""adm1_code"":73617,""adm1_name"":""Papua""}]",2023-08-02,2023-09-26
9869,2023-9589-URY,No,nat-cli-dro-dro,Natural,Climatological,Drought,Drought,GLIDE:DR-2023-000010,,URY,...,,,,,,,,,2023-09-05,2023-12-08
9870,2023-9651-BRA,No,nat-cli-dro-dro,Natural,Climatological,Drought,Drought,,,BRA,...,,,,,,,,,2023-10-11,2023-12-08


<IPython.core.display.Javascript object>

In [8]:
# Original dataset dependent

df_RS=df_RS[[df_RS.columns[10],df_RS.columns[25],df_RS.columns[31]]]
#country_list_RS=df_RS[country_column_RS]
df_RS


Unnamed: 0,Country,Start Year,Total Deaths
0,Djibouti,2001,
1,Sudan,2000,
2,Somalia,2000,21.0
3,Angola,2000,31.0
4,Bangladesh,2000,49.0
...,...,...,...
9867,Spain,2023,
9868,Indonesia,2023,
9869,Uruguay,2023,
9870,Brazil,2023,


<IPython.core.display.Javascript object>

In [9]:
df_RS["Country"].unique()

array(['Djibouti', 'Sudan', 'Somalia', 'Angola', 'Bangladesh',
       'Guatemala', 'Iran (Islamic Republic of)', 'Mozambique',
       'South Africa', 'Brazil', 'India', 'United States of America',
       'Romania', 'Bulgaria', 'China', 'Réunion', 'Philippines',
       'Botswana', 'Mongolia', 'Russian Federation', 'Congo',
       'Afghanistan', 'Ethiopia', 'United Republic of Tanzania',
       'Lesotho', 'Kenya', 'Bosnia and Herzegovina', 'Iceland', 'Namibia',
       'Colombia', 'Indonesia', 'Zimbabwe', 'Egypt', 'Israel', 'Jordan',
       'Eswatini', 'Australia', 'Zambia', 'Madagascar', 'Nigeria',
       'Mexico', 'Argentina', 'Malawi', 'Pakistan',
       'Democratic Republic of the Congo', 'Peru',
       'Bolivia (Plurinational State of)', 'Chad', 'Austria', 'Niger',
       'Malaysia', 'Cameroon', 'Czechia', 'Japan',
       'Central African Republic', 'Benin', 'Türkiye', 'Hungary',
       'Republic of Korea', 'Serbia Montenegro', 'Ecuador',
       'Saudi Arabia', 'Honduras', 'French Gu

<IPython.core.display.Javascript object>

In [10]:
# Switch names that are similar but problematic

name_mapping = {

    "Saint Martin (French Part)":	"Saint Martin",
    "Viet Nam"	:"Vietnam",
    "Guinea-Bissau"	:"Guinea Bissau",
    "Bahamas"	:"The Bahamas",
    "Taiwan (Province of China)"	:"Taiwan",
    "Saint Barthélemy":	"Saint Barthelemy",
    "United Kingdom of Great Britain and Northern Ireland"	:"United Kingdom",
    "Republic of Korea"	:"South Korea",
    "Cabo Verde"	:"Cape Verde",
    "Netherlands (Kingdom of the)"	:"Netherlands",
    "China, Hong Kong Special Administrative Region"	:"Hong Kong S.A.R.",
    "Türkiye"	:"Turkey",
    "Micronesia (Federated States of)":	"Federated States of Micronesia",
    "Russian Federation":	"Russia",
    "Iran (Islamic Republic of)":	"Iran",
    "Democratic People's Republic of Korea"	:"North Korea",
    "Congo":	"Republic of Congo",
    "Wallis and Futuna Islands":	"Wallis and Futuna",
    "Venezuela (Bolivarian Republic of)":	"Venezuela",
    "Syrian Arab Republic":	"Syria",
    "Timor-Leste"	:"East Timor",
    "Côte d’Ivoire"	:"Ivory Coast",
     "Côte d'Ivoire"	:"Ivory Coast",

}

<IPython.core.display.Javascript object>

In [11]:
df_RS["Country"]=df_RS["Country"].replace(name_mapping)
unique_RS= df_RS["Country"].unique()
unique_RS

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_RS["Country"]=df_RS["Country"].replace(name_mapping)


array(['Djibouti', 'Sudan', 'Somalia', 'Angola', 'Bangladesh',
       'Guatemala', 'Iran', 'Mozambique', 'South Africa', 'Brazil',
       'India', 'United States of America', 'Romania', 'Bulgaria',
       'China', 'Réunion', 'Philippines', 'Botswana', 'Mongolia',
       'Russia', 'Republic of Congo', 'Afghanistan', 'Ethiopia',
       'United Republic of Tanzania', 'Lesotho', 'Kenya',
       'Bosnia and Herzegovina', 'Iceland', 'Namibia', 'Colombia',
       'Indonesia', 'Zimbabwe', 'Egypt', 'Israel', 'Jordan', 'Eswatini',
       'Australia', 'Zambia', 'Madagascar', 'Nigeria', 'Mexico',
       'Argentina', 'Malawi', 'Pakistan',
       'Democratic Republic of the Congo', 'Peru',
       'Bolivia (Plurinational State of)', 'Chad', 'Austria', 'Niger',
       'Malaysia', 'Cameroon', 'Czechia', 'Japan',
       'Central African Republic', 'Benin', 'Turkey', 'Hungary',
       'South Korea', 'Serbia Montenegro', 'Ecuador', 'Saudi Arabia',
       'Honduras', 'French Guiana', 'Kazakhstan', 'Burundi

<IPython.core.display.Javascript object>

In [12]:
# choose what year is  "Recent"
#Calculate the total sum of days along with the country
recent=2004
sum=0
td=[]
cont=[]
for country in unique_RS:
    sum = 0
    for i in range(len(df_RS)):
        if df_RS["Country"].iloc[i] == country and df_RS["Start Year"].iloc[i] >= recent and not math.isnan(df_RS["Total Deaths"].iloc[i]):
            
            sum += df_RS["Total Deaths"].iloc[i]
    cont.append(country)
    td.append(sum)

<IPython.core.display.Javascript object>

In [13]:
RS_counter = pd.DataFrame({"Country": cont, "Total Deaths": td})
RS_counter

Unnamed: 0,Country,Total Deaths
0,Djibouti,78.0
1,Sudan,4613.0
2,Somalia,23087.0
3,Angola,4685.0
4,Bangladesh,10019.0
...,...,...
213,British Virgin Islands,9.0
214,United Arab Emirates,0.0
215,Qatar,0.0
216,Isle of Man,0.0


<IPython.core.display.Javascript object>

In [14]:
good_list=[]

for i in range(0,len(unique_temp)):
    for j in range(0,len(unique_RS)):
        if unique_temp[i]==unique_RS[j]:
            good_list.append(unique_temp[i])
good_list
            
len(good_list)


167

<IPython.core.display.Javascript object>

In [15]:
#Create a bad
bad_list_temp=set(unique_temp).difference(set(good_list))
bad_list_RS=set(unique_RS).difference(set(good_list))
len(bad_list_RS)

51

<IPython.core.display.Javascript object>

In [16]:


#to dataframe
bad_list_RS_eye=pd.DataFrame({"Recent Shock2": list(bad_list_RS)})
bad_list_temp_eye=pd.DataFrame({"Arjen Template2": list(bad_list_temp)})

#to excel for eye check
bad_list_RS_eye.to_excel(dataset_dir.joinpath("eye_checker_RS.xlsx"))
bad_list_temp_eye.to_excel(dataset_dir.joinpath("eye_checker_temp.xlsx"))

<IPython.core.display.Javascript object>

In [17]:
RS_counter

Unnamed: 0,Country,Total Deaths
0,Djibouti,78.0
1,Sudan,4613.0
2,Somalia,23087.0
3,Angola,4685.0
4,Bangladesh,10019.0
...,...,...
213,British Virgin Islands,9.0
214,United Arab Emirates,0.0
215,Qatar,0.0
216,Isle of Man,0.0


<IPython.core.display.Javascript object>

In [18]:
df_template.head()
# len(df_template)

Unnamed: 0,transect_id,country_id,continent,country_name,Start_lon,Start_lat,Intersect_lon,Intersect_lat,End_lon,End_lat
0,BOX_028_183_0,CHL,South America,Chile,-74.38631,-50.377659,-74.390966,-50.382558,-74.395623,-50.387456
1,BOX_028_183_1,CHL,South America,Chile,-74.382469,-50.379144,-74.387125,-50.384042,-74.391782,-50.38894
2,BOX_028_183_2,CHL,South America,Chile,-74.378628,-50.380629,-74.383284,-50.385527,-74.387941,-50.390425
3,BOX_028_183_3,CHL,South America,Chile,-74.37395,-50.382583,-74.379517,-50.387079,-74.385083,-50.391574
4,BOX_028_183_4,CHL,South America,Chile,-74.370425,-50.384358,-74.375991,-50.388853,-74.381558,-50.393348


<IPython.core.display.Javascript object>

In [19]:


# Merge df_template with RS_counter on the "Country" column
merged_df_RS = pd.merge(df_template, RS_counter, left_on="country_name", right_on="Country", how="left")

# Rename the merged column to "Recent_Shocks (Deaths)"
merged_df_RS.rename(columns={"Total Deaths": "Recent_Shocks (Deaths)"}, inplace=True)

# Fill NAs with "N/A" in the "Recent_Shocks (Deaths)" column
merged_df_RS["Recent_Shocks (Deaths)"] = merged_df_RS["Recent_Shocks (Deaths)"].fillna(0).astype('int64')
merged_df_RS["Recent_Shocks (Deaths)"] = merged_df_RS["Recent_Shocks (Deaths)"].fillna("N/A")

# Now merged_df_RS is the DataFrame with the appended total death data
merged_df_RS


Unnamed: 0,transect_id,country_id,continent,country_name,Start_lon,Start_lat,Intersect_lon,Intersect_lat,End_lon,End_lat,Country,Recent_Shocks (Deaths)
0,BOX_028_183_0,CHL,South America,Chile,-74.386310,-50.377659,-74.390966,-50.382558,-74.395623,-50.387456,Chile,1000
1,BOX_028_183_1,CHL,South America,Chile,-74.382469,-50.379144,-74.387125,-50.384042,-74.391782,-50.388940,Chile,1000
2,BOX_028_183_2,CHL,South America,Chile,-74.378628,-50.380629,-74.383284,-50.385527,-74.387941,-50.390425,Chile,1000
3,BOX_028_183_3,CHL,South America,Chile,-74.373950,-50.382583,-74.379517,-50.387079,-74.385083,-50.391574,Chile,1000
4,BOX_028_183_4,CHL,South America,Chile,-74.370425,-50.384358,-74.375991,-50.388853,-74.381558,-50.393348,Chile,1000
...,...,...,...,...,...,...,...,...,...,...,...,...
1739821,BOX_211_067_149,RUS,Europe,Russia,39.929937,64.701462,39.935198,64.698350,39.940460,64.695238,Russia,56657
1739822,BOX_211_067_150,RUS,Europe,Russia,39.933577,64.702586,39.938839,64.699474,39.944100,64.696363,Russia,56657
1739823,BOX_211_067_151,RUS,Europe,Russia,39.935546,64.703502,39.942003,64.700833,39.948460,64.698164,Russia,56657
1739824,BOX_211_067_152,RUS,Europe,Russia,39.937050,64.704370,39.944697,64.702356,39.952343,64.700341,Russia,56657


<IPython.core.display.Javascript object>

In [20]:

merged_df_RS.to_csv(dataset_dir.joinpath("Recent_Shocks_mapped_to_transect.csv"),index=False)

<IPython.core.display.Javascript object>

In [21]:
df=pd.read_csv(dataset_dir.joinpath("Recent_Shocks_mapped_to_transect.csv"))
df

Unnamed: 0,transect_id,country_id,continent,country_name,Start_lon,Start_lat,Intersect_lon,Intersect_lat,End_lon,End_lat,Country,Recent_Shocks (Deaths)
0,BOX_028_183_0,CHL,South America,Chile,-74.386310,-50.377659,-74.390966,-50.382558,-74.395623,-50.387456,Chile,1000
1,BOX_028_183_1,CHL,South America,Chile,-74.382469,-50.379144,-74.387125,-50.384042,-74.391782,-50.388940,Chile,1000
2,BOX_028_183_2,CHL,South America,Chile,-74.378628,-50.380629,-74.383284,-50.385527,-74.387941,-50.390425,Chile,1000
3,BOX_028_183_3,CHL,South America,Chile,-74.373950,-50.382583,-74.379517,-50.387079,-74.385083,-50.391574,Chile,1000
4,BOX_028_183_4,CHL,South America,Chile,-74.370425,-50.384358,-74.375991,-50.388853,-74.381558,-50.393348,Chile,1000
...,...,...,...,...,...,...,...,...,...,...,...,...
1739821,BOX_211_067_149,RUS,Europe,Russia,39.929937,64.701462,39.935198,64.698350,39.940460,64.695238,Russia,56657
1739822,BOX_211_067_150,RUS,Europe,Russia,39.933577,64.702586,39.938839,64.699474,39.944100,64.696363,Russia,56657
1739823,BOX_211_067_151,RUS,Europe,Russia,39.935546,64.703502,39.942003,64.700833,39.948460,64.698164,Russia,56657
1739824,BOX_211_067_152,RUS,Europe,Russia,39.937050,64.704370,39.944697,64.702356,39.952343,64.700341,Russia,56657


<IPython.core.display.Javascript object>

In [22]:
len(df["Recent_Shocks (Deaths)"].unique())

135

<IPython.core.display.Javascript object>

In [23]:
len(df["country_name"].unique())

184

<IPython.core.display.Javascript object>

In [24]:
# Convert the pandas dataframe to an xarray dataset
ds = xr.Dataset.from_dataframe(df)
ds


<IPython.core.display.Javascript object>

In [25]:

os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE'
# Write the xarray dataset to a netCDF file
ds.to_netcdf(dataset_dir_path)

<IPython.core.display.Javascript object>

### Check CF compliancy original NetCDF files

In [26]:
# open datasets
ds = xr.open_dataset(dataset_dir_path)

# check original dataset
ds

<IPython.core.display.Javascript object>

In [27]:
ds = ds.drop_vars(['Country'])
ds

<IPython.core.display.Javascript object>

In [28]:
%%capture cap --no-stderr
# check original CF compliancy

check_compliancy(testfile= dataset_dir_path, 
                 working_dir=CF_dir
                 )


<IPython.core.display.Javascript object>

In [29]:
# save original CF compliancy
save_compliancy(cap, testfile=dataset_dir_path, working_dir=CF_dir)



<IPython.core.display.Javascript object>

### Make CF compliant alterations to the NetCDF files (dataset dependent)

In [30]:
# open original datasets
ds = xr.open_dataset(dataset_dir_path)

# check original dataset
ds

<IPython.core.display.Javascript object>

In [31]:
import json

# NetCDF attribute alterations
f_global = open(dataset_dir.joinpath("metadata_recent_shocks.json"))
meta_global = json.load(f_global)

for attr_name, attr_val in meta_global.items():
    if attr_name == 'PROVIDERS':
        attr_val = json.dumps(attr_val)
    ds.attrs[attr_name] = attr_val

ds.attrs['Conventions'] = "CF-1.8"
ds

<IPython.core.display.Javascript object>

In [32]:
# combine start and end coordinates into a transect
from shapely.geometry import LineString

start_lons = ds["Start_lon"].values
start_lats = ds["Start_lat"].values
end_lons = ds["End_lon"].values
end_lats = ds["End_lat"].values
coords = zip(zip(start_lons, start_lats), zip(end_lons, end_lats))

ds["transect_geom"] = (
    ["index"],
    [str(LineString(line)) for line in coords],
)
ds["transect_geom"].attrs["long_name"] = "Transect Geometry"

<IPython.core.display.Javascript object>

In [33]:
keep_vars = ['transect_id', 'country_name', 'continent', 'Intersect_lon', 'Intersect_lat', 'transect_geom', 'Recent_Shocks (Deaths)']
allvars = list(ds.keys())
delete_vars = list(set(allvars).difference(set(keep_vars)))

ds = ds.drop_vars(delete_vars)
ds

<IPython.core.display.Javascript object>

In [34]:
# NetCDF variable and dimension alterations

# rename or swap dimension names, the latter in case the name already exists as coordinate
ds = ds.rename_dims({"index": "nstations"})
ds


<IPython.core.display.Javascript object>

In [35]:
# change dtypes from unsigned to signed
object_vars = ['transect_id', 'country_name', 'continent', 'transect_geom']
for i in object_vars:
    ds[i] = ds[i].astype('S')
ds

<IPython.core.display.Javascript object>

In [36]:

# rename variables, if necessary
ds = ds.rename_vars(
       {"Intersect_lon": "lon", "Intersect_lat": "lat",
        "country_name": "country" ,
        "Recent_Shocks (Deaths)" : "recent_shocks"
       }
  )
ds

<IPython.core.display.Javascript object>

In [37]:


# add or change certain variable / coordinate attributes
### dataset attributes is a dictionary of dictionaries
dataset_attributes = {
    "lon": {"standard_name": "longitude", "long_name": "longitude", "units": "degrees_east"},
    "lat": {"standard_name": "latitude", "long_name": "latitude", "units": "degrees_north"},
    "transect_id": { "long_name": "Transect Identity", "units": "1"},
    "continent": { "long_name": "Continent", "units": "1"},
    "country": { "long_name": "Country", "units": "1"},
    "recent_shocks": { "long_name": "Recent_Shocks (Deaths)", "units": "1"}

}  # specify custom (CF convention) attributes

 # add / overwrite attributes
for k, v in dataset_attributes.items():
    try:
        ds[k].attrs = dataset_attributes[k]
    except:
        continue


ds

<IPython.core.display.Javascript object>

In [38]:
#Drop index from coordinates list
ds=ds.drop_vars(["index"])
ds


<IPython.core.display.Javascript object>

In [39]:

# set some data variables to coordinates to avoid duplication of dimensions in later stage
ds = ds.set_coords(["lon", "lat","transect_id", "country", "continent", "transect_geom"])
ds

<IPython.core.display.Javascript object>

In [40]:
ds

<IPython.core.display.Javascript object>

In [41]:
# Write the xarray dataset to a netCDF file
#Compliant netcdf
dataset_dir_path_CF=str(dataset_dir_path).replace("original","final")


ds.to_netcdf(path=dataset_dir_path_CF)

<IPython.core.display.Javascript object>

### Check CF compliancy altered NetCDF files

In [42]:
# open datasets (only first file, rest is the same)
ds = xr.open_dataset(dataset_dir_path_CF)

# check original dataset
ds

<IPython.core.display.Javascript object>

In [43]:
%%capture cap --no-stderr
# check original CF compliancy (for first file)

check_compliancy(testfile=dataset_dir_path_CF, 
                 working_dir=CF_dir
                 )

<IPython.core.display.Javascript object>

In [44]:
# save original CF compliancy (for first file)
save_compliancy(cap, testfile=dataset_dir_path_CF, working_dir=CF_dir)



<IPython.core.display.Javascript object>

### write data to Zarr files

In [45]:
# export to zarr in write mode (to overwrite if exists)
ds.to_zarr(str(dataset_dir_path).replace("original","final").replace(".nc", ".zarr"), mode="w")

<xarray.backends.zarr.ZarrStore at 0x18d84605740>

<IPython.core.display.Javascript object>