In [1]:
# -*- coding: utf-8 -*-
"""
Load TRW (tree-ring width) data from FE23,
downloaded from NCEI https://www.ncei.noaa.gov/access/paleo-search/study/36773
Created 25/10/2024 by Lucie Luecke 
21/11/2024 LL: added csv saving of compact dataframe, removed redundant output.

Here we extract a dataframe with the following columns:

columns=['archiveType', 
        'climateInterpretation_variable',
        'climateInterpretation_variableDetail',
        'datasetId',
        'dataSetName',
        'geo_meanElev', 
        'geo_meanLat', 
        'geo_meanLon',
        'geo_siteName',
        'year', 'yearUnits',
        'paleoData_proxy',
        'paleoData_units',
        'paleoData_values',
        'paleoData_notes',
        'paleoData_sensorSpecies',
        'originalDataURL',
        'originalDatabase'
]

We save a standardised compact dataframe (as pickle and as csv) for concatenation to DoD2k.

"""



"\nload TRW data from FE23 \ndownloaded from NCEI https://www.ncei.noaa.gov/access/paleo-search/study/36773\nCreated 25/10/2024 by Lucie Luecke \n21/11/2024 LL: added csv saving of compact dataframe, removed redundant output.\n\n\n\nHere we extract a dataframe with the following columns:\n\ncolumns=['archiveType', \n        'climateInterpretation_variable',\n        'climateInterpretation_variableDetail',\n        'datasetId',\n        'dataSetName',                                                                                \n        'geo_meanElev', \n        'geo_meanLat', \n        'geo_meanLon',\n        'year', 'yearUnits',                                                                                         \n        'paleoData_variableName',\n        'paleoData_units',                                                                                           \n        'paleoData_values',\n        'paleoData_notes',\n        'paleoData_sensorSpecies',\n        'originalDataUR

# Set up working environment

In [2]:
# Enable IPython's autoreload extension so that edits to imported modules
# (e.g. the local `functions` helper) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
import os
import xarray as xr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature 
from matplotlib.gridspec import GridSpec as GS
import scipy.io as sio
from copy import deepcopy as dc

In [4]:
# Set up the working directory.
# The notebook has to run from the repository root (compile_proxy_database_v2.1) so that the
# helper module `functions` can be imported and relative data paths such as 'fe23/...' resolve.
# If we are not already there, step up one level to the parent folder.
# Make sure this ends up at the correct path!
if not os.getcwd().endswith('compile_proxy_database_v2.1'):
    os.chdir(os.path.join(os.getcwd(), '..'))
wdir = os.getcwd()
print(f'working directory: {wdir}')
import functions as f  # project helpers (used below for array conversion and csv I/O)

working directory: /home/jupyter-lluecke/compile_proxy_database_v2.1


In [5]:

# NOTE(review): `functions` is already imported as `f` at the end of the previous cell;
# this cell repeats that import and can be dropped once the notebook is tidied up.
import functions as f # Lucie's functions


# load the source data

In [6]:
# Names of the FE23 netCDF variables kept in the slice file.
# NOTE(review): `vars` shadows the Python builtin of the same name and is
# reassigned (without 'treetime') in the extraction cell further down.
vars = [
    'chronos',
    'lonlat',
    'investigator',
    'trwsSm',
    'chronology',
    'country',
    'species',
    'elevation',
    'sitename',
    'treetime',
]

In [7]:
# # download and unzip FE23 
# !wget -nH -P fe23 https://www.ncei.noaa.gov/pub/data/paleo/contributions_by_author/franke2022/franke2022-fe23.nc
# fe23_full  = xr.open_dataset('fe23/franke2022-fe23.nc')

# # # save slice of FE23 with only relevant variables as netCDF (fe23_full is 25GB)
# fe23_slice = fe23_full[['chronos', 'lonlat', 'investigator', 'trwsSm', 'chronology', 'country', 'species', 
#         'elevation', 'sitename', 'treetime']]
# fe23_slice.to_netcdf('fe23/franke2022-fe23_slice.nc')

In [8]:
# Open the reduced FE23 netCDF file (only the variables needed below).
# The path is relative to the working directory set above.
fe23_slice = xr.open_dataset('fe23/franke2022-fe23_slice.nc')

In [9]:
# Show the dataset summary (dimensions, coordinates, data variables) as a sanity check.
print(fe23_slice)

<xarray.Dataset> Size: 58MB
Dimensions:       (ttime: 1159, nseries: 278, nregion: 22, lonlat: 2,
                   nchars_cinv: 42, nchars_chr: 32, nchars_ctry: 22,
                   nchars_csp: 6, nchars_cn: 51)
Coordinates:
    lonlat        (nseries, nregion, lonlat) float64 98kB ...
Dimensions without coordinates: ttime, nseries, nregion, nchars_cinv,
                                nchars_chr, nchars_ctry, nchars_csp, nchars_cn
Data variables:
    chronos       (ttime, nseries, nregion) float64 57MB ...
    investigator  (nchars_cinv, nseries, nregion) |S1 257kB ...
    trwsSm        (nseries, nregion) float64 49kB ...
    chronology    (nchars_chr, nseries, nregion) |S1 196kB ...
    country       (nchars_ctry, nseries, nregion) |S1 135kB ...
    species       (nchars_csp, nseries, nregion) |S1 37kB ...
    elevation     (nseries, nregion) float64 49kB ...
    sitename      (nchars_cn, nseries, nregion) |S1 312kB ...
    treetime      (ttime) float64 9kB ...
Attributes:
    re

In [10]:
# Flatten the (series, region) grid of FE23 into one list per variable.
# df_fe23[var] ends up with one entry per record, in region-major order
# (all series of region 0, then all series of region 1, ...), so that the
# lists of all variables line up index by index.
df_fe23 = {}
vars = ['chronos', 'lonlat', 'investigator', 'trwsSm', 'chronology', 'country', 'species', 
        'elevation', 'sitename']

# A (series, region) slot holds a real record only if its chronology is not all-NaN.
# This is computed once here instead of re-reading 'chronos' for every variable and slot.
# 'chronos' has dimensions (ttime, nseries, nregion), so reducing over axis 0 leaves
# a boolean (nseries, nregion) grid.
has_record = ~np.isnan(fe23_slice['chronos'].data).all(axis=0)
n_series, n_region = has_record.shape

for var in vars:
    print(var)
    # Work on the plain numpy array; the dataset itself is left unmodified.
    values = fe23_slice[var].data
    records = []
    for ii in range(n_region):            # loop through the regions
        for jj in range(n_series):        # loop through the records in any one region
            if not has_record[jj, ii]:
                continue                  # empty slot: no chronology stored here
            if var == 'chronos':
                # full time series of this record (length ttime)
                data = values[:, jj, ii]
            elif var in ('trwsSm', 'elevation'):
                # one scalar per record
                data = float(values[jj, ii])
            elif var == 'lonlat':
                # length-2 array per record
                data = values[jj, ii, :]
            else:
                # character variables are stored as arrays of single bytes:
                # join them, decode, and strip the blanks
                data = b''.join(values[:, jj, ii]).decode("latin-1").replace(' ','')
            records.append(data)
    df_fe23[var] = records


chronos
lonlat
investigator
trwsSm
chronology
country
species
elevation
sitename


# create compact dataframe

In [11]:
# Start from an empty dataframe that already carries the standard compact columns;
# the cells below fill them one group at a time.
df_compact = pd.DataFrame(
    columns=[
        'archiveType',
        'climateInterpretation_variable',
        'dataSetName',
        'datasetId',
        'geo_meanElev',
        'geo_meanLat',
        'geo_meanLon',
        'geo_siteName',
        'originalDatabase',
        'originalDataURL',
        'paleoData_notes',
        'paleoData_proxy',
        'paleoData_units',
        'paleoData_values',
        'year',
        'yearUnits',
    ]
)

In [12]:
# One row per extracted record: the chronology goes into 'paleoData_values' ...
df_compact['paleoData_values'] = df_fe23['chronos']
# ... and every row starts with the full common time axis ('treetime'),
# which is trimmed per record in the next cell.
df_compact['year'] = [fe23_slice['treetime'].data for _ in range(len(df_compact))]

In [13]:
# Trim every record to the years that actually hold data.
# f.convert_to_nparray presumably returns a numpy masked array (it is used
# through .data and .mask here) -- confirm in functions.py.
for row_label in df_compact.index:
    masked_values = f.convert_to_nparray(df_compact.at[row_label, 'paleoData_values'])
    years = np.array(df_compact.at[row_label, 'year'])
    df_compact.at[row_label, 'paleoData_values'] = masked_values.data[~masked_values.mask]
    df_compact.at[row_label, 'year'] = years[~masked_values.mask]

In [14]:
# Each entry of df_fe23['lonlat'] is a length-2 array; its first element is written to
# 'geo_meanLon' and its second to 'geo_meanLat'
# (assumes the file stores longitude first, as the variable name suggests -- TODO confirm).
df_compact[['geo_meanLon', 'geo_meanLat']] = df_fe23['lonlat']
# One elevation value per record.
df_compact['geo_meanElev']                 = df_fe23['elevation']

In [15]:
# dataSetName: the chronology file name without its '.rwl' extension.
df_compact['dataSetName'] = [chron.replace('.rwl','') for chron in df_fe23['chronology']]
# datasetId: the same name, prefixed with the database tag.
df_compact['datasetId']   = ['FE23_'+name for name in df_compact['dataSetName']]

In [16]:
# Base URL of the raw ring-width measurement files on the NCEI server.
url = 'https://www.ncei.noaa.gov/pub/data/paleo/treering/measurements/'

df_compact['geo_siteName']            = df_fe23['sitename']
df_compact['paleoData_sensorSpecies'] = df_fe23['species']
# The investigator name is kept as a free-text note.
df_compact['paleoData_notes']         = ['Investigator: '+name for name in df_fe23['investigator']]
# The underscores in dataSetName become path separators in the URL,
# e.g. 'africa_keny001' -> url + 'africa/keny001.rwl'.
df_compact['originalDataURL']         = [url+name.replace('_','/')+'.rwl' for name in df_compact['dataSetName']]

In [17]:
# Metadata that is identical for every FE23 record: column name -> constant value.
constant_metadata = {
    'archiveType':      'tree',
    'paleoData_proxy':  'TRW',
    'paleoData_units':  'standardized_anomalies',
    'originalDatabase': 'FE23 (Breitenmoser et al. (2014))',
    'yearUnits':        'CE',
}
for column_name, constant_value in constant_metadata.items():
    df_compact[column_name] = constant_value

In [18]:
# Translate the numeric 'trwsSm' code of each record into a text label.
# TM maps code -> label; records whose code is NaN get 'N/A'.
TM = {1.:'temperature', 2.:'moisture', 3.:'temperature+moisture', 4.: 'NOT temperature NOT moisture', 0:'nan'}
# Use a plain boolean test: the previous `~np.isnan(x)` only worked because np.isnan
# returns a numpy bool (on a Python bool, `~` gives -1/-2, which are always truthy).
df_compact['climateInterpretation_variable'] = [
    'N/A' if np.isnan(code) else TM[code] for code in df_fe23['trwsSm']
]
# No further detail on the interpretation is extracted here.
df_compact['climateInterpretation_variableDetail'] = 'N/A'

In [19]:
# Mask NaNs and exclude unusable records from the dataframe.

# Step 1: trim every record to its valid entries (values and years stay aligned).
for ii in df_compact.index:
    dd=f.convert_to_nparray(df_compact.at[ii, 'paleoData_values'])
    df_compact.at[ii, 'paleoData_values']=dd.data[~dd.mask]
    df_compact.at[ii, 'year']=df_compact.at[ii, 'year'][~dd.mask]

# Step 2: collect the index labels of records that have no data left at all.
drop_inds = []
for ii in range(df_compact.shape[0]):
    if len(df_compact.iloc[ii]['year'])==0:
        print('empty', ii, df_compact.iloc[ii]['year'], df_compact.iloc[ii]['originalDatabase'])
        print(df_compact.iloc[ii]['paleoData_values'])
        drop_inds += [df_compact.index[ii]]

# Step 3: also collect records that carry no signal (constant or NaN-only series).
for ii, row in enumerate(df_compact.paleoData_values):
    if np.std(row)==0: 
        print(ii, 'std=0')
    elif np.sum(np.diff(row)**2)==0: 
        print(ii, 'diff=0')
    elif np.isnan(np.std(row)):
        print(ii, 'std nan')
    else:
        continue
    # ii is positional, so translate it to the index label before storing it.
    # (This previously read `df.index[ii]`, an undefined name that raised a NameError
    # as soon as any record reached this line.)
    if df_compact.index[ii] not in drop_inds: 
        drop_inds += [df_compact.index[ii]]

print(drop_inds)
# Note: the index is not reset after dropping; later cells access rows by position.
df_compact = df_compact.drop(index=drop_inds)

[]


In [20]:
# datasetId must be unique: the two numbers printed below
# (distinct ids, total rows) have to agree.
print(df_compact['datasetId'].unique().size)
print(df_compact.shape[0])

2754
2754


## save compact dataframe

### save pickle

In [21]:
# Put the columns in alphabetical order, then write the dataframe to a pickle file.
# Pickle files should only be loaded from trusted sources; the csv export
# in the next section is the portable alternative.
df_compact = df_compact.loc[:, sorted(df_compact.columns)]
df_compact.to_pickle('fe23/fe23_compact.pkl')

### save csv

In [22]:
# Save to a set of csv files (metadata, data, year).
# The dataframe is tagged with a `name` attribute first; presumably the helper reads it
# to build the output file names -- confirm in functions.py.
df_compact.name='fe23'
f.write_compact_dataframe_to_csv(df_compact)

METADATA: archiveType, climateInterpretation_variable, climateInterpretation_variableDetail, dataSetName, datasetId, geo_meanElev, geo_meanLat, geo_meanLon, geo_siteName, originalDataURL, originalDatabase, paleoData_notes, paleoData_proxy, paleoData_sensorSpecies, paleoData_units, yearUnits
Saved to /home/jupyter-lluecke/compile_proxy_database_v2.1/fe23/fe23_compact_%s.csv


In [23]:
# Load the dataframe back from the csv files and print its summary
# (columns, non-null counts, dtypes) to check the export.
f.load_compact_dataframe_from_csv('fe23').info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2754 entries, 0 to 2753
Data columns (total 18 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   archiveType                           2754 non-null   object 
 1   climateInterpretation_variable        2754 non-null   object 
 2   climateInterpretation_variableDetail  2754 non-null   object 
 3   dataSetName                           2754 non-null   object 
 4   datasetId                             2754 non-null   object 
 5   geo_meanElev                          2710 non-null   float32
 6   geo_meanLat                           2754 non-null   float32
 7   geo_meanLon                           2754 non-null   float32
 8   geo_siteName                          2754 non-null   object 
 9   originalDataURL                       2754 non-null   object 
 10  originalDatabase                      2754 non-null   object 
 11  paleoData_notes  

# check output

## dataset metadata: dataSetName, datasetId, originalDataURL, originalDatabase

### index

In [24]:
# Check the index of the final dataframe.
print(df_compact.index)

RangeIndex(start=0, stop=2754, step=1)


### dataSetName

In [25]:
# Inspect the dataSetName column.
key = 'dataSetName'
print(f'{key}: ')
print(df_compact.loc[:, key].values)

dataSetName: 
['africa_keny001' 'africa_keny002' 'africa_morc001' ...
 'northamerica_usa_wy034' 'northamerica_usa_wy035'
 'northamerica_usa_wy036']


### datasetId

In [26]:
# Inspect the datasetId column: number of distinct ids, number of rows
# (the two should match), then the ids themselves.
key = 'datasetId'
print(df_compact[key].unique().size)
print(df_compact.shape[0])
print(f'{key} (starts with): ')
print(df_compact.loc[:, key].values)

2754
2754
datasetId (starts with): 
['FE23_africa_keny001' 'FE23_africa_keny002' 'FE23_africa_morc001' ...
 'FE23_northamerica_usa_wy034' 'FE23_northamerica_usa_wy035'
 'FE23_northamerica_usa_wy036']


### originalDataURL

In [27]:
# Inspect the originalDataURL column: distinct URLs in sorted order
# (np.unique already returns its result sorted).
key = 'originalDataURL'
print(f'{key}: ')
print(np.unique(list(df_compact[key])))

originalDataURL: 
['https://www.ncei.noaa.gov/pub/data/paleo/treering/measurements/africa/keny001.rwl'
 'https://www.ncei.noaa.gov/pub/data/paleo/treering/measurements/africa/keny002.rwl'
 'https://www.ncei.noaa.gov/pub/data/paleo/treering/measurements/africa/morc001.rwl'
 ...
 'https://www.ncei.noaa.gov/pub/data/paleo/treering/measurements/southamerica/chil016.rwl'
 'https://www.ncei.noaa.gov/pub/data/paleo/treering/measurements/southamerica/chil017.rwl'
 'https://www.ncei.noaa.gov/pub/data/paleo/treering/measurements/southamerica/chil018.rwl']


### originalDatabase

In [28]:
# Inspect the originalDatabase column: distinct values.
key = 'originalDatabase'
print(f'{key}: ')
print(np.unique(list(df_compact[key])))

originalDatabase: 
['FE23 (Breitenmoser et al. (2014))']


## geographical metadata: elevation, latitude, longitude, site name

### geo_meanElev

In [29]:
# Inspect the elevation column: the raw series, then the distinct finite
# values truncated to whole numbers (missing elevations are skipped).
key = 'geo_meanElev'
print(f'{key}: ')
print(df_compact[key])
print(np.unique(['%d' % elevation for elevation in df_compact[key] if np.isfinite(elevation)]))

geo_meanElev: 
0       2010.0
1       2010.0
2       2200.0
3       1700.0
4       2200.0
         ...  
2749    2500.0
2750    2542.0
2751    1319.0
2752    2400.0
2753    2378.0
Name: geo_meanElev, Length: 2754, dtype: float64
['0' '1' '10' '100' '1000' '1002' '1005' '1006' '101' '1010' '1020' '1030'
 '1036' '1040' '1047' '105' '1050' '1051' '1052' '1055' '1060' '1065'
 '1067' '107' '1070' '1071' '1075' '108' '1080' '1085' '109' '1090' '1095'
 '1097' '110' '1100' '111' '1110' '1120' '1128' '1130' '1132' '1140'
 '1146' '115' '1150' '1155' '1156' '1158' '116' '1160' '1167' '1169'
 '1170' '1175' '1180' '1194' '12' '120' '1200' '1201' '1206' '1208' '1219'
 '1220' '1224' '1225' '1230' '1231' '1234' '1235' '1237' '1240' '1250'
 '1253' '126' '1260' '1270' '1275' '1280' '1285' '13' '130' '1300' '1302'
 '131' '1310' '1311' '1315' '1317' '1319' '1320' '1325' '1330' '1340'
 '135' '1350' '1354' '136' '1360' '1366' '1367' '1370' '1372' '1375'
 '1377' '138' '1380' '1385' '1390' '1391' '1392' '1395

### geo_meanLat

In [30]:
# Inspect the latitude column: distinct values truncated to whole degrees.
key = 'geo_meanLat'
print(f'{key}: ')
print(np.unique(['%d' % latitude for latitude in df_compact[key]]))

geo_meanLat: 
['-18' '-22' '-23' '-24' '-25' '-26' '-27' '-31' '-32' '-33' '-34' '-35'
 '-36' '-37' '-38' '-39' '-40' '-41' '-42' '-43' '-44' '-45' '-46' '-50'
 '-53' '-54' '-7' '0' '16' '17' '19' '20' '21' '23' '24' '25' '26' '27'
 '28' '29' '30' '31' '32' '33' '34' '35' '36' '37' '38' '39' '40' '41'
 '42' '43' '44' '45' '46' '47' '48' '49' '50' '51' '52' '53' '54' '55'
 '56' '57' '58' '59' '60' '61' '62' '63' '64' '65' '66' '67' '68' '69'
 '70' '71' '72']


### geo_meanLon

In [31]:
# Inspect the longitude column: distinct values truncated to whole degrees.
key = 'geo_meanLon'
print(f'{key}: ')
print(np.unique(['%d' % longitude for longitude in df_compact[key]]))

geo_meanLon: 
['-1' '-100' '-101' '-102' '-103' '-104' '-105' '-106' '-107' '-108'
 '-109' '-110' '-111' '-112' '-113' '-114' '-115' '-116' '-117' '-118'
 '-119' '-120' '-121' '-122' '-123' '-124' '-125' '-126' '-127' '-128'
 '-129' '-130' '-133' '-134' '-135' '-136' '-137' '-138' '-139' '-140'
 '-141' '-142' '-143' '-144' '-145' '-146' '-147' '-148' '-149' '-150'
 '-151' '-152' '-153' '-154' '-159' '-162' '-163' '-2' '-3' '-4' '-5'
 '-58' '-6' '-61' '-62' '-63' '-64' '-65' '-66' '-67' '-68' '-69' '-7'
 '-70' '-71' '-72' '-73' '-74' '-75' '-76' '-77' '-78' '-79' '-8' '-80'
 '-81' '-82' '-83' '-84' '-85' '-86' '-87' '-88' '-89' '-9' '-90' '-91'
 '-92' '-93' '-94' '-95' '-96' '-97' '-98' '-99' '0' '1' '10' '100' '101'
 '103' '104' '105' '106' '107' '109' '11' '110' '111' '112' '114' '115'
 '117' '118' '119' '12' '122' '125' '127' '128' '129' '13' '130' '132'
 '133' '136' '137' '138' '14' '141' '142' '143' '145' '146' '147' '148'
 '149' '15' '150' '151' '153' '154' '155' '158' '159' '16' 

### geo_siteName

In [32]:
# Inspect the site name column.
key = 'geo_siteName'
print(f'{key}: ')
print(df_compact.loc[:, key].values)

geo_siteName: 
['RagatiForestStationNyeriDistrict' 'RagatiForestStationNyeriDistrict'
 'Tounfite' ... 'DevilsTowerNationalMonument' 'CookingHillside'
 'KretecVale']


## proxy metadata: archive type, proxy type, interpretation

### archiveType

In [33]:
# From here on, check the remaining entries one by one (can be omitted at a later stage).

# Inspect the archiveType column: distinct values.
key = 'archiveType'
print(f'{key}: ')
print(np.unique(df_compact.loc[:, key]))

archiveType: 
['tree']


### paleoData_proxy

In [34]:
# Inspect the paleoData_proxy column: distinct values.
key = 'paleoData_proxy'
print(f'{key}: ')
print(np.unique(list(df_compact[key])))

paleoData_proxy: 
['TRW']


### paleoData_notes

In [35]:
# Inspect the paleoData_notes column.
key = 'paleoData_notes'
print(f'{key}: ')
print(df_compact.loc[:, key].values)

paleoData_notes: 
['Investigator: Stahle' 'Investigator: Stahle' 'Investigator: Stockton'
 ... 'Investigator: Stambaugh' 'Investigator: King' 'Investigator: King']


### climateInterpretation_variable

In [36]:
# Inspect the climateInterpretation_variable column: distinct values.
key = 'climateInterpretation_variable'
print(f'{key}: ')
print(np.unique(list(df_compact[key])))


climateInterpretation_variable: 
['N/A' 'NOT temperature NOT moisture' 'moisture' 'temperature'
 'temperature+moisture']


### climateInterpretation_variableDetail

In [37]:
# Inspect the climateInterpretation_variableDetail column: distinct values.
key = 'climateInterpretation_variableDetail'
print(f'{key}: ')
print(np.unique(list(df_compact[key])))


climateInterpretation_variableDetail: 
['N/A']


### paleoData_sensorSpecies

In [38]:
# Inspect the paleoData_sensorSpecies column: distinct species codes.
key = 'paleoData_sensorSpecies'
print(f'{key}: ')
print(np.unique(list(df_compact[key])))


paleoData_sensorSpecies: 
['ABAL' 'ABAM' 'ABBA' 'ABBO' 'ABCE' 'ABCI' 'ABCO' 'ABLA' 'ABMA' 'ABPI'
 'ABPN' 'ABPR' 'ABSB' 'ABSP' 'ACRU' 'ACSH' 'ADHO' 'ADUS' 'AGAU' 'ARAR'
 'ATCU' 'ATSE' 'AUCH' 'BEPU' 'CABU' 'CADE' 'CADN' 'CARO' 'CDAT' 'CDBR'
 'CDDE' 'CDLI' 'CEAN' 'CESP' 'CHLA' 'CHNO' 'DABI' 'DACO' 'FAGR' 'FASY'
 'FICU' 'FRNI' 'HABI' 'JGAU' 'JUEX' 'JUFO' 'JUOC' 'JUPH' 'JUPR' 'JURE'
 'JUSC' 'JUSP' 'JUVI' 'LADE' 'LAGM' 'LALA' 'LALY' 'LAOC' 'LASI' 'LGFR'
 'LIBI' 'LITU' 'NOBE' 'NOGU' 'NOME' 'NOPU' 'NOSO' 'PCAB' 'PCEN' 'PCGL'
 'PCGN' 'PCMA' 'PCOB' 'PCOM' 'PCPU' 'PCRU' 'PCSH' 'PCSI' 'PCSM' 'PCSP'
 'PHAL' 'PHAS' 'PHGL' 'PHTR' 'PIAL' 'PIAM' 'PIAR' 'PIBA' 'PIBN' 'PIBR'
 'PICE' 'PICL' 'PICO' 'PIEC' 'PIED' 'PIFL' 'PIHA' 'PIHR' 'PIJE' 'PIKO'
 'PILA' 'PILE' 'PILO' 'PIMO' 'PIMU' 'PIMZ' 'PINI' 'PIPA' 'PIPE' 'PIPI'
 'PIPN' 'PIPO' 'PIPU' 'PIRE' 'PIRI' 'PIRO' 'PISF' 'PISI' 'PISP' 'PIST'
 'PISY' 'PITA' 'PITO' 'PIUN' 'PIVI' 'PIWA' 'PLRA' 'PLUV' 'PPDE' 'PPSP'
 'PRMA' 'PSMA' 'PSME' 'PTAN' 'QUAL' 'QUDG' 'QUFR' '

## data 

### paleoData_units

In [39]:
# Inspect the paleoData_units column: distinct values.
key = 'paleoData_units'
print(f'{key}: ')
print(np.unique(list(df_compact[key])))

paleoData_units: 
['standardized_anomalies']


### paleoData_values

In [40]:
# Inspect paleoData_values: value range and container type of the first 20 records.
key = 'paleoData_values'

print('%s: '%key)
for ii, vv in enumerate(df_compact[key][:20]):
    try: 
        print('%-30s: %s -- %s'%(df_compact['dataSetName'].iloc[ii][:30], str(np.nanmin(vv)), str(np.nanmax(vv))))
        print(type(vv))
    # Catch only what np.nanmin/np.nanmax raise for unusable input (empty or non-numeric
    # records); a bare `except:` would also hide interrupts and unrelated bugs.
    except (TypeError, ValueError): print(df_compact['dataSetName'].iloc[ii], 'NaNs detected.')

paleoData_values: 
africa_keny001                : 0.4 -- 1.423
<class 'numpy.ndarray'>
africa_keny002                : 0.499 -- 1.631
<class 'numpy.ndarray'>
africa_morc001                : -0.014 -- 2.226
<class 'numpy.ndarray'>
africa_morc002                : 0.323 -- 1.587
<class 'numpy.ndarray'>
africa_morc003                : 0.004 -- 1.617
<class 'numpy.ndarray'>
africa_morc011                : 0.005 -- 2.094
<class 'numpy.ndarray'>
africa_morc012                : 0.435 -- 1.866
<class 'numpy.ndarray'>
africa_morc013                : 0.166 -- 1.389
<class 'numpy.ndarray'>
africa_morc014                : -0.025 -- 2.012
<class 'numpy.ndarray'>
africa_safr001                : 0.485 -- 2.129
<class 'numpy.ndarray'>
africa_zimb001                : 0.15 -- 2.415
<class 'numpy.ndarray'>
africa_zimb002                : 0.178 -- 2.044
<class 'numpy.ndarray'>
africa_zimb003                : 0.24 -- 2.701
<class 'numpy.ndarray'>
southamerica_arge             : 0.161 -- 1.867
<class 'numpy

### year

In [41]:
# Inspect year: first and last year of the first 20 records.
key = 'year'
print('%s: '%key)
for ii, vv in enumerate(df_compact[key][:20]):
    try: print('%-30s: %s -- %s'%(df_compact['dataSetName'].iloc[ii][:30], str(np.nanmin(vv)), str(np.nanmax(vv))))
    # Catch only what np.nanmin/np.nanmax raise for unusable input (empty or non-numeric
    # records); a bare `except:` would also hide interrupts and unrelated bugs.
    except (TypeError, ValueError): print('NaNs detected.', vv)

year: 
africa_keny001                : 1944.0 -- 1993.0
africa_keny002                : 1950.0 -- 1994.0
africa_morc001                : 1360.0 -- 1983.0
africa_morc002                : 1686.0 -- 1984.0
africa_morc003                : 1755.0 -- 1984.0
africa_morc011                : 1598.0 -- 1984.0
africa_morc012                : 1813.0 -- 1984.0
africa_morc013                : 1854.0 -- 1984.0
africa_morc014                : 1200.0 -- 1984.0
africa_safr001                : 1665.0 -- 1976.0
africa_zimb001                : 1925.0 -- 1994.0
africa_zimb002                : 1877.0 -- 1997.0
africa_zimb003                : 1880.0 -- 1996.0
southamerica_arge             : 1900.0 -- 1974.0
southamerica_arge001          : 1605.0 -- 1974.0
southamerica_arge002          : 1800.0 -- 1974.0
southamerica_arge004          : 1532.0 -- 1974.0
southamerica_arge005          : 1641.0 -- 1974.0
southamerica_arge006          : 1449.0 -- 1974.0
southamerica_arge007          : 1579.0 -- 1974.0


### yearUnits

In [42]:
# Inspect the yearUnits column: distinct values.
key = 'yearUnits'
print(f'{key}: ')
print(np.unique(list(df_compact[key])))

yearUnits: 
['CE']
