<font size = "5"> **pyNSID Data format** </font>


<hr style="height:1px;border-top:4px solid #FF8200" />

# pyNSID Data Format: First Test

part of 

<font size = "5"> **[pycroscopy](https://pycroscopy.github.io/pycroscopy/about.html)**</font>


by Gerd Duscher, Fall 2020
The University of Tennessee, Knoxville


In [None]:
# Optional: uncomment the two lines below to install/upgrade pyNSID from the
# local checkout at ../pyNSID/ using the Python executable of the running kernel.
#import sys
#!{sys.executable} -m pip install  --upgrade  ../pyNSID/

## Load Necessary Packages

In [1]:
import os, sys

import h5py
import numpy as np

# Import pyNSID
# (the directory three levels up is appended to the module search path first)
sys.path.append('../../..')

import pyNSID as nsid

#print('pyNSID version: ', nsid.__version__.version)

## Input
Create some random four-dimensional data and a description of each of its dimensions.

In [2]:
# Name, random stand-in values, physical quantity and units of the main dataset
main_data_name  = 'nDim_Data'
main_data = np.random.rand(5, 7, 11, 3)
quantity = 'intensity'
units= "pixel"

# One nsid.io.Dimension per axis of main_data, keyed by the axis index.
# The number of points of every dimension is taken from main_data.shape so the
# dimension vectors cannot get out of step with the data array.
# NOTE(review): the last positional argument is presumably the `is_position`
# flag (cf. the 'is_position' attribute written for 'DC offset' once it is
# stored in the file) - confirm against the nsid.io.Dimension signature.
# 'DC offset' is therefore flagged False here, in agreement with that attribute.
# NOTE(review): 'BE Frequency' is left at True as in the original; verify
# whether a frequency axis should really be flagged this way.
dim_dict = {
    0: nsid.io.Dimension('Y', 'Length', 'um',
                         np.linspace(0, 10, num=main_data.shape[0]), True),
    1: nsid.io.Dimension('X', 'Length', 'um',
                         np.linspace(0, 6, num=main_data.shape[1]), True),
    2: nsid.io.Dimension('DC offset', 'Bias', 'V',
                         np.sin(np.linspace(0, 1, num=main_data.shape[2]) * 2 * np.pi), False),
    3: nsid.io.Dimension('BE Frequency', 'Frequency', 'Hz',
                         np.linspace(0, 10, num=main_data.shape[3]), True),
}

# Display the dimension descriptions
dim_dict


{0: Y - Length (um): [ 0.   2.5  5.   7.5 10. ],
 1: X - Length (um): [0. 1. 2. 3. 4. 5. 6.],
 2: DC offset - Bias (V): [ 0.00000000e+00  5.87785252e-01  9.51056516e-01  9.51056516e-01
   5.87785252e-01  1.22464680e-16 -5.87785252e-01 -9.51056516e-01
  -9.51056516e-01 -5.87785252e-01 -2.44929360e-16],
 3: BE Frequency - Frequency (Hz): [ 0.  5. 10.]}

## Open File and make one Dimension a dataset

In [3]:
# Close a handle left over from an earlier run of this cell before reopening the
# file. On a fresh kernel the name `h5_file` does not exist yet; that is the
# only situation that is silently ignored.
try:
    h5_file.close()
except NameError:
    pass

# Open (or create) the HDF5 file in append mode
h5_file = h5py.File('test.hf5', mode='a')

if "Measurement_000/Channel_000" in h5_file:
    # Reuse the existing channel group, but start from an empty group.
    current_channel = h5_file["Measurement_000/Channel_000"]
    # Iterate over a snapshot of the member names: the group is modified
    # inside the loop, so it must not be iterated directly.
    for key in list(current_channel.keys()):
        del current_channel[key]
else:
    current_channel = h5_file.create_group("Measurement_000/Channel_000")

# Store the 'DC offset' dimension values as a dataset inside the channel group
if 'DC offset' not in current_channel:
    current_channel['DC offset'] = np.sin(np.linspace(0, 1, num=11) * 2 * np.pi)

# Describe the dimension through attributes on its dataset
h5_dc_offset = current_channel['DC offset']
h5_dc_offset.attrs['name'] = 'DC offset'
h5_dc_offset.attrs['quantity'] = 'Bias'
h5_dc_offset.attrs['units'] = 'V'
h5_dc_offset.attrs['is_position'] = False

# Replace the in-memory description of dimension 2 by the HDF5 dataset
dim_dict[2] = h5_dc_offset
dim_dict


{0: Y - Length (um): [ 0.   2.5  5.   7.5 10. ],
 1: X - Length (um): [0. 1. 2. 3. 4. 5. 6.],
 2: <HDF5 dataset "DC offset": shape (11,), type "<f8">,
 3: BE Frequency - Frequency (Hz): [ 0.  5. 10.]}

## Write Dataset

In [4]:
# Labels describing the main dataset
main_data_name = 'nDim_Data'
quantity, units = 'intensity', "pixel"
data_type, modality, source = 'STM_spectroscopy', 'test', 'Gedanken'

# Random values standing in for a measurement
main_data = np.random.rand(5, 7, 11, 3)

# Show the dimension descriptions that are handed to the writer
print(dim_dict)

# Write the data, its descriptors and its dimensions into the channel group
current_dataset = nsid.io.hdf_utils.write_main_dataset(
    current_channel,
    main_data,
    main_data_name,
    quantity,
    units,
    data_type,
    modality,
    source,
    dim_dict,
    verbose=True,
)

{0: Y - Length (um): [ 0.   2.5  5.   7.5 10. ], 1: X - Length (um): [0. 1. 2. 3. 4. 5. 6.], 2: <HDF5 dataset "DC offset": shape (11,), type "<f8">, 3: BE Frequency - Frequency (Hz): [ 0.  5. 10.]}
h5 group and file OK
quantity, units, main_data_name all OK
Provided numpy or Dask array for main_data OK so far
Dimensions are correct!
Created main dataset with provided data
Wrote dimensions and attributes to main dataset
Successfully linked datasets - dataset should be main now


## Check if main

In [5]:
h5_main = current_dataset

# Attributes every dimension dataset is required to carry
attrs_names = ['is_position', 'name', 'nsid_version', 'quantity', 'units']

# One entry per dimension of the main dataset
attr_success = []    # are all required attributes present?
length_success = []  # is it 1D and as long as the matching axis of the main dataset?
dset_success = []    # is it stored as an h5py.Dataset?

# Check every dimension attached to the main dataset
for i, dimension in enumerate(current_dataset.dims):
    # the dimension label is used as the name of the dataset in the channel group
    h5_dim_dset = current_channel[dimension.label]

    # check for all required attributes
    attr_success.append(all(att in h5_dim_dset.attrs for att in attrs_names))

    # Record the type check for this dimension only. Mixing the accumulated
    # attr_success list with a bool inside np.all builds a ragged array, which
    # recent NumPy versions reject.
    is_dataset = isinstance(h5_dim_dset, h5py.Dataset)
    dset_success.append(is_dataset)

    # dimensional scale has to be 1D and of the same length as the matching
    # axis of the main dataset (only inspected for real datasets, which have a shape)
    length_success.append(is_dataset
                          and len(h5_dim_dset.shape) == 1
                          and current_dataset.shape[i] == h5_dim_dset.shape[0])

# We have the lists now and can get error messages according to which dataset is bad or not.
print ('Dimensions: All Attributes: ', all(attr_success))
print ('Dimensions: All Correct Length: ', all(length_success))
print ('Dimensions: All h5 Datasets: ', all(dset_success))

# Check for all required attributes in the main dataset
main_attrs_names = ['quantity', 'units', 'main_data_name','data_type', 'modality', 'source']
main_attr_success = all(att in h5_main.attrs for att in main_attrs_names)
print('All Attributes in dataset: ', main_attr_success)


Dimensions: All Attributes:  True
Dimensions: All Correct Length:  True
Dimensions: All h5 Datasets:  True
All Attributes in dataset:  True


In [6]:
# Ask pyNSID itself whether current_dataset qualifies as a main dataset; the
# returned value is displayed as the cell output.
# NOTE(review): presumably the library counterpart of the manual checks in the
# previous cell - confirm against the pyNSID documentation.
nsid.io.hdf_utils.check_if_main(current_dataset)

True