# 0.5 Generate Noisy Data and Fitting

This notebook is used to generate and fit all the data required for the paper. We will generate data for the following noise cases: 

1, 2, 3, 4, 5, 6, 7, and 8 STD, where STD is the standard deviation of the original data.

## Imports

In [1]:
import sys

# Make the local m3_learning sources importable.
# NOTE(review): the second entry is an absolute path on one specific machine;
# elsewhere only the relative entry can resolve. Installing the package would
# remove the need for both.
for _src_dir in (
    "../../",
    "/home/ferroelectric/Documents/m3_learning/m3_learning/src",
):
    # Guard the append so re-running this cell does not stack duplicate
    # entries on sys.path.
    if _src_dir not in sys.path:
        sys.path.append(_src_dir)

In [2]:
%load_ext autoreload
%autoreload 2

import numpy as np
from m3_learning.be.dataset import BE_Dataset
from m3_learning.viz.printing import printer
from m3_learning.be.nn import SHO_fit_func_nn, SHO_Model
from m3_learning.util.file_IO import download_and_unzip


2023-11-24 16:46:50.215142: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-24 16:46:50.215178: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-24 16:46:50.215852: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-11-24 16:46:50.220285: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Loading data for SHO fitting


In [5]:
# Local destination: target directory and the file name inside it.
# The leading "/" of the name is kept exactly as-is (presumably
# download_and_unzip joins save_path and filename itself — TODO confirm).
save_path = './Data'
filename = '/data_raw_unmod copy.h5'

# Zenodo record hosting the raw dataset.
url = 'https://zenodo.org/record/7774788/files/PZT_2080_raw_data.h5?download=1'

# Fetch the file; the recorded run reported that the files were already downloaded.
download_and_unzip(filename, url, save_path)

Using files already downloaded


In [6]:
# Build the path to the HDF5 file. `filename` may carry a leading "/" and
# `save_path` a trailing one, so strip both before joining; otherwise the
# result contains a doubled separator ('./Data//data_raw_unmod copy.h5').
data_path = save_path.rstrip('/') + '/' + filename.lstrip('/')

# instantiate the dataset object
#TODO: add resample_bins=None as default
#TODO: add to channel resample
dataset = BE_Dataset(data_path)

# print the contents of the file
dataset.print_be_tree()

/
├ Measurement_000
  ---------------
  ├ Channel_000
    -----------
    ├ Bin_FFT
    ├ Bin_Frequencies
    ├ Bin_Indices
    ├ Bin_Step
    ├ Bin_Wfm_Type
    ├ Excitation_Waveform
    ├ Noise_Floor
    ├ Position_Indices
    ├ Position_Values
    ├ Raw_Data
    ├ Spatially_Averaged_Plot_Group_000
      ---------------------------------
      ├ Bin_Frequencies
      ├ Max_Response
      ├ Mean_Spectrogram
      ├ Min_Response
      ├ Spectroscopic_Parameter
      ├ Step_Averaged_Response
    ├ Spatially_Averaged_Plot_Group_001
      ---------------------------------
      ├ Bin_Frequencies
      ├ Max_Response
      ├ Mean_Spectrogram
      ├ Min_Response
      ├ Spectroscopic_Parameter
      ├ Step_Averaged_Response
    ├ Spectroscopic_Indices
    ├ Spectroscopic_Values
    ├ UDVS
    ├ UDVS_Indices
  ├ Noisy_Data_1
  ├ Noisy_Data_2
  ├ Noisy_Data_3
  ├ Noisy_Data_4
  ├ Noisy_Data_5
  ├ Noisy_Data_6
  ├ Noisy_Data_7
  ├ Noisy_Data_8
  ├ Position_Indices
  ├ Position_Values
├ Raw_Da

In [7]:
# NOTE(review): this import sits outside the notebook's import cell; it is
# left here unchanged.
from BGlib import be as belib
# Presumably patches the LabView-acquired HDF5 file so that it can be read as
# USID data — confirm against the BGlib documentation.
tl = belib.translators.LabViewH5Patcher()
# force_patch=True is passed through to the patcher. The call is the cell's
# last expression, so its return value is displayed (the recorded run showed
# the file path).
tl.translate(dataset.file, force_patch=True)

  warn('Consider using sidpy.Reader instead of sidpy.Translator if '
If this is BEPS or BELine data from the LabView aquisition software, please run the following piece of code.  Afterwards, run this function again.
CODE: hdf.file['/Measurement_000/Channel_000'].attrs['channel_type'] = 'BE'
  warn(warn_str)


'./Data//data_raw_unmod copy.h5'

## Generates Noisy Data

This function will generate noisy records and save them as an h5_main file in the USID format. This allows the data to be computed with the Pycroscopy SHO Fitter. 

In [6]:
# Standard deviation of the original data; passed as noise_STD when the noisy
# records are generated further down.
# get_original_data is accessed without parentheses, so it is presumably a
# property of the dataset object — confirm.
noise_STD = np.std(dataset.get_original_data)

In [7]:
# Show the standard deviation computed in the previous cell.
print(noise_STD)

0.0038833667


In [8]:
import h5py
import pyUSID as usid

# Sanity check on the raw data before noisy copies are generated.
# NOTE(review): the file is opened in "r+" (read/write) mode although this
# cell only reads from it — confirm whether read-only access would suffice.
with h5py.File(dataset.file, "r+") as h5_f:
    # Take the first dataset found under the name "Raw_Data".
    h5_main = usid.hdf_utils.find_dataset(h5_f, "Raw_Data")[0]
    # Prints whether pyUSID accepts it as a main dataset (True in the recorded run).
    print(usid.hdf_utils.check_if_main(h5_main))
    # Prints the linked position-indices dataset (shape (3600, 2) in the recorded run).
    print(h5_main.h5_pos_inds)



True
<HDF5 dataset "Position_Indices": shape (3600, 2), type "<u4">


In [9]:
# Generate the noisy records for noise levels 1 through 8, passing the
# standard deviation measured above as noise_STD.
# NOTE(review): the recorded run of this cell stopped with a TypeError
# ("Position dimensions should be array-like of Dimension objects") right
# after "Adding noise level 1".
dataset.generate_noisy_data_records(
    noise_levels=np.arange(1, 9),
    verbose=True,
    noise_STD=noise_STD,
)

The STD of the data is: 0.0038833667058497667
Adding noise level 1


TypeError: Position dimensions should be array-like of Dimension objects

## SHO fits on all the datasets

This will take some time: each fit takes about 10 minutes to complete.

In [8]:
# Fit targets: the eight noisy datasets followed by the unmodified raw data.
out = [f"Noisy_Data_{i}" for i in np.arange(1, 9)] + ["Raw_Data"]

for data in out:
    print(f"Fitting {data}")
    # Each dataset's fit is written to a target group named after the dataset.
    dataset.SHO_Fitter(
        dataset=data,
        h5_sho_targ_grp=f"{data}_SHO_Fit",
        max_mem=1024 * 64,
        max_cores=20,
    )

Fitting Noisy_Data_1
Working on:
./Data//data_raw_unmod copy.h5
['Y', 'X'] [60, 60]


KeyError: "'VS_mode' is not an attribute in '/'"

### Check the results to make sure they were saved correctly

In [None]:
# Print the contents of the file again so the saved results can be checked
# against the expected datasets and fit groups.
dataset.print_be_tree()