In [1]:
import sys

sys.path.append("../../")
sys.path.append("../../src")

In [2]:
# For the notebook to work you must have m3_learning installed
# pip install m3_learning
%load_ext autoreload
%autoreload 2

import numpy as np

from m3_learning.nn.random import random_seed
from m3_learning.viz.style import set_style
from m3_learning.util.file_IO import download_and_unzip
from m3_learning.viz.printing import printer
from m3_learning.be.viz import Viz
from m3_learning.be.dataset import BE_Dataset
from IPython.display import Video

import json
import logging

import torch
import torch.nn.functional as F


# from m3_learning.be.dataset import BE_Dataset
printing = printer(basepath = './Figures/')

set_style("printing")
random_seed(seed=42)

%matplotlib inline


2024-06-07 09:30:07.705771: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-06-07 09:30:08.236050: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/ferroelectric/micromamba/envs/paper/lib/python3.10/site-packages/cv2/../../lib64:
2024-06-07 09:30:08.236102: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/ferroelectric/micromamba/envs/paper/

printing set for seaborn
Pytorch seed was set to 42
Numpy seed was set to 42
tensorflow seed was set to 42


In [3]:
import sys

sys.path.append("../../")
# sys.path.append("/home/ferroelectric/Documents/m3_learning/m3_learning/src")
sys.path.append('../../src')

## Loading data for SHO fitting


In [4]:
# Download the data file from Zenodo
url = 'https://zenodo.org/record/7774788/files/PZT_2080_raw_data.h5?download=1'

# Specify the filename and the path to save the file
filename = '/data_raw_unmod.h5'
save_path = './Data'

# download the file
download_and_unzip(filename, url, save_path)

Using files already downloaded


In [5]:
data_path = save_path + '/' + filename

# instantiate the dataset object
dataset = BE_Dataset(data_path)

# print the contents of the file
dataset.print_be_tree()

No spectroscopic datasets found as attributes of /Measurement_000/Channel_000/Position_Indices
No position datasets found as attributes of /Raw_Data-SHO_Fit_000/Spectroscopic_Values


  return (data - self.mean)/self.std


/
├ Measurement_000
  ---------------
  ├ Channel_000
    -----------
    ├ Bin_FFT
    ├ Bin_Frequencies
    ├ Bin_Indices
    ├ Bin_Step
    ├ Bin_Wfm_Type
    ├ Excitation_Waveform
    ├ Noise_Floor
    ├ Noisy_Data_1
    ├ Noisy_Data_2
    ├ Noisy_Data_3
    ├ Noisy_Data_4
    ├ Noisy_Data_5
    ├ Noisy_Data_6
    ├ Noisy_Data_7
    ├ Noisy_Data_8
    ├ Position_Indices
    ├ Position_Values
    ├ Raw_Data
    ├ Spatially_Averaged_Plot_Group_000
      ---------------------------------
      ├ Bin_Frequencies
      ├ Max_Response
      ├ Mean_Spectrogram
      ├ Min_Response
      ├ Spectroscopic_Parameter
      ├ Step_Averaged_Response
    ├ Spatially_Averaged_Plot_Group_001
      ---------------------------------
      ├ Bin_Frequencies
      ├ Max_Response
      ├ Mean_Spectrogram
      ├ Min_Response
      ├ Spectroscopic_Parameter
      ├ Step_Averaged_Response
    ├ Spectroscopic_Indices
    ├ Spectroscopic_Values
    ├ UDVS
    ├ UDVS_Indices
├ Noisy_Data_1_SHO_Fit
  --------

In [6]:
import json
import os
import re
import csv
from m3_learning.nn.Fitter1D.Fitter1D import Multiscale1DFitter, Model
from m3_learning.nn.Fitter1D.Fitter1D import ComplexPostProcessor
from m3_learning.be.nn import SHO_fit_func_nn
import pandas as pd
from datafed.CommandLib import API


def search_for_alias(listing_reply, target_alias):
    """
    Searches for an item with a matching alias within a ListingReply.

    Parameters:
    - listing_reply: SDMS_Auth_pb2.ListingReply object containing the items to search through.
    - target_alias: String representing the alias to search for.

    Returns:
    - The matching item if found, otherwise None.
    """
    for item in listing_reply.item:
        if item.alias == target_alias:  # Assuming each item has an 'alias' field
            return item.id  # Return the matching item
    return None  # Return None if no match is found

class InferenceEvaluation:
    
    def __init__(self, dataset, folder, csv_file, collection_name, project_name, system_info = None):
        self.folder = folder
        self.csv_file = csv_file
        self.load_from_csv()
        self.dataset = dataset
        self.get_reference_data()
        self.base_path(csv_file)
        if system_info is None:
            self.parse_system_info(f'{self.base_path_}/system_info.txt')
        self.collection_name = collection_name
        self.project_name = project_name
        self.df_api = API()
        
        # Create a logger
        self.logger = logging.getLogger(__name__)
        logging.basicConfig(level=logging.WARNING)
        
    def base_path(self, filename):
        # Find the last occurrence of '/'
        last_slash_index = filename.rfind('/')
        # Extract everything before the last '/'
        directory_path = filename[:last_slash_index] if last_slash_index != - \
            1 else filename
        self.base_path_ = directory_path
        
        
    def load_from_csv(self):
        
        
        
        # Initialize an empty list to store all rows of data
        data_list = []

        # Open and read the CSV file
        with open(self.csv_file, 'r') as file:
            csvreader = csv.reader(file)
            next(csvreader, None)  # Skip the header row (if any
            
            # Iterate over each row in the csv file
            for row in csvreader:
                # Assuming each row follows the structure you provided
                model_name = row[0]
                i = int(row[3])
                optimizer_name = row[2]
                noise = row[1]
                
                epochs = int(row[4])
                total_time = float(row[5])
                train_loss = float(row[6])
                batch_size = int(row[7])
                loss_func = row[8]
                seed = int(row[9])
                model_path = row[10]  # This already includes the path with the filename
                stoppage_early = row[11]  # Assuming this is a string that needs to be interpreted
                model_updates = int(row[12])
                
                # Append this row of data as a dictionary (or any other structure you prefer) to your list
                data_list.append({
                    'model_name': model_name,
                    'i': i,
                    'noise': int(noise),
                    'optimizer_name': optimizer_name,
                    'epochs': epochs,
                    'total_time': total_time,
                    'train_loss': train_loss,
                    'batch_size': batch_size,
                    'loss_func': loss_func,
                    'seed': seed,
                    'stoppage_early': stoppage_early == "True", # Convert to boolean 
                    'model_updates': model_updates
                })
                
        self.data_list = data_list
        
    def get_file(self, data):
        for filename in os.listdir(self.folder):
            
            if data['stoppage_early']:
                pattern = rf'Early_Stoppage_at_{data["total_time"]}_{data["model_name"]}_model_optimizer_{data["optimizer_name"]}_epoch_{data["epochs"]}_train_loss_{data["train_loss"]}.pth'
                match = re.match(pattern, filename)
                
            else:
                pattern = rf'{data["model_name"]}_model_optimizer_{data["optimizer_name"]}_epoch_{data["epochs"]}_train_loss_{data["train_loss"]}.pth'
                match = re.match(pattern, filename)
            
            if match:
                data['filename'] = filename
                return filename
            
    def get_reference_data(self, noise_state=0):
        self.dataset.noise = noise_state

        # extracts the x and y data based on the noise
        self.x_reference, self.y_reference = self.dataset.NN_data()
    
    def load_model(self, model_path):
        
        postprocessor = ComplexPostProcessor(self.dataset)


        model_ = Multiscale1DFitter(SHO_fit_func_nn,  # function
                                    self.dataset.frequency_bin,  # x data
                                    2,  # input channels
                                    4,  # output channels
                                    self.dataset.SHO_scaler,
                                    postprocessor)

        # instantiate the model
        model = Model(model_, dataset, training=False)
        
        model.load(model_path)
        
        return model
    
    def evaluate(self, data):
        
        print(f'Evaluating the model: {data["model_name"]}\n')
        
        self.dataset.noise = int(data["noise"])

        # extracts the x and y data based on the noise
        x_data, y_data = self.dataset.NN_data()
        
        model = self.load_model(f"{self.base_path_}/{data['filename']}")
        
        pred_data, scaled_param, parm = model.predict(x_data)
        
        mse_input = F.mse_loss(x_data, self.x_reference, reduction='mean')
        mse = F.mse_loss(pred_data, self.x_reference, reduction='mean')
        
        data['mse'] = mse.item()
        data['mse_input'] = mse_input.item()
        
        return data
        
    def parse_system_info(self, file_path):
        system_info = {"System Information": {}}
        gpu_num = 0
        GPU = False
        with open(file_path, 'r') as file:
            for line in file:
                if line.startswith("GPU: NVIDIA"):
                    gpu_num += 1
                    GPU = True
                    gpu_key = f"GPU Information_{gpu_num}"
                    system_info["System Information"][f"{gpu_key}"] = {} 
                 
                if GPU:
                    if ":" in line:
                        key, value = line.split(":", 1)
                        system_info["System Information"][f"{gpu_key}"][key.strip()
                                                        ] = value.strip()
                else:
                    if ":" in line:
                        key, value = line.split(":", 1)
                        system_info["System Information"][key.strip()
                                                        ] = value.strip()

        self.system_info_ = system_info
        
    def upload_to_datafed(self, data=None):
        
        if data is None:
            data = self.data
            
        data['system_info'] = self.system_info_['System Information']

        df_api = API()

        df_api.setContext(self.project_name)

        coll_alias = self.collection_name

        try:
            coll_resp = df_api.collectionCreate(coll_alias.replace("_", " "),
                                                alias=coll_alias,
                                                parent_id="root")
        except:
            pass

        out, reply = df_api.collectionItemsList('root')
        parent_id = search_for_alias(out, coll_alias)

        try:
            dc_resp = df_api.dataCreate(f"stop_{data['stoppage_early']}i_{data['i']}_Noise_{data['noise']}_opt_{data['optimizer_name']}_ep_{data['epochs']}_loss_{data['train_loss']:0.3f}",
                                        metadata=json.dumps(data),
                                        parent_id=parent_id,  # parent collection
                                        )
            self.logger.info(f'Data creation successful.')

        except Exception as e:
            self.logger.warning(f'Data creation failed with error: {e}')

        file_path = self.base_path_ + '/' + data['filename']

        try:
            put_resp = df_api.dataPut(dc_resp[0].data[0].id,
                                    file_path,
                                    wait=False,  # Waits until transfer completes.
                                    )
            self.logger.info(f'Data put successful.')

        except Exception as e:
            self.logger.warning(f'Data put failed with error: {e}')

    def run(self):
        for data in self.data_list:
            self.get_file(data)
            self.evaluate(data)
            self.upload_to_datafed(data)

In [7]:
filepath = "/home/ferroelectric/Documents/m3_learning/m3_learning/papers/2023_Rapid_Fitting/Trained Models/SHO Fitter/2023-12-12_11-36-09_nn_benchmarks_noise"
csv_file = "/home/ferroelectric/Documents/m3_learning/m3_learning/papers/2023_Rapid_Fitting/Trained Models/SHO Fitter/2023-12-12_11-36-09_nn_benchmarks_noise/Batch_Training_SpeedTest.csv"

evaluation = InferenceEvaluation(
    dataset, filepath, csv_file, "sho_fitting_benchmarks_v1", "p/2024_sho_fitting")


evaluation.run()


        Dataset = Raw_Data
        Resample = False
        Raw Format = complex
        fitter = LSQF
        scaled = False
        Output Shape = pixels
        Measurement State = all
        Resample Resampled = False
        Resample Bins = 165
        LSQF Phase Shift = None
        NN Phase Shift = None
        Noise Level = 0
        loop interpolated = False
                    
None
Evaluating the model: SHO_Fitter_original_data


        Dataset = Raw_Data
        Resample = False
        Raw Format = complex
        fitter = LSQF
        scaled = False
        Output Shape = pixels
        Measurement State = all
        Resample Resampled = False
        Resample Bins = 165
        LSQF Phase Shift = None
        NN Phase Shift = None
        Noise Level = 0
        loop interpolated = False
                    
None
Using GPU NVIDIA GeForce RTX 3090
Evaluating the model: SHO_Fitter_original_data


        Dataset = Raw_Data
        Resample = False
        Raw Format = 

## Analysis of Benchmark for SHO fitting

In [None]:
record_path = 

In [6]:
import json

def get_metadata(record_id):
    dv_resp = df_api.dataView(record_id)
    return json.loads(dv_resp[0].data[0].metadata)

In [7]:
def get_all_records(collection_name):
    out, reply = df_api.collectionItemsList('root')
    parent_id = search_for_alias(out, collection_name)
    out, reply = df_api.collectionItemsList(parent_id)
    return out

In [8]:
from datafed.CommandLib import API
df_api = API()

collection = "c/524269649"
coll_list_resp = df_api.collectionItemsList(collection, count=1000)
print(coll_list_resp)

(item {
  id: "d/524269698"
  title: "stop_Falsei_0_Noise_0_opt_Adam_ep_4_loss_0.034"
  owner: "p/2024_sho_fitting"
  creator: "u/jca318"
  size: 0
  notes: 0
}
item {
  id: "d/524276222"
  title: "stop_Falsei_0_Noise_0_opt_Adam_ep_4_loss_0.034_524276222"
  owner: "p/2024_sho_fitting"
  creator: "u/jca318"
  size: 100830
  notes: 0
}
item {
  id: "d/524269851"
  title: "stop_Falsei_1_Noise_0_opt_Adam_ep_4_loss_0.034"
  owner: "p/2024_sho_fitting"
  creator: "u/jca318"
  size: 0
  notes: 0
}
item {
  id: "d/524276420"
  title: "stop_Falsei_1_Noise_0_opt_Adam_ep_4_loss_0.034_524276420"
  owner: "p/2024_sho_fitting"
  creator: "u/jca318"
  size: 100830
  notes: 0
}
item {
  id: "d/524271136"
  title: "stop_Falsei_10_Noise_0_opt_Adam_ep_4_loss_0.034"
  owner: "p/2024_sho_fitting"
  creator: "u/jca318"
  size: 0
  notes: 0
}
item {
  id: "d/524278291"
  title: "stop_Falsei_10_Noise_0_opt_Adam_ep_4_loss_0.034_524278291"
  owner: "p/2024_sho_fitting"
  creator: "u/jca318"
  size: 100868
  not

In [9]:
metadata = []

for num, i in enumerate(coll_list_resp[0].item):
    print(i.id)
    print(num)
    metadata.append(get_metadata(i.id))

d/524269698
0
d/524276222
1
d/524269851
2
d/524276420
3
d/524271136
4
d/524278291
5
d/524299027
6
d/524299270
7
d/524299500
8
d/524299733
9
d/524299972
10
d/524300196
11
d/524300413
12
d/524300643
13
d/524300873
14
d/524301118
15
d/524271276
16
d/524278510
17
d/524301348
18
d/524301578
19
d/524301808
20
d/524302025
21
d/524302277
22
d/524302507
23
d/524302737
24
d/524302967
25
d/524303178
26
d/524303408
27
d/524271416
28
d/524278727
29
d/524303638
30
d/524303877
31
d/524304107
32
d/524304337
33
d/524304554
34
d/524304784
35
d/524305014
36
d/524305244
37
d/524305487
38
d/524305717
39
d/524271569
40
d/524278947
41
d/524305947
42
d/524306158
43
d/524306388
44
d/524306618
45
d/524306848
46
d/524307078
47
d/524307308
48
d/524307538
49
d/524308003
50
d/524308233
51
d/524271735
52
d/524279177
53
d/524308463
54
d/524308723
55
d/524308953
56
d/524309170
57
d/524309400
58
d/524309624
59
d/524309854
60
d/524310093
61
d/524310323
62
d/524310540
63
d/524271888
64
d/524279407
65
d/524310770
66
d/524

In [11]:
metadata[0]

{'batch_size': 500,
 'epochs': 4,
 'filename': 'SHO_Fitter_original_data_model_optimizer_Adam_epoch_4_train_loss_0.03396249711867885.pth',
 'i': 0,
 'loss_func': 'MSELoss()',
 'model_name': 'SHO_Fitter_original_data',
 'model_updates': 11060,
 'mse': 0.033908843994140625,
 'mse_input': 0,
 'noise': 0,
 'optimizer_name': 'Adam',
 'seed': 0,
 'stoppage_early': False,
 'system_info': {'Available RAM': '58.39 GB',
  'Current Frequency': '2235.15Mhz',
  'GPU Information_1': {'Free Memory': '24234.0MB',
   'GPU': 'NVIDIA GeForce RTX 3090',
   'GPU Load': '0.0%',
   'GPU Temperature': '49.0 C',
   'Total Memory': '24576.0MB',
   'Used Memory': '22.0MB'},
  'GPU Information_2': {'Free Memory': '24253.0MB',
   'GPU': 'NVIDIA GeForce RTX 3090',
   'GPU Load': '0.0%',
   'GPU Temperature': '39.0 C',
   'Total Memory': '24576.0MB',
   'Used Memory': '6.0MB'},
  'Machine': 'x86_64',
  'Max Frequency': '3800.00Mhz',
  'Min Frequency': '2200.00Mhz',
  'Node Name': 'nx-248-36-21.dhcp.drexel.edu',
  'P