In [1]:
import os
import re
import csv
import h5py
import warnings
import tqdm
import numpy as np
import pandas as pd

In [2]:
class VSS_File:
    """Read-only accessor for a vibration-based soft sensing database in an HDF5 file.

    Every top-level group of the file is wrapped in a ``VSS_Unit_Reference``
    (stored in ``self.units``) and every sub-group of a unit is wrapped in a
    ``VSS_Test_Reference`` (stored in the unit's ``tests`` list).

    The HDF5 handle is opened in ``__init__`` and stays open until ``close()``
    is called. The object can also be used as a context manager::

        with VSS_File(path) as db:
            tests = db.DataframeAsList({"angularSpeed": [3600]})

    Unit and test references read from the open handle, so they must not be
    used after the file has been closed.
    """

    def __init__(self, filePath):
        """Open ``filePath`` in read-only mode and index its top-level groups."""
        self.path = filePath
        # Cursor used only by direct ``next(obj)`` calls; ``for`` loops use
        # an independent iterator (see ``__iter__``).
        self._index = 0
        self._fileh5ref = h5py.File(filePath, 'r')
        self.units = [self.VSS_Unit_Reference(self, self._fileh5ref[group])
                      for group in self._fileh5ref.keys()]

    def __repr__(self):
        return f"Vibration-based database for ({len(self.units)} units)"

    def __enter__(self):
        return self

    def __exit__(self, excType, excValue, traceback):
        self.close()

    def close(self):
        """Close the underlying HDF5 file handle."""
        self._fileh5ref.close()

    def __iter__(self):
        # A fresh iterator per loop: breaking out of a loop or nesting loops
        # no longer leaves a shared cursor half-way through the list.
        return iter(self.units)

    def __next__(self):
        # Kept for callers that invoke ``next(obj)`` directly on the object.
        if self._index == len(self.units):
            self._index = 0
            raise StopIteration
        else:
            self._index += 1
            return self.units[self._index-1]

    def DataframeAsList(self, attributeDict, selectedUnits=None):
        """Return a flat list of the tests matching ``attributeDict``.

        Parameters
        ----------
        attributeDict : dict
            Filter specification, see
            ``VSS_Unit_Reference.filterTestsByAttributeDict``.
        selectedUnits : iterable of str, optional
            Group names/paths inside the file (the notebook uses values such
            as ``"/1"``) restricting the search to some units (compressors,
            according to the original author's note). ``None`` searches every
            unit in ``self.units``.

        Returns
        -------
        list of VSS_Test_Reference
            Matching tests of all considered units, concatenated in unit order.
        """
        if selectedUnits is None:
            units = self.units
        else:
            # New references are built for the requested groups.
            units = [self.VSS_Unit_Reference(self, self._fileh5ref[group])
                     for group in selectedUnits]

        return [test
                for unit in units
                for test in unit.filterTestsByAttributeDict(attributeDict)]

    class VSS_Unit_Reference:
        """Reference to one unit group inside the HDF5 file."""

        # Keys that are accepted in a filter dict but for which no filtering
        # is implemented.
        _UNAPPLIED_FILTER_KEYS = ("repetition", "type")

        def __init__(self, parent, unitGroupId: "h5py.Group"):
            """Wrap ``unitGroupId`` and build a reference for each of its sub-groups.

            ``parent`` is the owning ``VSS_File``.
            """
            self._h5ref = unitGroupId
            self._h5file = parent
            self.name = unitGroupId.name
            # Cursor used only by direct ``next(obj)`` calls.
            self._index = 0
            self.tests = [self.VSS_Test_Reference(self, self._h5ref[group])
                          for group in self._h5ref.keys()]

        def __repr__(self):
            return f"Vibration-based database for unit <{self.name}> ({len(self.tests)} tests)"

        def __str__(self):
            return self.name

        def __iter__(self):
            # Independent iterator per loop (no shared cursor state).
            return iter(self.tests)

        def __next__(self):
            # Kept for callers that invoke ``next(obj)`` directly on the object.
            if self._index == len(self.tests):
                self._index = 0
                raise StopIteration
            else:
                self._index += 1
                return self.tests[self._index-1]

        @staticmethod
        def _attrAsFloat(value):
            """Convert an attribute value to ``float``, accepting a decimal comma.

            Text values (``str`` or ``bytes``) such as ``"34,5"`` are parsed
            with the comma treated as decimal separator; any other value is
            passed to ``float`` unchanged.
            """
            if isinstance(value, bytes):
                value = value.decode()
            if isinstance(value, str):
                value = value.replace(',', '.')
            return float(value)

        def filterTestsByAttributeDict(self, attributeDict):
            """Return the tests of this unit that satisfy every supported filter.

            Supported keys of ``attributeDict``:

            * ``"angularSpeed"``: collection of integers; a test is kept when
              ``int(attrs["angularSpeed"])`` is a member. The original author's
              notes list 2100, 2475, 2850, 3225 and 3600 RPM.
            * ``"condensingTemperature"`` / ``"evaporatingTemperature"``:
              collection of numbers; a test is kept when its attribute lies in
              the closed interval ``[min(values), max(values)]``. The author's
              notes mention 34 to 54 degC (condensing) and 10 to 30 degC
              (evaporating).

            The keys ``"repetition"`` and ``"type"`` are accepted but NOT
            applied; a ``UserWarning`` is emitted when they are present so that
            the caller is not misled into believing the result was filtered.
            (The author's notes describe type ``A`` as the main map and ``B``
            as the secondary map.)
            """
            output = list(self.tests)

            if "angularSpeed" in attributeDict:
                allowedSpeeds = attributeDict['angularSpeed']
                output = [test for test in output
                          if int(test._h5ref.attrs['angularSpeed']) in allowedSpeeds]

            for key in ("condensingTemperature", "evaporatingTemperature"):
                # The bounds are only evaluated when there is something left
                # to filter, as in the original per-test evaluation.
                if key in attributeDict and output:
                    lower = min(attributeDict[key])
                    upper = max(attributeDict[key])
                    output = [test for test in output
                              if lower <= self._attrAsFloat(test._h5ref.attrs[key]) <= upper]

            ignored = [key for key in self._UNAPPLIED_FILTER_KEYS if key in attributeDict]
            if ignored:
                warnings.warn(
                    f"filterTestsByAttributeDict: filtering by {ignored} is not "
                    "implemented; these keys were ignored",
                    stacklevel=2,
                )

            return output

        class VSS_Test_Reference:
            """Reference to one test group inside a unit group."""

            def __init__(self, parent, testGroupId: "h5py.Group"):
                """Wrap ``testGroupId``; ``parent`` is the owning ``VSS_Unit_Reference``."""
                self._h5ref = testGroupId
                self._h5file = parent._h5file
                self.h5unit = parent
                # Same value as ``name``; presumably the group is named after
                # the test date - TODO confirm.
                self.date = testGroupId.name
                self.unit = parent.name
                self.name = testGroupId.name

            def __repr__(self):
                return f"Vibration soft sensing test database <{self.name}>"

            def __str__(self):
                return (self.name)

            def returnNumericalDatabase(self):
                """Return the ``"numericalMeasurements"`` dataset as a NumPy array."""
                return np.array(self._h5ref["numericalMeasurements"])

            def returnNumericalHeaders(self):
                """Return the ``"columnNames"`` attribute of ``"numericalMeasurements"`` as a list."""
                return list(self._h5ref["numericalMeasurements"].attrs["columnNames"])

            def returnNumericalDataframe(self):
                """Return the numerical measurements as a DataFrame with named columns."""
                return pd.DataFrame(data=self.returnNumericalDatabase(),
                                    columns=self.returnNumericalHeaders())

            def returnVibrationDatabase(self):
                """Return the ``"vibrationMeasurements"`` dataset as a NumPy array."""
                return np.array(self._h5ref["vibrationMeasurements"])

            def returnVibrationHeaders(self):
                """Return the ``"columnNames"`` attribute of ``"vibrationMeasurements"`` as a list."""
                return list(self._h5ref["vibrationMeasurements"].attrs["columnNames"])

            def returnVibrationDataframe(self):
                """Return the vibration measurements as a DataFrame with named columns."""
                return pd.DataFrame(data=self.returnVibrationDatabase(),
                                    columns=self.returnVibrationHeaders())

            def returnAttributeList(self):
                """Return the attribute names stored on THIS test group."""
                # Previously read a module-level ``dataset`` global, which
                # reported the attributes of an unrelated test (or raised
                # NameError when no such global existed).
                return list(self._h5ref.attrs)

            def splitVibrationWaveform(self, n, axis):
                """Split one vibration column into ``n`` equally sized chunks.

                Parameters
                ----------
                n : int
                    Number of chunks.
                axis : str
                    Column name of the vibration DataFrame (the notebook
                    uses ``"x"``).

                Returns
                -------
                list of numpy.ndarray
                    ``n`` arrays of equal length. Trailing samples that do not
                    fit into ``n`` equal parts are discarded.
                """
                vibData = np.array(self.returnVibrationDataframe()[axis])

                remainder = np.size(vibData) % n
                if remainder:
                    # Drop the tail so that the length is a multiple of n.
                    vibData = vibData[:-remainder]
                return np.split(vibData, n)

            
# Location of the HDF5 database. Set the VSS_DATASET_PATH environment variable
# to point at another copy of the file; when it is unset the original local
# path is used, so existing runs behave as before.
datasetPath = os.environ.get("VSS_DATASET_PATH", "D:/Rafael/Dados/dataset.hdf5")

# Open the database and index its unit/test groups.
dataset = VSS_File(datasetPath)
        

In [68]:
# Filter specification for filterTestsByAttributeDict: angularSpeed is matched
# against the listed values, condensingTemperature is used as a [min, max] range.
filterAttributes = dict(
    angularSpeed=[3600],
    condensingTemperature=[30, 40],
)

# vib_data = dataset.DataframeAsList(filterAttributes)[0].splitVibrationWaveform(10,"x")

# vib_data[9]

# selectedUnits=["/1","/2","/3","/4","/5"]

# aaa = (dataset.units[0].filterTestsByAttributeDict(filterAttributes)[0].returnVibrationDatabase())

# pd.Series(dataset.units[0].filterTestsByAttributeDict(filterAttributes)[0]._h5ref["vibrationMeasurements"][:,1])

# np.mean(np.array(dataset.DataframeAsList(filterAttributes)[0]._h5ref["numericalMeasurements"]), axis=0)

# dataset.returnDataframe(filterAttributes)

# list(dataset.units[0].tests[0]._h5ref.attrs)


array([9.860402e-05, 9.648752e-05, 7.579776e-05, ..., 1.218000e-03,
       1.217000e-03, 1.188000e-03])

In [4]:
# Keep tests whose angularSpeed is 2100 or 3600 and whose condensingTemperature
# lies within the closed range [32, 36].
filterAttributes = dict(
    angularSpeed=[2100, 3600],
    condensingTemperature=[32, 36],
)

# Flat list of the matching test references across every unit of the dataset.
dataList = dataset.DataframeAsList(attributeDict=filterAttributes)

# Number of matching tests (shown as the cell output).
len(dataList)

# for test in dataList:
#     print(test.returnNumericalDatabase())


# numData = np.array([np.mean(test.returnNumericalDatabase(),axis=0) for test in dataList])
# # df = pd.DataFrame(numData, columns=dataList[0].returnNumericalHeaders())

# vibData = [pd.Series(test._h5ref["vibrationMeasurements"][:,1], dtype='float16') \
#             for test in tqdm.tqdm(dataList)]

# vib = []
# for test in tqdm.tqdm(dataList):
#     vib.append(test.returnVibrationDatabase()[:,1])
    


# doubled = np.array(doubled)


# (np.mean(dataList[0].returnNumericalDatabase(),axis=0))

# dataList[0].returnVibrationDatabase()[:,1]

# np.mean(np.array(dataList[0]._h5ref["numericalMeasurements"]), axis=0)

# df = pd.DataFrame((np.mean(np.array(dataList[0]._h5ref["numericalMeasurements"]), axis=0)), columns = dataList[0].returnNumericalHeaders())


# pd.DataFrame.from_records()

# [df.join(np.mean(np.array(test._h5ref["numericalMeasurements"]), axis=0)) for test in dataList]




704

In [7]:
# For every selected test, read column index 1 of its "vibrationMeasurements"
# dataset as a float32 Series and collect the Series under the 'vib_x' key
# (presumably column 1 is the x-axis signal - TODO confirm against columnNames).
df = pd.DataFrame(
    dict(
        vib_x=[
            pd.Series(test._h5ref["vibrationMeasurements"][:, 1], dtype='float32')
            for test in tqdm.tqdm(dataList)
        ]
    )
)

100%|██████████| 704/704 [01:07<00:00, 10.49it/s]


In [8]:
# Deep memory footprint of df (index included), converted from bytes to GiB.
df.memory_usage(deep=True, index=True).sum() / 2**30


1.3428758382797241

In [52]:
# Remove the `df` name so the large DataFrame can be garbage-collected once
# nothing else references it.
del df