In [1]:
import h5py

def explore_h5py_structure(item, indent=0):
    """Recursively explore the HDF5 structure."""
    indentation = "  " * indent
    if isinstance(item, h5py.Group):
        print(f"{indentation}Group: {item.name}")
        for key in item.keys():
            explore_h5py_structure(item[key], indent + 1)
    elif isinstance(item, h5py.Dataset):
        print(f"{indentation}Dataset: {item.name}")
        print(f"{indentation}  shape: {item.shape}")
        print(f"{indentation}  dtype: {item.dtype}")
        print(f"{indentation}  values: {item[:5]}...")  # Show the first 5 entries for brevity

def read_cmu_mosei_labels(file_path):
    with h5py.File(file_path, 'r') as f:
        print(type(f))  # Print the type of the file object
        for key in f.keys():
            print(f"Key: {key}")
            explore_h5py_structure(f[key])

if __name__ == '__main__':
    file_path = "/Users/dinesh/College/final proj/attempt3/CMU_MOSI_Opinion_Labels.csd"
    read_cmu_mosei_labels(file_path)


<class 'h5py._hl.files.File'>
Key: Opinion Segment Labels
Group: /Opinion Segment Labels
  Group: /Opinion Segment Labels/data
    Group: /Opinion Segment Labels/data/03bSnISJMiM
      Dataset: /Opinion Segment Labels/data/03bSnISJMiM/features
        shape: (13, 1)
        dtype: float32
        values: [[ 2.4 ]
 [-0.8 ]
 [-1.  ]
 [-1.75]
 [ 0.  ]]...
      Dataset: /Opinion Segment Labels/data/03bSnISJMiM/intervals
        shape: (13, 2)
        dtype: float32
        values: [[51.904533 55.94535 ]
 [56.045124 66.78072 ]
 [66.78072  68.73628 ]
 [68.73628  70.542175]
 [70.542175 71.69955 ]]...
    Group: /Opinion Segment Labels/data/0h-zjBukYpk
      Dataset: /Opinion Segment Labels/data/0h-zjBukYpk/features
        shape: (25, 1)
        dtype: float32
        values: [[-0.4]
 [-2.8]
 [-0.2]
 [-1.4]
 [ 0.8]]...
      Dataset: /Opinion Segment Labels/data/0h-zjBukYpk/intervals
        shape: (25, 2)
        dtype: float32
        values: [[33.705894 37.666893]
 [43.733105 56.713604]
 

In [3]:
import h5py
import csv

def create_csv_from_hdf5(file_path, output_csv):
    with h5py.File(file_path, 'r') as f:
        # Prepare CSV file for writing
        with open(output_csv, 'w', newline='') as csvfile:
            csvwriter = csv.writer(csvfile)
            
            # Write the header
            header = ['id', 'interval_start', 'interval_end'] + [f'feature_{i}' for i in range(7)]
            csvwriter.writerow(header)

            # Iterate through all subgroups under /All Labels/data
            data_group = f['All Labels/data']
            for key in data_group.keys():
                subgroup = data_group[key]
                features = subgroup['features'][:]
                intervals = subgroup['intervals'][:]
                
                # Write each feature set and its corresponding interval to the CSV file
                for i in range(features.shape[0]):
                    row = [key, intervals[i][0], intervals[i][1]] + list(features[i])
                    csvwriter.writerow(row)

if __name__ == '__main__':
    file_path = 'CMU_MOSI_Labels.csd'  # Path to your HDF5 file
    output_csv = 'mosi.csv'     # Output CSV file name
    create_csv_from_hdf5(file_path, output_csv)
    print(f"CSV file '{output_csv}' created successfully!")


KeyError: 'Unable to open object (component not found)'