In [28]:
import h5py

def explore_h5py_structure(item, indent=0):
    """Recursively print the layout of an HDF5 group or dataset.

    A group is printed by name and then each of its children is visited one
    indent level deeper. A dataset is printed with its name, shape, dtype and
    a short preview of its contents. Objects that are neither a group nor a
    dataset produce no output.

    Args:
        item: The ``h5py.Group`` or ``h5py.Dataset`` to describe.
        indent: Current nesting depth; every level adds two spaces of
            indentation to the printed lines.
    """
    indentation = "  " * indent
    if isinstance(item, h5py.Group):
        print(f"{indentation}Group: {item.name}")
        for key in item.keys():
            explore_h5py_structure(item[key], indent + 1)
    elif isinstance(item, h5py.Dataset):
        print(f"{indentation}Dataset: {item.name}")
        print(f"{indentation}  shape: {item.shape}")
        print(f"{indentation}  dtype: {item.dtype}")
        if item.shape:
            # Show only the first 5 entries along the first axis for brevity.
            print(f"{indentation}  values: {item[:5]}...")
        else:
            # A falsy shape means a scalar dataset (shape ()) or an empty/null
            # one (shape None). Neither accepts slice indexing, so read the
            # whole value with [()] instead of aborting the traversal.
            print(f"{indentation}  values: {item[()]}")

def read_cmu_mosei_labels(file_path):
    """Open an HDF5 file read-only and print its complete structure.

    The type of the opened file object is printed first. Then, for every
    top-level key, a ``Key: <name>`` line is printed followed by the
    recursive listing produced by ``explore_h5py_structure``.

    Args:
        file_path: Path of the HDF5 file to inspect.
    """
    with h5py.File(file_path, 'r') as label_file:
        # Shows which h5py class backs the handle.
        print(type(label_file))
        for top_level_name in label_file.keys():
            print(f"Key: {top_level_name}")
            top_level_item = label_file[top_level_name]
            explore_h5py_structure(top_level_item)

if __name__ == '__main__':
    # The label file is resolved relative to the current working directory.
    file_path = 'CMU_MOSEI_Labels.csd'
    read_cmu_mosei_labels(file_path=file_path)


<class 'h5py._hl.files.File'>
Key: All Labels
Group: /All Labels
  Group: /All Labels/data
    Group: /All Labels/data/--qXJuDtHPw
      Dataset: /All Labels/data/--qXJuDtHPw/features
        shape: (1, 7)
        dtype: float32
        values: [[1.        0.6666667 0.        0.        0.        0.        0.       ]]...
      Dataset: /All Labels/data/--qXJuDtHPw/intervals
        shape: (1, 2)
        dtype: float64
        values: [[23.199 30.325]]...
    Group: /All Labels/data/-3g5yACwYnA
      Dataset: /All Labels/data/-3g5yACwYnA/features
        shape: (6, 7)
        dtype: float32
        values: [[1.         0.6666667  0.6666667  0.         0.         0.
  0.6666667 ]
 [0.6666667  0.         0.         0.         0.         0.
  0.        ]
 [0.         0.6666667  0.6666667  0.         0.         0.
  0.33333334]
 [0.         0.33333334 0.33333334 0.         0.         0.
  0.        ]
 [1.         0.6666667  0.         0.         0.         0.
  0.        ]]...
      Dataset:

In [29]:
import h5py
import csv

def create_csv_from_hdf5(file_path, output_csv, data_path='All Labels/data',
                         num_features=7):
    """Flatten the per-segment label arrays of an HDF5 file into one CSV.

    Every subgroup of ``data_path`` must hold a ``features`` dataset and an
    ``intervals`` dataset with one row per segment. Each segment becomes one
    CSV row: the subgroup key, the interval start and end, then the feature
    values. The header is
    ``id, interval_start, interval_end, feature_0 .. feature_{num_features-1}``.

    Args:
        file_path: Path of the HDF5 file to read.
        output_csv: Path of the CSV file to write; an existing file is
            overwritten.
        data_path: HDF5 path of the group whose subgroups are exported.
        num_features: Number of feature columns per row. It determines the
            header and is checked against every non-empty ``features``
            dataset so header and rows can never disagree.

    Raises:
        ValueError: If a subgroup's ``features`` and ``intervals`` have a
            different number of rows, or its ``features`` rows are not
            ``num_features`` wide. Rows written before the error remain in
            ``output_csv``.
    """
    header = ['id', 'interval_start', 'interval_end']
    header += [f'feature_{i}' for i in range(num_features)]

    with h5py.File(file_path, 'r') as f:
        # newline='' leaves line-ending handling to the csv module; the
        # explicit encoding keeps the output independent of the OS locale.
        with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile:
            csvwriter = csv.writer(csvfile)
            csvwriter.writerow(header)

            # Iterate through all subgroups under the data group.
            data_group = f[data_path]
            for key in data_group.keys():
                subgroup = data_group[key]
                features = subgroup['features'][:]
                intervals = subgroup['intervals'][:]

                n_rows = features.shape[0]
                if intervals.shape[0] != n_rows:
                    raise ValueError(
                        f"{key!r}: features has {n_rows} rows but intervals "
                        f"has {intervals.shape[0]}"
                    )
                if n_rows and (features.ndim != 2
                               or features.shape[1] != num_features):
                    raise ValueError(
                        f"{key!r}: expected features of shape "
                        f"(n, {num_features}), got {features.shape}"
                    )

                # Elements are passed through as numpy scalars (not converted
                # with tolist()) so float32 values keep their short text form.
                csvwriter.writerows(
                    [key, interval[0], interval[1], *feature_row]
                    for interval, feature_row in zip(intervals, features)
                )

if __name__ == '__main__':
    # Both paths are resolved relative to the current working directory.
    file_path = 'CMU_MOSEI_Labels.csd'  # HDF5 file to read
    output_csv = 'mosei_labels.csv'     # CSV file to write
    create_csv_from_hdf5(file_path=file_path, output_csv=output_csv)
    print(f"CSV file '{output_csv}' created successfully!")


CSV file 'mosei_labels.csv' created successfully!
