In [1]:
import h5py

In [None]:
# Inspect the top-level layout of the HDF5 file: print every key, and for each
# dataset its shape and dtype. The file is opened read-only and closed as soon
# as the block exits.
with h5py.File('../300k_150x5_2.h5', 'r') as f:
    # List all top-level keys (groups or datasets)
    print("Keys in HDF5 file:", list(f.keys()))
    for key, item in f.items():
        print(key)
        if isinstance(item, h5py.Dataset):
            # Only datasets carry shape/dtype (a scalar dataset reports shape ()).
            print(item.shape)
            print(item.dtype)
        else:
            # Groups (and other non-dataset members) have no shape/dtype;
            # accessing them would raise AttributeError, so report the type instead.
            print(f"<{type(item).__name__}: no shape/dtype>")
        print()

Keys in HDF5 file: ['X', 'max_num_classes', 'num_datapoints', 'num_features', 'original_batch_size', 'problem_type', 'single_eval_pos', 'y']
X
(300000, 150, 5)
float32

max_num_classes
(1,)
int64

num_datapoints
(300000,)
int32

num_features
(300000,)
int32

original_batch_size
(1,)
int64

problem_type
()
object

single_eval_pos
(300000,)
int32

y
(300000, 150)
float32



In [11]:
# Read the first 5 samples of each per-sample dataset.
# Slicing an h5py dataset (e.g. f['X'][:5]) reads only the requested range from
# disk and returns an in-memory NumPy array, so the *_sample variables remain
# valid after the file is closed. Only the Dataset handles themselves
# (f['X'], f['y'], ...) become unusable once the `with` block exits.
with h5py.File('../300k_150x5_2.h5', 'r') as f:
    X_sample = f['X'][:5]  # Shape: (5, 150, 5)
    y_sample = f['y'][:5]  # Shape: (5, 150)
    num_features_sample = f['num_features'][:5]
    num_datapoints_sample = f['num_datapoints'][:5]
    single_eval_pos_sample = f['single_eval_pos'][:5]

# The file is already closed here; everything below works on the NumPy copies.
print("X sample shape:", X_sample.shape)
print("y sample shape:", y_sample.shape)
print("num_features:", num_features_sample)
print("num_datapoints:", num_datapoints_sample)
print("single_eval_pos:", single_eval_pos_sample)

# Show one complete sample (features and targets) for a visual sanity check.
print("\nFirst sample X[0]:", X_sample[0])
print("First sample y[0]:", y_sample[0])

# Note: wrapping a slice in np.array(...), e.g. np.array(f['X'][:5]), is not
# needed to keep the data after the file closes -- the slice is already a
# regular NumPy array. Avoid f['X'][:] on this file unless the full dataset
# (300000 x 150 x 5 float32) fits comfortably in memory.

X sample shape: (5, 150, 5)
y sample shape: (5, 150)
num_features: [5 5 5 5 5]
num_datapoints: [150 150 150 150 150]
single_eval_pos: [ 82  82  82  82 120]

First sample X[0]: [[ 9.05124605e-01 -4.99249816e-01  8.54961574e-01 -5.86468697e-01
  -1.91476807e-01]
 [-6.22734129e-01  1.28157139e+00  1.19051981e+00 -1.88702798e+00
   3.52952242e-01]
 [ 1.17355752e+00 -2.09907085e-01  6.51147187e-01  8.07275593e-01
   1.64774060e+00]
 [-5.58242537e-02 -2.02288795e-02  4.66561913e-01  1.07694530e+00
  -1.32971644e+00]
 [ 2.31664991e+00  8.04326460e-02  5.59298992e-01  1.36577570e+00
   9.57889199e-01]
 [-5.37312627e-02  1.33754194e+00  5.33944547e-01  1.45086038e+00
   2.54583508e-01]
 [ 2.13447094e-01 -6.46191359e-01  1.01266837e+00 -7.52246618e-01
   1.29621938e-01]
 [ 7.82729030e-01  2.77617007e-01 -3.21967542e-01  6.79712951e-01
   1.21415901e+00]
 [ 8.06456804e-01  1.76237166e-01  1.91726160e+00  5.78321934e-01
  -4.65548456e-01]
 [ 1.40579939e+00  1.94073677e+00  1.13523221e+00 -5.673558

In [14]:
# Shape of a single sample from the slice loaded above; indexing with [0]
# drops the leading sample axis of X_sample.
X_sample[0].shape

(150, 5)

In [15]:
# Display the target vector of the first loaded sample.
# NOTE(review): presumably one target value per row of X_sample[0] -- confirm
# against the code that generated the file.
y_sample[0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
      dtype=float32)