In [4]:
import pandas as pd 

In [5]:
# Input CSV that pd.read_csv loads in the next cell.
csv_path = "./Data Science Data Collection - Sheet1.csv"
# Destination HDF5 file that df.to_hdf writes and the later cells inspect.
h5_path = "./foot_traffic_dataset.h5"

In [3]:
# Load the raw observations from csv_path into a DataFrame.
df = pd.read_csv(csv_path)

In [4]:
# Preview the first rows to sanity-check the parsed columns.
df.head()

Unnamed: 0,Date,Time,Student Count (1 min interval starting at time)
0,2024-09-24,9:00 AM,15
1,2024-09-24,9:15 AM,8
2,2024-09-24,9:30 AM,4
3,2024-09-24,9:45 AM,13
4,2024-09-24,10:00 AM,17


In [5]:
# Persist the frame to HDF5 under the key 'data', opening h5_path in write mode.
# NOTE(review): no `format=` is passed, so pandas uses its default layout. The
# inspection cells further down show the Date/Time strings ending up in an
# object-dtype `data/block0_values` dataset that holds pickled bytes — confirm
# this is acceptable if the file must be read by tools other than pandas.
df.to_hdf(h5_path, key='data', mode='w')

In [1]:
import h5py 
import xml.etree.ElementTree as ET 

In [2]:
def _attribute_text(value):
    """Render a single attribute value as text suitable for an XML element."""
    if isinstance(value, bytes):
        try:
            decoded = value.decode('utf-8')
        except UnicodeDecodeError:
            decoded = None
        # Use the decoded form only when it is fully printable: control
        # characters (e.g. NUL) are not legal in XML 1.0, whereas the bytes
        # repr used as the fallback escapes them.
        if decoded is not None and decoded.isprintable():
            return decoded
    return str(value)


def add_attributes_to_xml(obj, xml_element):
    """Append one ``<Attribute name=...>`` child per entry of ``obj.attrs``.

    Args:
        obj: Object exposing a mapping-like ``attrs`` (``.items()`` is called
            on it), such as an h5py group or dataset.
        xml_element: Parent ``xml.etree.ElementTree.Element``; it is mutated
            in place by appending the new children.

    Returns:
        None.

    Byte-string values that are valid, printable UTF-8 are written as their
    decoded text rather than as the Python repr ``"b'...'"``. Every other
    value (numbers, arrays, non-text byte blobs) is written via ``str()``.
    """
    for key, value in obj.attrs.items():
        attr_elem = ET.SubElement(xml_element, 'Attribute', name=key)
        attr_elem.text = _attribute_text(value)

def explore_h5_file_and_generate_xml(file_path, xml_output_path):
    """Walk an HDF5 file and write a flat XML summary of what it contains.

    Every group and dataset visited becomes a direct child of the
    ``HDF5_Metadata`` root, identified by its path in the ``name`` attribute.
    Datasets additionally get ``Shape`` and ``DataType`` children. The HDF5
    attributes of each node are appended through ``add_attributes_to_xml``.

    Args:
        file_path: Path of the HDF5 file, opened read-only.
        xml_output_path: Path the XML document is written to (UTF-8, with an
            XML declaration).
    """
    metadata_root = ET.Element('HDF5_Metadata')

    def record_node(name, obj):
        # Callback for visititems; it never returns a value, so every node
        # reported by the walk is processed.
        if isinstance(obj, h5py.Group):
            node_elem = ET.SubElement(metadata_root, 'Group', name=name)
        elif isinstance(obj, h5py.Dataset):
            node_elem = ET.SubElement(metadata_root, 'Dataset', name=name)
            # Shape and dtype come first, ahead of the node's own attributes.
            ET.SubElement(node_elem, 'Shape').text = str(obj.shape)
            ET.SubElement(node_elem, 'DataType').text = str(obj.dtype)
        else:
            return
        add_attributes_to_xml(obj, node_elem)

    # The file only needs to stay open for the duration of the walk.
    with h5py.File(file_path, 'r') as h5_file:
        h5_file.visititems(record_node)

    # Serialise the collected tree to disk and report where it went.
    ET.ElementTree(metadata_root).write(
        xml_output_path, encoding='utf-8', xml_declaration=True
    )
    print(f"XML metadata saved to {xml_output_path}")


In [6]:
# File name the generated XML summary is written to.
xml_output_file = 'metadata_output.xml'

# Summarise the HDF5 file at h5_path into xml_output_file; the function prints
# a confirmation line once the XML has been written.
explore_h5_file_and_generate_xml(h5_path, xml_output_file)

XML metadata saved to metadata_output.xml


In [1]:
import h5py

In [5]:
# Open the HDF5 file read-only for the interactive inspection cells below.
# The literal path is the same location as h5_path defined earlier.
# NOTE(review): this handle is never closed in the visible cells; call
# f.close() once the exploration is done.
f = h5py.File('./foot_traffic_dataset.h5', 'r')

In [6]:
# List the names of the file's top-level members.
print(list(f.keys()))

['data']


In [10]:
def print_structure(name, obj):
    """Print one HDF5 node: its path, then a summary line labelled by kind.

    Groups are reported as ``Group: <path>``; datasets as
    ``Dataset: <path> <shape> <dtype>``. Any other object only has its path
    printed. Used as the callback passed to ``visititems``.
    """
    print(name)
    if isinstance(obj, h5py.Group):
        summary = ("Group:", name)
    elif isinstance(obj, h5py.Dataset):
        summary = ("Dataset:", name, obj.shape, obj.dtype)
    else:
        return
    print(*summary)

# Walk the members of the open file, calling print_structure on each one.
f.visititems(print_structure)

data
Group: data
data/axis0
Dataset: data/axis0 (3,) |S47
data/axis1
Dataset: data/axis1 (51,) int64
data/block0_items
Dataset: data/block0_items (2,) |S4
data/block0_values
Dataset: data/block0_values (1,) object
data/block1_items
Dataset: data/block1_items (1,) |S47
data/block1_values
Dataset: data/block1_values (51, 1) int64


In [13]:
# Grab the data/axis0 dataset reported by the structure walk and check its
# element type.
dataset = f['data/axis0']
print(dataset.dtype)

|S47


In [14]:
# Read the full contents of data/axis0; the output shows the three column names
# as byte strings.
dataset[:]

array([b'Date', b'Time',
       b'Student Count (1 min interval starting at time)'], dtype='|S47')

In [15]:
# Read data/axis1; the output is the integers 0..50 — presumably the DataFrame's
# row index (verify against the pandas HDF layout).
f['data/axis1'][:]

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50])

In [16]:
# Read data/block0_items; the output lists b'Date' and b'Time' — presumably the
# columns whose values live in data/block0_values.
f['data/block0_items'][:]

array([b'Date', b'Time'], dtype='|S4')

In [17]:
# Read data/block0_values, the object-dtype dataset from the structure walk.
# The bytes in the output begin with 128, 5 (0x80 0x05) and spell out
# "numpy.core.multiarray" in ASCII, so this looks like a pickled NumPy array
# rather than natively stored strings.
# NOTE(review): confirm before relying on this file outside pandas.
f['data/block0_values'][:]

array([array([128,   5, 149,  85,   2,   0,   0,   0,   0,   0,   0, 140,  21,
              110, 117, 109, 112, 121,  46,  99, 111, 114, 101,  46, 109, 117,
              108, 116, 105,  97, 114, 114,  97, 121, 148, 140,  12,  95, 114,
              101,  99, 111, 110, 115, 116, 114, 117,  99, 116, 148, 147, 148,
              140,   5, 110, 117, 109, 112, 121, 148, 140,   7, 110, 100,  97,
              114, 114,  97, 121, 148, 147, 148,  75,   0, 133, 148,  67,   1,
               98, 148, 135, 148,  82, 148,  40,  75,   1,  75,  51,  75,   2,
              134, 148, 104,   3, 140,   5, 100, 116, 121, 112, 101, 148, 147,
              148, 140,   2,  79,  56, 148, 137, 136, 135, 148,  82, 148,  40,
               75,   3, 140,   1, 124, 148,  78,  78,  78,  74, 255, 255, 255,
              255,  74, 255, 255, 255, 255,  75,  63, 116, 148,  98, 136,  93,
              148,  40, 140,  10,  50,  48,  50,  52,  45,  48,  57,  45,  50,
               52, 148, 140,   7,  57,  58,  48,  48

In [18]:
# Read data/block1_items; the output holds the single Student Count column name.
f['data/block1_items'][:]

array([b'Student Count (1 min interval starting at time)'], dtype='|S47')

In [19]:
# Read the whole (51, 1) int64 block; its first values (15, 8, 4, 13, 17) match
# the Student Count column shown by df.head() earlier.
f['data/block1_values'][:]

array([[15],
       [ 8],
       [ 4],
       [13],
       [17],
       [13],
       [ 5],
       [ 9],
       [18],
       [13],
       [ 4],
       [16],
       [13],
       [ 7],
       [ 6],
       [16],
       [14],
       [ 7],
       [ 4],
       [ 5],
       [ 5],
       [ 4],
       [ 3],
       [ 1],
       [ 1],
       [ 2],
       [ 3],
       [ 4],
       [ 7],
       [ 2],
       [ 3],
       [ 4],
       [15],
       [11],
       [ 9],
       [12],
       [12],
       [11],
       [ 7],
       [16],
       [10],
       [ 6],
       [ 4],
       [ 5],
       [ 3],
       [ 3],
       [ 1],
       [ 6],
       [ 1],
       [ 2],
       [ 2]])