In [15]:
#Import essential libraries
import os
import pandas as pd
import numpy as np
import sklearn 
import matplotlib.pyplot as plt
import seaborn as sns
import h5py

In [16]:
# METR-LA.h5 is an HDF5 file, not delimited text, so read_csv cannot decode it.
# The failed attempt is kept as a demonstration, but the expected exception is
# caught and shown so that "Restart & Run All" does not stop at this cell.
try:
    df = pd.read_csv('data/METR-LA.h5')
except UnicodeDecodeError as err:
    print(f"UnicodeDecodeError: {err}")

UnicodeDecodeError: 'utf-8' codec can't decode byte 0x89 in position 0: invalid start byte

In [17]:
# pd.read_hdf raises a TypeError on this file in the current environment
# (presumably a pandas/PyTables incompatibility with how the file was written
# -- TODO confirm). Catch and show it so "Restart & Run All" can continue.
try:
    df = pd.read_hdf('data/METR-LA.h5')
except TypeError as err:
    print(f"TypeError: {err}")

TypeError: a bytes-like object is required, not 'str'

In [18]:
# Read the METR-LA traffic dataset, which is stored in HDF5 format.
# read_hdf is known to raise a TypeError for this file in this environment, so
# the expected failure is caught and reported instead of aborting the notebook.
try:
    df = pd.read_hdf('data/METR-LA.h5')
except TypeError as err:
    print(f"TypeError: {err}")
else:
    # Display the first few rows of the dataset (only when the read succeeded)
    print(df.head())


TypeError: a bytes-like object is required, not 'str'

In [19]:
file_path = 'data/METR-LA.h5'


def build_traffic_frame(values, raw_index, raw_columns):
    """Assemble a DataFrame from the raw arrays stored under the HDF5 'df' group.

    Parameters
    ----------
    values : array-like
        2-D data read from ``block0_values``.
    raw_index : numpy.ndarray
        Row labels read from ``axis1``; either int64 values or (byte) strings.
    raw_columns : iterable
        Column labels read from ``axis0``; ``bytes`` entries are decoded as
        UTF-8, everything else is converted with ``str``.

    Returns
    -------
    pandas.DataFrame
        Frame with string column labels and a datetime index.
    """
    columns = [c.decode('utf-8') if isinstance(c, bytes) else str(c) for c in raw_columns]

    if raw_index.dtype == np.int64:
        # pandas interprets bare integers as nanoseconds since the Unix epoch.
        index = pd.to_datetime(raw_index)
    else:
        # Otherwise the labels are (byte) strings that pandas has to parse.
        index = pd.to_datetime(
            [i.decode('utf-8') if isinstance(i, bytes) else i for i in raw_index]
        )

    return pd.DataFrame(values, index=index, columns=columns)


def load_metr_la(path):
    """Read the METR-LA HDF5 file at ``path`` into a DataFrame using h5py.

    Parameters
    ----------
    path : str
        Location of the HDF5 file.

    Returns
    -------
    pandas.DataFrame
        The frame built by ``build_traffic_frame`` from the stored arrays.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist.
    KeyError
        If the file has no top-level 'df' group.
    ValueError
        If 'block0_values', 'axis0' or 'axis1' is missing from the 'df' group.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(f"The file '{path}' was not found.")

    with h5py.File(path, 'r') as f:
        # Check if 'df' group exists
        if 'df' not in f:
            raise KeyError(f"Expected group 'df' not found. Available keys: {list(f.keys())}")

        group = f['df']
        # .get() returns None for a missing member instead of raising, so all
        # three members can be validated together below.
        data_node = group.get('block0_values')
        index_node = group.get('axis1')
        columns_node = group.get('axis0')

        if data_node is None or index_node is None or columns_node is None:
            raise ValueError("HDF5 file structure is missing internal 'axis' or 'block' keys.")

        # Slice while the file is still open so the data is copied into memory.
        data = data_node[:]
        index = index_node[:]
        columns = columns_node[:]

    return build_traffic_frame(data, index, columns)


# Reset first: a 'df' left over from an earlier run must never be mistaken for
# the result of this load (the previous "'df' in locals()" check allowed that).
df = None

try:
    df = load_metr_la(file_path)
    print("Successfully loaded METR-LA dataset.")

except FileNotFoundError as e:
    print(f"File Error: {e}")
except KeyError as e:
    print(f"Key Error (Data Structure): {e}")
except ValueError as e:
    # Includes the missing 'axis'/'block' error raised by load_metr_la itself.
    print(f"Data Error: {e}")
except PermissionError:
    print("Error: Permission denied. Close the file if it's open in another program.")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

# Proceed only if this run actually produced a frame
if df is not None:
    print(df.head())

Successfully loaded METR-LA dataset.
                        773869     767541     767542     717447     717446  \
2012-03-01 00:00:00  64.375000  67.625000  67.125000  61.500000  66.875000   
2012-03-01 00:05:00  62.666667  68.555556  65.444444  62.444444  64.444444   
2012-03-01 00:10:00  64.000000  63.750000  60.000000  59.000000  66.500000   
2012-03-01 00:15:00   0.000000   0.000000   0.000000   0.000000   0.000000   
2012-03-01 00:20:00   0.000000   0.000000   0.000000   0.000000   0.000000   

                        717445  773062  767620     737529     717816  ...  \
2012-03-01 00:00:00  68.750000  65.125  67.125  59.625000  62.750000  ...   
2012-03-01 00:05:00  68.111111  65.000  65.000  57.444444  63.333333  ...   
2012-03-01 00:10:00  66.250000  64.500  64.250  63.875000  65.375000  ...   
2012-03-01 00:15:00   0.000000   0.000   0.000   0.000000   0.000000  ...   
2012-03-01 00:20:00   0.000000   0.000   0.000   0.000000   0.000000  ...   

                        772167 