In [1]:
import pandas as pd
import os

# Read the label file: one row per data file, giving its file name and its OK/NOK label.
# header=0 discards the file's own header row in favour of the column names given here.
labels_df = pd.read_csv('labels.csv', header=0, names=['filename', 'label'])

# Normalise file names to zero-padded four-character strings (25 -> '0025').
# str.zfill pads on the left whether pandas parsed the column as integers or as
# strings; the f'{x:04}' format spec only zero-pads numbers correctly and pads
# strings on the right (or raises on Python versions before 3.10).
labels_df['filename'] = labels_df['filename'].astype(str).str.zfill(4)

# Encode the labels as integers: OK -> 1, NOK -> 0.
label_codes = {'OK': 1, 'NOK': 0}
unknown_labels = labels_df.loc[~labels_df['label'].isin(list(label_codes)), 'label'].unique()
if len(unknown_labels) > 0:
    # Series.map would silently turn these into NaN and the whole column into floats.
    raise ValueError(f"Unexpected label(s) in labels.csv: {list(unknown_labels)}")
labels_df['label'] = labels_df['label'].map(label_codes)

# Convert labels_df to a dictionary where file names are keys and labels are values (0/1)
labels_dict = dict(zip(labels_df['filename'], labels_df['label']))
# print(labels_dict)

labels_dict :{'0025': 0, '0028': 1, '0031': 1, '0032': 0, '0036': 1, '0037': 1, ...} 

In [2]:
# Load every CSV file in the data directory into a dictionary of DataFrames,
# keyed by the file's base name (e.g. 'data/0025.csv' -> '0025').
data_frames = {}

# Directory containing the per-file CSV data
data_directory = 'data/'

# os.listdir returns entries in arbitrary order; sorting makes the load order
# (and therefore the dictionary's insertion order) reproducible across runs.
for filename in sorted(os.listdir(data_directory)):
    # Split off the extension to get the dictionary key (e.g. 0025.csv -> 0025)
    base_filename, extension = os.path.splitext(filename)
    if extension != '.csv':
        continue

    # Read CSV file
    file_path = os.path.join(data_directory, filename)
    temp_df = pd.read_csv(file_path)

    # Parse the 'time' column and derive a floating-point 'seconds' column from it.
    temp_df['time'] = pd.to_datetime(temp_df['time'])
    # NOTE(review): this is the seconds-within-the-minute component plus the
    # fractional part, so the value wraps back toward 0 whenever a recording
    # crosses a minute boundary -- confirm that is intended.
    temp_df['seconds'] = temp_df['time'].dt.second + temp_df['time'].dt.microsecond / 1e6

    # Store DataFrame into dictionary
    data_frames[base_filename] = temp_df

# Example: access the data of a specific file, here '0025.csv'.
# Both lookups below use specific_filename, so changing it switches the data
# preview and the label together.
specific_filename = '0025'
print(data_frames[specific_filename].head())

# Example: view the label of the same file
if specific_filename in labels_dict:
    specific_label = labels_dict[specific_filename]
    print(f"Label for file '{specific_filename}': {specific_label}")

dataframe of data/0025.csv is read, it is the 1th one
dataframe of data/0028.csv is read, it is the 2th one
dataframe of data/0031.csv is read, it is the 3th one
dataframe of data/0032.csv is read, it is the 4th one
dataframe of data/0036.csv is read, it is the 5th one
dataframe of data/0037.csv is read, it is the 6th one
dataframe of data/0041.csv is read, it is the 7th one
dataframe of data/0055.csv is read, it is the 8th one
dataframe of data/0079.csv is read, it is the 9th one
dataframe of data/0097.csv is read, it is the 10th one
dataframe of data/0100.csv is read, it is the 11th one
dataframe of data/0103.csv is read, it is the 12th one
dataframe of data/0105.csv is read, it is the 13th one
dataframe of data/0108.csv is read, it is the 14th one
dataframe of data/0109.csv is read, it is the 15th one
dataframe of data/0123.csv is read, it is the 16th one
dataframe of data/0124.csv is read, it is the 17th one
dataframe of data/0129.csv is read, it is the 18th one
dataframe of data/0

We have two dictionaries that store the labels and data for files:

- `labels_dict`: This dictionary uses four-digit filenames (e.g., `'0025'`) as keys; each value is the label of the corresponding file, encoded as `1` for `OK` and `0` for `NOK`. For instance, `labels_dict['0025']` returns the label of the file `'0025.csv'`.

- `data_frames`: This dictionary uses the same four-digit filenames as keys (e.g., `'0025'`); each value is a DataFrame holding the data from the associated CSV file, with its `time` column parsed as datetimes and an added `seconds` column. For example, `data_frames['0025']` returns the data loaded from `'0025.csv'`.