In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
import torch
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from collections import Counter

parent_directory = '/content/drive/MyDrive/FYP_Nur_Time_Series_Representation_using_CL-main/Data/CentrifugalPump'
output_dir = "/content/drive/MyDrive/FYP_Nur_Time_Series_Representation_using_CL-main/TSTCC/data/CentrifugalPump"
os.makedirs(output_dir, exist_ok=True)

all_data = []
all_labels = []
sample_length = 400
label_mapping = {}

print("Checking directories in:", parent_directory)
assert os.path.exists(parent_directory), f"The directory {parent_directory} does not exist."

for idx, class_dir in enumerate(os.listdir(parent_directory)):
    class_path = os.path.join(parent_directory, class_dir)
    if os.path.isdir(class_path):
        print(f"Processing directory: {class_dir}")
        label_mapping[class_dir] = idx
        for filename in os.listdir(class_path):
            if "ABC" in filename and filename.endswith("#1.TXT"):
                file_path = os.path.join(class_path, filename)
                with open(file_path, 'r') as file:
                    # Skip first three lines
                    for _ in range(3):
                        next(file)
                    # Read the rest of the data, convert to float
                    data = [float(line.strip()) for line in file if line.strip()]
                    if data:
                        # Ensure that data length is a multiple of sample_length
                        num_samples_file = len(data) // sample_length
                        data = data[:num_samples_file * sample_length]
                        all_data.append(data)
                        all_labels.extend([idx] * num_samples_file)  # Assign label idx to each segment

if not all_data:
    raise ValueError("No data files were read. Please check the file paths and formats.")

# Concatenate all data into one long array
all_data = np.concatenate(all_data)

# Reshape data into samples of 400 data points
all_data = all_data.reshape(-1, sample_length)

# Normalize the data
scaler = MinMaxScaler()
all_data = scaler.fit_transform(all_data)

# Split data into training, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(all_data, all_labels, test_size=0.4, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Save the datasets as .pt files
datasets = {
    "train": (X_train, y_train),
    "val": (X_val, y_val),
    "test": (X_test, y_test)
}


for set_name, (X, y) in datasets.items():
    y = np.array(y)  # Ensure y is a numpy array
    torch.save({
        "samples": torch.from_numpy(X).unsqueeze(1),  # Add channel dimension
        "labels": torch.from_numpy(y)
    }, os.path.join(output_dir, f"{set_name}.pt"))

print(f"Data sets saved in {output_dir}")
print(f"Label mapping: {label_mapping}")

label_counts = Counter(all_labels)
for label, count in label_counts.items():
    print(f"Label {label} (mapped from {list(label_mapping.keys())[list(label_mapping.values()).index(label)]}): {count} samples")


Checking directories in: /content/drive/MyDrive/FYP_Nur_Time_Series_Representation_using_CL-main/Data/CentrifugalPump
Processing directory: Bi-spectrum counter maps under impeller wearing fault condition
Processing directory: bearing outer race wearing fault condition
Processing directory: bearing inner race wearing fault condition
Processing directory: normal
Processing directory: bearing roller wearing fault condition
Data sets saved in /content/drive/MyDrive/FYP_Nur_Time_Series_Representation_using_CL-main/TSTCC/data/CentrifugalPump
Label mapping: {'Bi-spectrum counter maps under impeller wearing fault condition': 0, 'bearing outer race wearing fault condition': 1, 'bearing inner race wearing fault condition': 2, 'normal': 3, 'bearing roller wearing fault condition': 4}
Label 0 (mapped from Bi-spectrum counter maps under impeller wearing fault condition): 255 samples
Label 1 (mapped from bearing outer race wearing fault condition): 255 samples
Label 2 (mapped from bearing inner race