In [1]:
# Colab-only step: mount Google Drive at /content/drive so that the data and
# output paths used below (all under /content/drive/MyDrive/...) are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
import torch
import numpy as np
import os
from scipy.io import loadmat
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from collections import Counter

# ---------------------------------------------------------------------------
# Build train/val/test .pt files from the ElectricMotorFaults .mat recordings.
#
# Steps: load every .mat file -> keep one feature over a fixed time window ->
# split 60/20/20 -> min-max scale using statistics from the training split
# only -> save each split as a dict {"samples", "labels"} of torch tensors.
# ---------------------------------------------------------------------------

# Directory containing the .mat files
directory_path = '/content/drive/MyDrive/FYP_Nur_Time_Series_Representation_using_CL-main/Data/ElectricMotorFaults'

# Index along the last axis of `train_data` selecting the feature to keep
# (adjust according to the specific feature you are interested in)
features_index = 3

# Half-open window [start, stop) along axis 1 of `train_data` that is kept
# for every sample (575 positions).
window_start, window_stop = 401, 976

# Per-file arrays, concatenated after the loop
all_data = []
all_labels = []

# sorted(): os.listdir returns entries in arbitrary order, and the order in
# which files are concatenated determines which samples land in which split
# (the split is seeded, but it shuffles positions, not identities).
for filename in sorted(os.listdir(directory_path)):
    if not filename.endswith('.mat'):
        continue

    file_path = os.path.join(directory_path, filename)
    mat_data = loadmat(file_path)

    # Adjust these keys/indices according to your .mat file structure.
    data = mat_data['train_data'][:, window_start:window_stop, features_index]
    # Flatten per file so both (N, 1) and (1, N) label layouts concatenate
    # into a single 1-D label vector.
    labels = np.ravel(mat_data['label_data'])

    # Guard against silently misaligned samples/labels.
    if data.shape[0] != labels.shape[0]:
        raise ValueError(
            f"{filename}: {data.shape[0]} samples but {labels.shape[0]} labels"
        )

    print(labels.size)

    all_data.append(data)
    all_labels.append(labels)

if not all_data:
    raise ValueError(f"No .mat files found in {directory_path}")

# Stack all files along the sample axis
all_data = np.concatenate(all_data, axis=0)
all_labels = np.concatenate(all_labels, axis=0)

# Split BEFORE scaling: 60% train, then the remaining 40% halved into
# validation and test (20% / 20%).
X_train, X_temp, y_train, y_temp = train_test_split(all_data, all_labels, test_size=0.4, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)


def _apply_scaler(fitted_scaler, array):
    """Transform `array` with `fitted_scaler`, treating the last axis as features."""
    flat = array.reshape(-1, array.shape[-1])
    return fitted_scaler.transform(flat).reshape(array.shape)


# Fit the scaler on the training split only, so that min/max statistics of the
# validation and test samples do not leak into training; then apply the same
# transform to every split. Held-out values may therefore fall slightly
# outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(X_train.reshape(-1, X_train.shape[-1]))

X_train = _apply_scaler(scaler, X_train)
X_temp = _apply_scaler(scaler, X_temp)
X_val = _apply_scaler(scaler, X_val)
X_test = _apply_scaler(scaler, X_test)

# Kept for any later cell that expects the full scaled array.
all_data_scaled = _apply_scaler(scaler, all_data)

# Output directory for the tensor files
output_dir = "/content/drive/MyDrive/FYP_Nur_Time_Series_Representation_using_CL-main/TSTCC/data/ElectricMotorFaults"
os.makedirs(output_dir, exist_ok=True)

# Save each split as <name>.pt
datasets = {
    "train": (X_train, y_train),
    "val": (X_val, y_val),
    "test": (X_test, y_test)
}

for set_name, (X, y) in datasets.items():
    # torch.from_numpy keeps the NumPy dtype of each array.
    # NOTE(review): confirm the consumer of these files casts samples/labels
    # to the dtypes it needs.
    torch.save({
        "samples": torch.from_numpy(X).unsqueeze(1),  # Add channel dimension
        "labels": torch.from_numpy(y)
    }, os.path.join(output_dir, f"{set_name}.pt"))

print(f"Data sets saved in {output_dir}")

329
703
518
322
753
767
Data sets saved in /content/drive/MyDrive/FYP_Nur_Time_Series_Representation_using_CL-main/TSTCC/data/ElectricMotorFaults
