### ArticularyWordRecognition

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.io import arff

# Step 1: Load the dataset (replace 'your_arff_file.arff' with the actual path)
arff_file_path = '/content/ArticularyWordRecognitionDimension1_TRAIN.arff'  # Update the path as needed

# Load ARFF file into a Pandas DataFrame
data, meta = arff.loadarff(arff_file_path)
df = pd.DataFrame(data)

# Step 2: Convert class labels from byte to float
df['classAttribute'] = df['classAttribute'].apply(lambda x: float(x.decode('utf-8')))

# Step 3: Separate features and labels
features = df.drop(columns=['classAttribute'])
labels = df['classAttribute']

# Step 4: Normalize the features using StandardScaler
scaler = StandardScaler()
features_normalized = scaler.fit_transform(features)

# Step 5: Reshape the features into a 3D array (samples, time_steps, features)
# Here, we assume 144 time steps and 1 feature per time step
time_steps = 144  # Assuming 144 channels, we treat each as a time step
num_features = 1  # 1 feature per time step (each channel is treated as a feature)
X = features_normalized.reshape(-1, time_steps, num_features)

# Step 6: Convert labels to a NumPy array
y = labels.values

unique_labels = np.unique(y)
new_label_mapping = {label: idx for idx, label in enumerate(unique_labels)}

# Apply the mapping to the y array
y = np.array([new_label_mapping[label] for label in y])

# Step 7: Split the dataset into training and testing sets (90% training, 10% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.10, random_state=42)

# Step 8: Further split the training data into training and validation sets (80% train, 20% validation)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.10, random_state=42)

# Step 9: Further split the training data into training and validation sets (80% train, 20% validation)
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.20, random_state=42)

# Step 10: Check the number of classes
n_classes = len(np.unique(y_train))
print(f"Number of classes: {n_classes}")

# Output the shapes of the datasets
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_valid shape: {X_valid.shape}, y_valid shape: {y_valid.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")


Number of classes: 25
X_train shape: (197, 144, 1), y_train shape: (197,)
X_valid shape: (50, 144, 1), y_valid shape: (50,)
X_test shape: (28, 144, 1), y_test shape: (28,)


In [None]:
# Inspect unique values of the classAttribute before processing
print("Unique class labels before processing:", df['classAttribute'].unique())

Unique class labels before processing: [ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17. 18.
 19. 20. 21. 22. 23. 24. 25.]


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.io import arff

# Step 1: Load the dataset (replace 'your_arff_file.arff' with the actual path)
arff_file_path = '/content/ArticularyWordRecognitionDimension1_TRAIN.arff'  # Update the path as needed

# Load ARFF file into a Pandas DataFrame
data, meta = arff.loadarff(arff_file_path)
df = pd.DataFrame(data)

# Step 2: Inspect unique class labels before processing
print("Unique class labels before processing:", df['classAttribute'].unique())

# Step 3: Convert class labels to binary or categorical (assuming 2 classes)
# Here you can adjust based on actual class names or values
df['classAttribute'] = df['classAttribute'].apply(lambda x: 0 if x == b'Class1' else 1)

# Step 4: Separate features and labels
features = df.drop(columns=['classAttribute'])
labels = df['classAttribute']

# Step 5: Normalize the features using StandardScaler
scaler = StandardScaler()
features_normalized = scaler.fit_transform(features)

# Step 6: Reshape the features into a 3D array (samples, time_steps, features)
time_steps = 144  # Assuming 144 channels, we treat each as a time step
num_features = 1  # 1 feature per time step (each channel is treated as a feature)
X = features_normalized.reshape(-1, time_steps, num_features)

# Step 7: Convert labels to a NumPy array
y = labels.values

# Step 8: Split the dataset into training and testing sets (90% training, 10% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.10, random_state=42)

# Step 9: Further split the training data into training and validation sets (80% train, 20% validation)
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.20, random_state=42)

# Step 10: Check the number of classes
n_classes = len(np.unique(y_train))
print(f"Number of classes: {n_classes}")

# Output the shapes of the datasets
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_valid shape: {X_valid.shape}, y_valid shape: {y_valid.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")


Unique class labels before processing: [b'1.0' b'2.0' b'3.0' b'4.0' b'5.0' b'6.0' b'7.0' b'8.0' b'9.0' b'10.0'
 b'11.0' b'12.0' b'13.0' b'14.0' b'15.0' b'16.0' b'17.0' b'18.0' b'19.0'
 b'20.0' b'21.0' b'22.0' b'23.0' b'24.0' b'25.0']
Number of classes: 1
X_train shape: (197, 144, 1), y_train shape: (197,)
X_valid shape: (50, 144, 1), y_valid shape: (50,)
X_test shape: (28, 144, 1), y_test shape: (28,)


In [None]:
unique_labels = np.unique(y)
new_label_mapping = {label: idx for idx, label in enumerate(unique_labels)}

# Apply the mapping to the y array
y = np.array([new_label_mapping[label] for label in y])

# Verify the remapped labels
print(f"Remapped labels: {np.unique(y)}")

Remapped labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24]


In [None]:
n_classes = len(np.unique(y_train))
print(f"Number of classes: {n_classes}")

# Output the shapes of the datasets
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_valid shape: {X_valid.shape}, y_valid shape: {y_valid.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

Number of classes: 25
X_train shape: torch.Size([197, 144, 1]), y_train shape: (197,)
X_valid shape: torch.Size([50, 144, 1]), y_valid shape: (50,)
X_test shape: torch.Size([28, 144, 1]), y_test shape: (28,)


### AtrialFibrillation

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from scipy.io import arff

# Data Augmentation Function (Jittering)
def jitter(data, sigma=0.01):
    """Add random noise to the data."""
    return data + np.random.normal(loc=0.0, scale=sigma, size=data.shape)

# Load the ARFF file (AtrialFibrillationDimension1_TRAIN.arff)
arff_file_path = '/content/AtrialFibrillationDimension1_TRAIN.arff'

# Load ARFF file into a Pandas DataFrame
data, meta = arff.loadarff(arff_file_path)
df = pd.DataFrame(data)

# Step 1: Convert class labels from byte to integer
df['target'] = df['target'].apply(lambda x: 1 if x == b'n' else 0)

# Step 2: Separate features and labels
features = df.drop(columns=['target'])
labels = df['target']

# Step 3: Normalize the features using StandardScaler
scaler = StandardScaler()
features_normalized = scaler.fit_transform(features)

# Step 4: Reshape the features into a 3D array (samples, time_steps, features)
time_steps = 640  # Number of time steps based on the dataset shape
num_features = 1  # We have 1 feature per time step (each column treated as a feature)
X = features_normalized.reshape(-1, time_steps, num_features)

# Step 5: Convert labels to a NumPy array
y = labels.values

# Step 6: Apply jitter to augment the dataset
X_augmented = np.concatenate([X, jitter(X)], axis=0)
y_augmented = np.concatenate([y, y], axis=0)  # Duplicate labels for augmented data

# Step 7: Split the dataset into training, validation, and test sets (optional)
# You can split the data further if you want to use a fixed validation set.
X_train, X_test, y_train, y_test = train_test_split(X_augmented, y_augmented, test_size=0.10, random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.20, random_state=42)

# Output the shapes of the augmented dataset for inspection
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_valid shape: {X_valid.shape}, y_valid shape: {y_valid.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")


X_train shape: (21, 640, 1), y_train shape: (21,)
X_valid shape: (6, 640, 1), y_valid shape: (6,)
X_test shape: (3, 640, 1), y_test shape: (3,)


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.io import arff

# Load both dimensions
arff_file_path_dim1 = '/content/AtrialFibrillationDimension1_TRAIN.arff'
arff_file_path_dim2 = '/content/AtrialFibrillationDimension2_TRAIN.arff'

# Load ARFF files into DataFrames
data_dim1, meta_dim1 = arff.loadarff(arff_file_path_dim1)
df_dim1 = pd.DataFrame(data_dim1)

data_dim2, meta_dim2 = arff.loadarff(arff_file_path_dim2)
df_dim2 = pd.DataFrame(data_dim2)

# Step 1: Separate features and labels for both dimensions
features_dim1 = df_dim1.drop(columns=['target'])
features_dim2 = df_dim2.drop(columns=['target'])
labels = df_dim1['target'].apply(lambda x: 1 if x == b'n' else 0)  # Convert target to integer

# Step 2: Normalize the features using StandardScaler for both dimensions
scaler = StandardScaler()
features_dim1_normalized = scaler.fit_transform(features_dim1)
features_dim2_normalized = scaler.fit_transform(features_dim2)

# Step 3: Stack both dimensions along a new axis (multivariate time-series)
X_multivariate = np.stack([features_dim1_normalized, features_dim2_normalized], axis=-1)

# Step 4: Convert labels to a NumPy array
y = labels.values

# Step 5: Split the dataset into training and testing sets (90% training, 10% testing)
X_train, X_test, y_train, y_test = train_test_split(X_multivariate, y, test_size=0.10, random_state=42)

# Step 6: Further split the training data into training and validation sets (80% train, 20% validation)
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.20, random_state=42)

# Output the shapes of the datasets for inspection
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_valid shape: {X_valid.shape}, y_valid shape: {y_valid.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")


X_train shape: (10, 640, 2), y_train shape: (10,)
X_valid shape: (3, 640, 2), y_valid shape: (3,)
X_test shape: (2, 640, 2), y_test shape: (2,)


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.io import arff

# Load both dimensions for training set
train_file_path_dim1 = '/content/AtrialFibrillationDimension1_TRAIN.arff'
train_file_path_dim2 = '/content/AtrialFibrillationDimension2_TRAIN.arff'

# Load ARFF files into DataFrames for training
train_data_dim1, train_meta_dim1 = arff.loadarff(train_file_path_dim1)
train_df_dim1 = pd.DataFrame(train_data_dim1)

train_data_dim2, train_meta_dim2 = arff.loadarff(train_file_path_dim2)
train_df_dim2 = pd.DataFrame(train_data_dim2)

# Load both dimensions for test set
test_file_path_dim1 = '/content/AtrialFibrillationDimension1_TEST.arff'
test_file_path_dim2 = '/content/AtrialFibrillationDimension2_TEST.arff'

test_data_dim1, test_meta_dim1 = arff.loadarff(test_file_path_dim1)
test_df_dim1 = pd.DataFrame(test_data_dim1)

test_data_dim2, test_meta_dim2 = arff.loadarff(test_file_path_dim2)
test_df_dim2 = pd.DataFrame(test_data_dim2)

# Step 1: Separate features and labels for both dimensions in training set
train_features_dim1 = train_df_dim1.drop(columns=['target'])
train_features_dim2 = train_df_dim2.drop(columns=['target'])
train_labels = train_df_dim1['target'].apply(lambda x: 1 if x == b'n' else 0)  # Convert target to integer

# Step 2: Separate features and labels for both dimensions in test set
test_features_dim1 = test_df_dim1.drop(columns=['target'])
test_features_dim2 = test_df_dim2.drop(columns=['target'])
test_labels = test_df_dim1['target'].apply(lambda x: 1 if x == b'n' else 0)

# Step 3: Normalize the features using StandardScaler for both dimensions in train and test sets
scaler_dim1 = StandardScaler()
scaler_dim2 = StandardScaler()

# Standardize each dimension independently
train_features_dim1_normalized = scaler_dim1.fit_transform(train_features_dim1)
train_features_dim2_normalized = scaler_dim2.fit_transform(train_features_dim2)

test_features_dim1_normalized = scaler_dim1.transform(test_features_dim1)  # Use the same scaler for test data
test_features_dim2_normalized = scaler_dim2.transform(test_features_dim2)

# Step 4: Stack both dimensions along a new axis (multivariate time-series)
X_train = np.stack([train_features_dim1_normalized, train_features_dim2_normalized], axis=-1)
X_test = np.stack([test_features_dim1_normalized, test_features_dim2_normalized], axis=-1)

# Step 5: Convert labels to a NumPy array
y_train = train_labels.values
y_test = test_labels.values

# Step 6: Split the training data into training and validation sets (80% train, 20% validation)
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.20, random_state=42)

# Output the shapes of the datasets for inspection
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_valid shape: {X_valid.shape}, y_valid shape: {y_valid.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")


X_train shape: (12, 640, 2), y_train shape: (12,)
X_valid shape: (3, 640, 2), y_valid shape: (3,)
X_test shape: (15, 640, 2), y_test shape: (15,)


In [None]:
import torch

# Calculate the number of unique classes again
n_classes_train = len(np.unique(y_train))

# Convert the data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_valid_tensor = torch.tensor(X_valid, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

# Print the number of unique classes and dataset shapes in the desired format
print(f"Number of classes: {n_classes_train}")
print(f"X_train shape: {X_train_tensor.shape}, y_train shape: {y_train.shape}")
print(f"X_valid shape: {X_valid_tensor.shape}, y_valid shape: {y_valid.shape}")
print(f"X_test shape: {X_test_tensor.shape}, y_test shape: {y_test.shape}")


Number of classes: 2
X_train shape: torch.Size([12, 640, 2]), y_train shape: (12,)
X_valid shape: torch.Size([3, 640, 2]), y_valid shape: (3,)
X_test shape: torch.Size([15, 640, 2]), y_test shape: (15,)


In [None]:
# Print unique class labels in the training set
unique_classes_train = train_df_dim1['target'].unique()
print("Unique class labels in the training set:", unique_classes_train)

# Print unique class labels in the test set
unique_classes_test = test_df_dim1['target'].unique()
print("Unique class labels in the test set:", unique_classes_test)


Unique class labels in the training set: [b'n' b's' b't']
Unique class labels in the test set: [b'n' b's' b't']


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.io import arff

# Load both dimensions for training set
train_file_path_dim1 = '/content/AtrialFibrillationDimension1_TRAIN.arff'
train_file_path_dim2 = '/content/AtrialFibrillationDimension2_TRAIN.arff'

# Load ARFF files into DataFrames for training
train_data_dim1, train_meta_dim1 = arff.loadarff(train_file_path_dim1)
train_df_dim1 = pd.DataFrame(train_data_dim1)

train_data_dim2, train_meta_dim2 = arff.loadarff(train_file_path_dim2)
train_df_dim2 = pd.DataFrame(train_data_dim2)

# Load both dimensions for test set
test_file_path_dim1 = '/content/AtrialFibrillationDimension1_TEST.arff'
test_file_path_dim2 = '/content/AtrialFibrillationDimension2_TEST.arff'

test_data_dim1, test_meta_dim1 = arff.loadarff(test_file_path_dim1)
test_df_dim1 = pd.DataFrame(test_data_dim1)

test_data_dim2, test_meta_dim2 = arff.loadarff(test_file_path_dim2)
test_df_dim2 = pd.DataFrame(test_data_dim2)

# Step 1: Separate features and labels for both dimensions in training set
train_features_dim1 = train_df_dim1.drop(columns=['target'])
train_features_dim2 = train_df_dim2.drop(columns=['target'])

# Updated label mapping
train_labels = train_df_dim1['target'].apply(lambda x: 0 if x == b'n' else (1 if x == b's' else 2))

# Step 2: Separate features and labels for both dimensions in test set
test_features_dim1 = test_df_dim1.drop(columns=['target'])
test_features_dim2 = test_df_dim2.drop(columns=['target'])

# Updated label mapping for test set
test_labels = test_df_dim1['target'].apply(lambda x: 0 if x == b'n' else (1 if x == b's' else 2))

# Step 3: Normalize the features using StandardScaler for both dimensions in train and test sets
scaler_dim1 = StandardScaler()
scaler_dim2 = StandardScaler()

# Standardize each dimension independently
train_features_dim1_normalized = scaler_dim1.fit_transform(train_features_dim1)
train_features_dim2_normalized = scaler_dim2.fit_transform(train_features_dim2)

test_features_dim1_normalized = scaler_dim1.transform(test_features_dim1)  # Use the same scaler for test data
test_features_dim2_normalized = scaler_dim2.transform(test_features_dim2)

# Step 4: Stack both dimensions along a new axis (multivariate time-series)
X_train = np.stack([train_features_dim1_normalized, train_features_dim2_normalized], axis=-1)
X_test = np.stack([test_features_dim1_normalized, test_features_dim2_normalized], axis=-1)

# Step 5: Convert labels to a NumPy array
y_train = train_labels.values
y_test = test_labels.values

# Step 6: Split the training data into training and validation sets (80% train, 20% validation)
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.20, random_state=42)

# Output the shapes of the datasets for inspection
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_valid shape: {X_valid.shape}, y_valid shape: {y_valid.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")


X_train shape: (12, 640, 2), y_train shape: (12,)
X_valid shape: (3, 640, 2), y_valid shape: (3,)
X_test shape: (15, 640, 2), y_test shape: (15,)


In [None]:
import torch

# Calculate the number of unique classes again
n_classes_train = len(np.unique(y_train))

# Convert the data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_valid_tensor = torch.tensor(X_valid, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

# Print the number of unique classes and dataset shapes in the desired format
print(f"Number of classes: {n_classes_train}")
print(f"X_train shape: {X_train_tensor.shape}, y_train shape: {y_train.shape}")
print(f"X_valid shape: {X_valid_tensor.shape}, y_valid shape: {y_valid.shape}")
print(f"X_test shape: {X_test_tensor.shape}, y_test shape: {y_test.shape}")


Number of classes: 3
X_train shape: torch.Size([12, 640, 2]), y_train shape: (12,)
X_valid shape: torch.Size([3, 640, 2]), y_valid shape: (3,)
X_test shape: torch.Size([15, 640, 2]), y_test shape: (15,)


### RacketSports

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.io import arff

# Define the paths for the ARFF files
train_arff_paths = [
    '/content/RacketSportsDimension1_TRAIN.arff',
    '/content/RacketSportsDimension2_TRAIN.arff',
    '/content/RacketSportsDimension3_TRAIN.arff',
    '/content/RacketSportsDimension4_TRAIN.arff',
    '/content/RacketSportsDimension5_TRAIN.arff',
    '/content/RacketSportsDimension6_TRAIN.arff'
]

test_arff_paths = [
    '/content/RacketSportsDimension1_TEST.arff',
    '/content/RacketSportsDimension2_TEST.arff',
    '/content/RacketSportsDimension3_TEST.arff',
    '/content/RacketSportsDimension4_TEST.arff',
    '/content/RacketSportsDimension5_TEST.arff',
    '/content/RacketSportsDimension6_TEST.arff'
]

# Step 1: Load all training and test dimensions
train_dfs = [pd.DataFrame(arff.loadarff(path)[0]) for path in train_arff_paths]
test_dfs = [pd.DataFrame(arff.loadarff(path)[0]) for path in test_arff_paths]

# Step 2: Separate features and labels for all dimensions
train_features = [df.drop(columns=['activity']) for df in train_dfs]
test_features = [df.drop(columns=['activity']) for df in test_dfs]

# Step 3: Create a label mapping for the four unique class labels
label_mapping = {
    b'Badminton_Smash': 0,
    b'Badminton_Clear': 1,
    b'Squash_ForehandBoast': 2,
    b'Squash_BackhandBoast': 3
}

# Apply the label mapping to the training and test sets
train_labels = train_dfs[0]['activity'].apply(lambda x: label_mapping[x]).values
test_labels = test_dfs[0]['activity'].apply(lambda x: label_mapping[x]).values

# Step 4: Normalize the features using StandardScaler for each dimension
scalers = [StandardScaler() for _ in range(6)]  # 6 dimensions

train_features_normalized = [scalers[i].fit_transform(train_features[i]) for i in range(6)]
test_features_normalized = [scalers[i].transform(test_features[i]) for i in range(6)]

# Step 5: Stack all dimensions along a new axis (multivariate time-series)
X_train = np.stack(train_features_normalized, axis=-1)
X_test_full = np.stack(test_features_normalized, axis=-1)

# Step 6: Split the test data into validation and test sets
X_valid, X_test, y_valid, y_test = train_test_split(X_test_full, test_labels, test_size=0.60, random_state=42)

y_train = train_labels

# Step 7: Calculate and print the number of unique classes
n_classes = len(np.unique(train_labels))

# Output the number of classes and the shapes of the datasets
print(f"Number of classes: {n_classes}")
print(f"X_train shape: {X_train.shape}, y_train shape: {train_labels.shape}")
print(f"X_valid shape: {X_valid.shape}, y_valid shape: {y_valid.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")


Number of classes: 4
X_train shape: (151, 30, 6), y_train shape: (151,)
X_valid shape: (60, 30, 6), y_valid shape: (60,)
X_test shape: (92, 30, 6), y_test shape: (92,)


### SelfRegulationSCP1

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.io import arff

# Define the paths for the ARFF files
train_arff_paths = [
    '/content/SelfRegulationSCP1Dimension1_TRAIN.arff',
    '/content/SelfRegulationSCP1Dimension2_TRAIN.arff',
    '/content/SelfRegulationSCP1Dimension3_TRAIN.arff',
    '/content/SelfRegulationSCP1Dimension4_TRAIN.arff',
    '/content/SelfRegulationSCP1Dimension5_TRAIN.arff',
    '/content/SelfRegulationSCP1Dimension6_TRAIN.arff'
]

test_arff_paths = [
    '/content/SelfRegulationSCP1Dimension1_TEST.arff',
    '/content/SelfRegulationSCP1Dimension2_TEST.arff',
    '/content/SelfRegulationSCP1Dimension3_TEST.arff',
    '/content/SelfRegulationSCP1Dimension4_TEST.arff',
    '/content/SelfRegulationSCP1Dimension5_TEST.arff',
    '/content/SelfRegulationSCP1Dimension6_TEST.arff'
]

# Step 1: Load all training and test dimensions
train_dfs = [pd.DataFrame(arff.loadarff(path)[0]) for path in train_arff_paths]
test_dfs = [pd.DataFrame(arff.loadarff(path)[0]) for path in test_arff_paths]

# Step 2: Separate features and labels for all dimensions
train_features = [df.drop(columns=['cortical']) for df in train_dfs]
test_features = [df.drop(columns=['cortical']) for df in test_dfs]

# Step 3: Create a label mapping for the four unique class labels
label_mapping = {
    b'negativity': 0,
    b'positivity': 1
}

# Apply the label mapping to the training and test sets
train_labels = train_dfs[0]['cortical'].apply(lambda x: label_mapping[x]).values
test_labels = test_dfs[0]['cortical'].apply(lambda x: label_mapping[x]).values

# Step 4: Normalize the features using StandardScaler for each dimension
scalers = [StandardScaler() for _ in range(6)]  # 6 dimensions

train_features_normalized = [scalers[i].fit_transform(train_features[i]) for i in range(6)]
test_features_normalized = [scalers[i].transform(test_features[i]) for i in range(6)]

# Step 5: Stack all dimensions along a new axis (multivariate time-series)
X_train = np.stack(train_features_normalized, axis=-1)
X_test_full = np.stack(test_features_normalized, axis=-1)

# Step 6: Split the test data into validation and test sets
X_valid, X_test, y_valid, y_test = train_test_split(X_test_full, test_labels, test_size=0.60, random_state=42)

y_train = train_labels

# Step 7: Calculate and print the number of unique classes
n_classes = len(np.unique(train_labels))

# Output the number of classes and the shapes of the datasets
print(f"Number of classes: {n_classes}")
print(f"X_train shape: {X_train.shape}, y_train shape: {train_labels.shape}")
print(f"X_valid shape: {X_valid.shape}, y_valid shape: {y_valid.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")


Number of classes: 2
X_train shape: (268, 896, 6), y_train shape: (268,)
X_valid shape: (117, 896, 6), y_valid shape: (117,)
X_test shape: (176, 896, 6), y_test shape: (176,)


### LSST

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.io import arff

# Define the paths for the ARFF files
train_arff_paths = [
    '/content/LSSTDimension1_TRAIN.arff',
    '/content/LSSTDimension2_TRAIN.arff',
    '/content/LSSTDimension3_TRAIN.arff',
    '/content/LSSTDimension4_TRAIN.arff',
    '/content/LSSTDimension5_TRAIN.arff',
    '/content/LSSTDimension6_TRAIN.arff'
]

test_arff_paths = [
    '/content/LSSTDimension1_TEST.arff',
    '/content/LSSTDimension2_TEST.arff',
    '/content/LSSTDimension3_TEST.arff',
    '/content/LSSTDimension4_TEST.arff',
    '/content/LSSTDimension5_TEST.arff',
    '/content/LSSTDimension6_TEST.arff'
]

# Step 1: Load all training and test dimensions
train_dfs = [pd.DataFrame(arff.loadarff(path)[0]) for path in train_arff_paths]
test_dfs = [pd.DataFrame(arff.loadarff(path)[0]) for path in test_arff_paths]

# Step 2: Separate features and labels for all dimensions
train_features = [df.drop(columns=['target']) for df in train_dfs]
test_features = [df.drop(columns=['target']) for df in test_dfs]

# Combine unique labels from both training and test sets
unique_labels = np.unique(np.concatenate([train_dfs[0]['target'].unique(), test_dfs[0]['target'].unique()]))

# Create a mapping from the byte-encoded labels to integers
label_mapping = {label: idx for idx, label in enumerate(unique_labels)}

# Apply the mapping to convert byte-encoded labels to integers in both training and test sets
train_labels = train_dfs[0]['target'].apply(lambda x: label_mapping[x]).values
test_labels = test_dfs[0]['target'].apply(lambda x: label_mapping[x]).values

# Apply the label mapping to the training and test sets
train_labels = train_dfs[0]['target'].apply(lambda x: label_mapping[x]).values
test_labels = test_dfs[0]['target'].apply(lambda x: label_mapping[x]).values

# Step 4: Normalize the features using StandardScaler for each dimension
scalers = [StandardScaler() for _ in range(6)]  # 6 dimensions

train_features_normalized = [scalers[i].fit_transform(train_features[i]) for i in range(6)]
test_features_normalized = [scalers[i].transform(test_features[i]) for i in range(6)]

# Step 5: Stack all dimensions along a new axis (multivariate time-series)
X_train = np.stack(train_features_normalized, axis=-1)
X_test_full = np.stack(test_features_normalized, axis=-1)

# Step 6: Split the test data into validation and test sets
X_valid, X_test, y_valid, y_test = train_test_split(X_test_full, test_labels, test_size=0.60, random_state=42)

y_train = train_labels

# Step 7: Calculate and print the number of unique classes
n_classes = len(np.unique(train_labels))

# Output the number of classes and the shapes of the datasets
print(f"Number of classes: {n_classes}")
print(f"X_train shape: {X_train.shape}, y_train shape: {train_labels.shape}")
print(f"X_valid shape: {X_valid.shape}, y_valid shape: {y_valid.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")


Number of classes: 14
X_train shape: (2459, 36, 6), y_train shape: (2459,)
X_valid shape: (986, 36, 6), y_valid shape: (986,)
X_test shape: (1480, 36, 6), y_test shape: (1480,)


In [None]:
# Print the column names of the first dimension's training data to inspect the label column
print(train_dfs[0].columns)

Index(['channel_0_0', 'channel_0_1', 'channel_0_2', 'channel_0_3',
       'channel_0_4', 'channel_0_5', 'channel_0_6', 'channel_0_7',
       'channel_0_8', 'channel_0_9', 'channel_0_10', 'channel_0_11',
       'channel_0_12', 'channel_0_13', 'channel_0_14', 'channel_0_15',
       'channel_0_16', 'channel_0_17', 'channel_0_18', 'channel_0_19',
       'channel_0_20', 'channel_0_21', 'channel_0_22', 'channel_0_23',
       'channel_0_24', 'channel_0_25', 'channel_0_26', 'channel_0_27',
       'channel_0_28', 'channel_0_29', 'channel_0_30', 'channel_0_31',
       'channel_0_32', 'channel_0_33', 'channel_0_34', 'channel_0_35',
       'target'],
      dtype='object')


In [None]:
# Inspect the unique labels in the 'activity' column
print("Unique class labels in the training set:", train_dfs[0]['target'].unique())
print("Unique class labels in the test set:", test_dfs[0]['target'].unique())


Unique class labels in the training set: [b'6' b'15' b'16' b'42' b'52' b'53' b'62' b'64' b'65' b'67' b'88' b'90'
 b'92' b'95']
Unique class labels in the test set: [b'6' b'15' b'16' b'42' b'52' b'53' b'62' b'64' b'65' b'67' b'88' b'90'
 b'92' b'95']


### SharePriceIncrease

In [64]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.io import arff
import torch
from torch.utils.data import TensorDataset, DataLoader

# Paths for the ARFF files (update with your local paths)
train_arff_path = '/content/SharePriceIncrease_TRAIN.arff'
test_arff_path = '/content/SharePriceIncrease_TEST.arff'

# Step 1: Load the ARFF files for training and test sets
train_data, train_meta = arff.loadarff(train_arff_path)
test_data, test_meta = arff.loadarff(test_arff_path)

# Convert to Pandas DataFrames
train_df = pd.DataFrame(train_data)
test_df = pd.DataFrame(test_data)

# Step 2: If the training set has an extra row, we can restore it to 965 samples
train_df = train_df.iloc[:965]  # Ensure 965 samples
test_df = test_df.iloc[:965]  # Ensure 965 samples

# Step 3: Separate features and labels for both training and test sets
train_features = train_df.drop(columns=['class'])  # Assuming 'class' is the label column
test_features = test_df.drop(columns=['class'])

# Use the labels from the 'class' column
train_labels = train_df['class'].apply(lambda x: int(x)).values  # Assuming labels are binary
test_labels = test_df['class'].apply(lambda x: int(x)).values

# Step 4: Normalize the features using StandardScaler
scaler = StandardScaler()

train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features)

# Step 5: Reshape the features into 3D arrays (samples, time_steps, dimensions)
X_train = train_features_normalized.reshape(-1, 60, 1)  # 60 time steps, 1 dimension
X_test = test_features_normalized.reshape(-1, 60, 1)

# Step 6: Split the test data into validation and test sets, ensuring equal sizes
# Set test_size=0.5 to ensure a 50-50 split
X_valid, X_test, y_valid, y_test = train_test_split(X_test, test_labels, test_size=0.60, random_state=42)

y_train = train_labels

# Step 7: Convert data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.int64)

X_valid = torch.tensor(X_valid, dtype=torch.float32)
y_valid = torch.tensor(y_valid, dtype=torch.int64)

X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.int64)

# DataLoaders
batch_size = 64
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)

valid_dataset = TensorDataset(X_valid, y_valid)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

test_dataset = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

# Step 8: Calculate and print the number of unique classes
n_classes = len(np.unique(train_labels))

# Output the number of classes and the shapes of the datasets
print(f"Number of classes: {n_classes}")

# Output dataset shapes
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_valid shape: {X_valid.shape}, y_valid shape: {y_valid.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")


Number of classes: 2
X_train shape: torch.Size([965, 60, 1]), y_train shape: torch.Size([965])
X_valid shape: torch.Size([386, 60, 1]), y_valid shape: torch.Size([386])
X_test shape: torch.Size([579, 60, 1]), y_test shape: torch.Size([579])


In [54]:
# Print the columns of the DataFrame to find the correct label column
print(train_df.columns)
print(test_df.columns)


Index(['t-60', 't-59', 't-58', 't-57', 't-56', 't-55', 't-54', 't-53', 't-52',
       't-51', 't-50', 't-49', 't-48', 't-47', 't-46', 't-45', 't-44', 't-43',
       't-42', 't-41', 't-40', 't-39', 't-38', 't-37', 't-36', 't-35', 't-34',
       't-33', 't-32', 't-31', 't-30', 't-29', 't-28', 't-27', 't-26', 't-25',
       't-24', 't-23', 't-22', 't-21', 't-20', 't-19', 't-18', 't-17', 't-16',
       't-15', 't-14', 't-13', 't-12', 't-11', 't-10', 't-9', 't-8', 't-7',
       't-6', 't-5', 't-4', 't-3', 't-2', 't-1', 'class'],
      dtype='object')
Index(['t-60', 't-59', 't-58', 't-57', 't-56', 't-55', 't-54', 't-53', 't-52',
       't-51', 't-50', 't-49', 't-48', 't-47', 't-46', 't-45', 't-44', 't-43',
       't-42', 't-41', 't-40', 't-39', 't-38', 't-37', 't-36', 't-35', 't-34',
       't-33', 't-32', 't-31', 't-30', 't-29', 't-28', 't-27', 't-26', 't-25',
       't-24', 't-23', 't-22', 't-21', 't-20', 't-19', 't-18', 't-17', 't-16',
       't-15', 't-14', 't-13', 't-12', 't-11', 't-10'

In [56]:
print(f"Number of rows in the training set: {len(train_df)}")
print(f"Number of rows in the test set: {len(test_df)}")

Number of rows in the training set: 966
Number of rows in the test set: 966


In [57]:
# Check for duplicate rows in the DataFrames
train_duplicates = train_df[train_df.duplicated()]
test_duplicates = test_df[test_df.duplicated()]

print(f"Number of duplicates in the training set: {len(train_duplicates)}")
print(f"Number of duplicates in the test set: {len(test_duplicates)}")


Number of duplicates in the training set: 0
Number of duplicates in the test set: 0


### MelbournePedestrian

In [7]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.io import arff
import torch
from torch.utils.data import TensorDataset, DataLoader

# Paths for the ARFF files (update with your local paths)
train_arff_path = '/content/MelbournePedestrian_nmv_TRAIN.arff'
test_arff_path = '/content/MelbournePedestrian_nmv_TEST.arff'

# Step 1: Load the ARFF files for training and test sets
train_data, train_meta = arff.loadarff(train_arff_path)
test_data, test_meta = arff.loadarff(test_arff_path)

# Convert to Pandas DataFrames
train_df = pd.DataFrame(train_data)
test_df = pd.DataFrame(test_data)

# Step 2: Separate features and labels for both training and test sets
train_features = train_df.drop(columns=['target'])  # Assuming 'class' is the label column
test_features = test_df.drop(columns=['target'])

# Use the labels from the 'class' column
train_labels = train_df['target'].apply(lambda x: int(x)).values  # Assuming labels are numeric
test_labels = test_df['target'].apply(lambda x: int(x)).values

# Step 3: Normalize the features using StandardScaler
scaler = StandardScaler()

train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features)

# Step 4: Reshape the features into 3D arrays (samples, time_steps, dimensions)
# Since this dataset has 24 time steps and 1 dimension
X_train = train_features_normalized.reshape(-1, 24, 1)  # 24 time steps, 1 dimension
X_test = test_features_normalized.reshape(-1, 24, 1)

# Step 5: Split the test data into validation and test sets (optional)
X_valid, X_test, y_valid, y_test = train_test_split(X_test, test_labels, test_size=0.50, random_state=42)

# Step 6: Convert data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(train_labels, dtype=torch.int64)

X_valid = torch.tensor(X_valid, dtype=torch.float32)
y_valid = torch.tensor(y_valid, dtype=torch.int64)

X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.int64)

# DataLoaders
batch_size = 64
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)

valid_dataset = TensorDataset(X_valid, y_valid)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

test_dataset = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

# Step 8: Calculate and print the number of unique classes
n_classes = len(np.unique(train_labels))

# Output the number of classes and the shapes of the datasets
print(f"Number of classes: {n_classes}")

# Output dataset shapes
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_valid shape: {X_valid.shape}, y_valid shape: {y_valid.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")


Number of classes: 10
X_train shape: torch.Size([1138, 24, 1]), y_train shape: torch.Size([1138])
X_valid shape: torch.Size([1159, 24, 1]), y_valid shape: torch.Size([1159])
X_test shape: torch.Size([1160, 24, 1]), y_test shape: torch.Size([1160])


In [3]:
# Print the columns of the DataFrame to find the correct label column
print(train_df.columns)
print(test_df.columns)

Index(['att1', 'att2', 'att3', 'att4', 'att5', 'att6', 'att7', 'att8', 'att9',
       'att10', 'att11', 'att12', 'att13', 'att14', 'att15', 'att16', 'att17',
       'att18', 'att19', 'att20', 'att21', 'att22', 'att23', 'att24',
       'target'],
      dtype='object')
Index(['att1', 'att2', 'att3', 'att4', 'att5', 'att6', 'att7', 'att8', 'att9',
       'att10', 'att11', 'att12', 'att13', 'att14', 'att15', 'att16', 'att17',
       'att18', 'att19', 'att20', 'att21', 'att22', 'att23', 'att24',
       'target'],
      dtype='object')


### ElectricDevices

In [10]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.io import arff
import torch
from torch.utils.data import TensorDataset, DataLoader

# Paths for the ARFF files (update with your local paths)
train_arff_path = '/content/ElectricDevices_TRAIN.arff'
test_arff_path = '/content/ElectricDevices_TEST.arff'

# Step 1: Load the ARFF files for training and test sets
train_data, train_meta = arff.loadarff(train_arff_path)
test_data, test_meta = arff.loadarff(test_arff_path)

# Convert to Pandas DataFrames
train_df = pd.DataFrame(train_data)
test_df = pd.DataFrame(test_data)

# Step 2: Separate features and labels for both training and test sets
train_features = train_df.drop(columns=['target'])  # Assuming 'class' is the label column
test_features = test_df.drop(columns=['target'])

# Use the labels from the 'class' column
train_labels = train_df['target'].apply(lambda x: int(x)).values  # Assuming labels are numeric
test_labels = test_df['target'].apply(lambda x: int(x)).values

# Step 3: Normalize the features using StandardScaler
scaler = StandardScaler()

train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features)

# Step 4: Reshape the features into 3D arrays (samples, time_steps, dimensions)
# Since this dataset has 24 time steps and 1 dimension
X_train = train_features_normalized.reshape(-1, 96, 1)  # 24 time steps, 1 dimension
X_test = test_features_normalized.reshape(-1, 96, 1)

# Step 5: Split the test data into validation and test sets (optional)
X_valid, X_test, y_valid, y_test = train_test_split(X_test, test_labels, test_size=0.50, random_state=42)

# Step 6: Convert data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(train_labels, dtype=torch.int64)

X_valid = torch.tensor(X_valid, dtype=torch.float32)
y_valid = torch.tensor(y_valid, dtype=torch.int64)

X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.int64)

# DataLoaders
batch_size = 64
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)

valid_dataset = TensorDataset(X_valid, y_valid)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

test_dataset = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

# Step 8: Calculate and print the number of unique classes
n_classes = len(np.unique(train_labels))

# Output the number of classes and the shapes of the datasets
print(f"Number of classes: {n_classes}")

# Output dataset shapes
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_valid shape: {X_valid.shape}, y_valid shape: {y_valid.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")


Number of classes: 7
X_train shape: torch.Size([8926, 96, 1]), y_train shape: torch.Size([8926])
X_valid shape: torch.Size([3855, 96, 1]), y_valid shape: torch.Size([3855])
X_test shape: torch.Size([3856, 96, 1]), y_test shape: torch.Size([3856])


In [9]:
# Inspect the first few lines of the .ts file
with open(train_file_path, 'r') as file:
    for i, line in enumerate(file):
        print(line.strip())
        if i >= 10:  # Limit to the first 10 lines
            break

@problemName train.ts
@timestamps false
@univariate true
@equalLength true
@seriesLength 178
@classLabel true 0 1 2 3 4
@data
6.0420027,1.8691087,7.432967,-8.997802,-10.041026,-20.212454,-8.128449,-9.95409,-7.085226,-13.431501,4.911844,-6.1289377,-1.4344323,2.303785,-1.0866911,-14.909402,-10.214896,-7.432967,-12.388278,-11.779732,-2.912332,-4.5641026,0.3912088,-3.6078143,4.3032966,5.34652,-4.477167,-7.6068377,1.1736264,7.7807083,9.171673,2.1299145,-2.912332,3.520879,9.519414,-3.520879,-7.172161,-15.691819,-2.7384615,-8.736997,8.389256,5.955067,18.47375,-5.5203905,-1.347497,-8.476191,-3.8686202,-18.734554,-10.388767,-0.3912088,7.693773,9.258608,13.692307,3.8686202,5.694261,-0.043467645,4.042491,0.9128205,-2.303785,8.476191,7.3460317,-6.5636144,8.389256,1.6952381,13.8661785,1.2605617,10.041026,-13.344566,-1.956044,0.3912088,4.911844,0.82588524,11.431991,2.9992673,18.47375,5.0857143,9.345543,10.649572,7.693773,0.30427352,-0.13040294,5.34652,7.8676434,11.692796,25.776312,3.9555554,9.345543

### Sleep

In [11]:
import numpy as np

# Function to load the .ts file
def load_ts_file(file_path):
    data = []
    labels = []
    with open(file_path, 'r') as file:
        is_metadata = True
        for line in file:
            line = line.strip()
            if is_metadata:
                if line.lower() == "@data":
                    is_metadata = False
                continue
            if len(line) > 0:
                # Try to split the line into series and label
                try:
                    series, label = line.split(':')
                    series = np.array(series.split(','), dtype=np.float32)
                    data.append(series)
                    labels.append(int(label))  # Convert the label to an integer
                except ValueError:
                    print(f"Skipping invalid line: {line}")
                    continue  # Skip lines that don't conform to the expected format
    return np.array(data), np.array(labels)

# Test by loading the file
train_file_path = '/content/Sleep_TRAIN.ts'
X_train, y_train = load_ts_file(train_file_path)

# Print the loaded data
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")


Skipping invalid line: 29.18022,30.915506,30.550182,27.262272,17.581196,7.808791,8.996093,13.562637,22.787058,25.435654,27.992918,14.019292,-5.2515264,0.95897436,-3.2422466,7.900122,18.95116,18.768497,16.941881,6.7128205,-3.1509159,-11.644689,-10.000732,-8.813431,0.6849817,1.1416361,3.6989012,10.274725,15.0239315,9.361417,7.3521366,6.6214895,10.183394,11.462027,9.72674,7.808791,10.457387,5.0688643,1.4156288,14.658608,22.695726,28.449574,32.468132,31.006838,25.344322,16.667887,18.403175,18.403175,15.2065935,10.7313795,6.5301585,9.452747,4.3382173,-0.7763126,-10.000732,-18.494505,-26.348963,-32.10281,-38.130646,-40.59658,-44.249817,-45.163124,-42.4232,-50.186325,-50.186325,-52.378265,-56.214165,-56.76215,-59.593407,-59.228085,-51.46496,-55.026863,-54.93553,-50.64298,-54.296215,-49.181686,-48.359707,-49.54701,-45.61978,-44.889133,-45.43712,-44.43248,-45.163124,-47.172405,-44.797802,-39.59194,-33.29011,-32.742123,-26.896948,-22.787058,-24.339682,-24.339682,-21.873749,-10.640049,1.3242979,7

In [13]:
import numpy as np
import torch
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader

# Function to load the .ts file
def load_ts_file(file_path):
    data = []
    labels = []
    with open(file_path, 'r') as file:
        is_metadata = True
        for line in file:
            line = line.strip()
            if is_metadata:
                if line.lower() == "@data":
                    is_metadata = False
                continue
            if len(line) > 0:
                try:
                    series, label = line.split(':')
                    series = np.array(series.split(','), dtype=np.float32)
                    data.append(series)
                    labels.append(int(label))  # Convert the label to an integer
                except ValueError:
                    print(f"Skipping invalid line: {line}")
                    continue
    return np.array(data), np.array(labels)

# Paths to the .ts files (update with your paths)
train_data_path = '/content/Sleep_TEST.ts'
test_data_path = '/content/Sleep_TEST.ts'

# Load the train and test datasets
X_train, y_train = load_ts_file(train_file_path)
X_test, y_test = load_ts_file(test_file_path)

# Normalize the features (optional)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Reshape the features to have a third dimension (samples, time_steps, 1)
X_train_scaled = X_train_scaled.reshape(-1, X_train_scaled.shape[1], 1)
X_test_scaled = X_test_scaled.reshape(-1, X_test_scaled.shape[1], 1)

# Split the test data into validation and test sets
X_valid_scaled, X_test_scaled, y_valid, y_test = train_test_split(X_test_scaled, y_test, test_size=0.50, random_state=42)

# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.int64)

X_valid_tensor = torch.tensor(X_valid_scaled, dtype=torch.float32)
y_valid_tensor = torch.tensor(y_valid, dtype=torch.int64)

X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.int64)

# Create DataLoaders
batch_size = 64
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
valid_dataset = TensorDataset(X_valid_tensor, y_valid_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Calculate the number of unique classes
n_classes = len(np.unique(y_train))

# Print the shapes and number of classes
print(f"Number of classes: {n_classes}")
print(f"X_train shape: {X_train_tensor.shape}, y_train shape: {y_train_tensor.shape}")
print(f"X_valid shape: {X_valid_tensor.shape}, y_valid shape: {y_valid_tensor.shape}")
print(f"X_test shape: {X_test_tensor.shape}, y_test shape: {y_test_tensor.shape}")


Skipping invalid line: -20.742857,-25.674725,-20.356045,-10.589011,7.591209,19.485714,34.861538,40.083515,44.62857,41.630768,35.73187,31.863737,33.604397,36.6022,38.43956,38.246155,32.734066,18.808792,9.041759,3.7230768,14.553846,17.261538,9.912087,5.6571426,5.753846,6.720879,-1.3054945,-14.747252,-21.903297,-29.736263,-29.542856,-29.156044,-32.34725,-38.246155,-48.98022,-50.527473,-51.204395,-52.84835,-55.362637,-53.815384,-50.914288,-51.784615,-52.84835,-55.556046,-57.393406,-57.78022,-60.197803,-68.03077,-66.38681,-72.09231,-71.512085,-64.64616,-56.426373,-54.105495,-47.43297,-48.206593,-49.56044,-41.630768,-41.14725,-28.76923,-28.865934,-33.12088,-33.797802,-34.764835,-36.698902,-40.663734,-38.923077,-34.861538,-27.705494,-23.934067,-21.226374,-20.25934,-15.810989,-18.51868,-7.978022,-4.4,10.008791,25.094505,30.413187,33.314285,38.246155,30.703297,31.573626,29.349451,27.512089,22.483517,24.901098,20.452747,21.323076,21.903297,21.323076,28.865934,31.573626,34.184616,39.503296,55.169