In [3]:
import h5py
import numpy as np

# Load data function
def load_data(file_path):
    """Read the four sample arrays from an HDF5 file into a dict.

    Parameters
    ----------
    file_path : str
        Path of the HDF5 file, opened read-only.

    Returns
    -------
    dict
        Keys 'X', 'Y', 'S', 'H' holding the contents of the file's
        'data', 'labels', 'sessions' and 'hardwares' datasets respectively.

    Raises
    ------
    KeyError
        If the file lacks one of the four expected datasets.
    """
    # Short in-memory key -> dataset name stored in the file.
    dataset_names = {
        'X': 'data',
        'Y': 'labels',
        'S': 'sessions',
        'H': 'hardwares',
    }
    with h5py.File(file_path, "r") as h5_file:
        # `[:]` pulls each dataset out while the file is still open.
        return {short: h5_file[name][:] for short, name in dataset_names.items()}

# Load the test set and the negative set (load_data reads the same four
# datasets from each file, so both dicts have the keys 'X', 'Y', 'S', 'H').
data_test = load_data("../Data/test_raw.h5")
data_neg = load_data("../Data/neg_raw.h5")

# Append the negative samples to the test samples, key by key, along axis 0.
for key in data_test:
    data_test[key] = np.concatenate((data_test[key], data_neg[key]), axis=0)

# One permutation is applied to every array below, so they must all have the
# same number of samples; otherwise rows would be silently mis-paired.
n_samples = len(data_test['X'])
for key in data_test:
    if len(data_test[key]) != n_samples:
        raise ValueError(
            f"Array '{key}' has {len(data_test[key])} samples, expected {n_samples}"
        )

# Shuffle the combined data with a fixed seed so that re-running the notebook
# from a fresh kernel reproduces exactly the same sample order.
SHUFFLE_SEED = 42
indices = np.random.default_rng(SHUFFLE_SEED).permutation(n_samples)
for key in data_test:
    data_test[key] = data_test[key][indices]

# Split data based on hardware
def split_and_save_data(data):
    """Write one HDF5 file per distinct hardware value in ``data['H']``.

    Parameters
    ----------
    data : dict
        Must contain the keys 'X', 'Y', 'S' and 'H'. Each value is indexed
        with a boolean mask derived from ``data['H']``.

    Side effects
    ------------
    For every distinct hardware value, creates (or overwrites)
    ``../Data/test_hardware_<name>.h5`` containing the datasets
    'X', 'Y', 'S', 'H' restricted to that hardware, and prints a
    confirmation line.

    NOTE(review): the datasets are stored under 'X'/'Y'/'S'/'H', which are not
    the names ('data'/'labels'/'sessions'/'hardwares') that load_data reads --
    confirm this is what downstream readers expect.
    """
    hardware_column = data['H']
    for hw in np.unique(hardware_column):
        # Byte-string hardware names are decoded once for the path and the log line.
        hw_label = hw.decode('utf-8') if isinstance(hw, bytes) else hw
        out_path = f"../Data/test_hardware_{hw_label}.h5"

        # Select the rows belonging to this hardware before touching the disk.
        selected = hardware_column == hw
        subset = {field: data[field][selected] for field in ('X', 'Y', 'S', 'H')}

        with h5py.File(out_path, 'w') as out_file:
            for field, values in subset.items():
                out_file.create_dataset(field, data=values)
        print(f"Data for hardware {hw_label} saved to {out_path}")

# Split the shuffled test+negative data into one
# ../Data/test_hardware_<name>.h5 file per hardware type.
split_and_save_data(data_test)


Data for hardware BioSemi saved to ../Data/test_hardware_BioSemi.h5
Data for hardware Geodisi saved to ../Data/test_hardware_Geodisi.h5
Data for hardware HydroCe saved to ../Data/test_hardware_HydroCe.h5


In [4]:
import h5py
import numpy as np

# Load data function
def load_data(file_path):
    """Load the sample arrays of one HDF5 file.

    Opens ``file_path`` read-only and returns a dict whose keys 'X', 'Y',
    'S', 'H' hold the file's 'data', 'labels', 'sessions' and 'hardwares'
    datasets, in that order. A missing dataset raises ``KeyError``.
    """
    # (key in the returned dict, dataset name inside the file)
    fields = (
        ('X', 'data'),
        ('Y', 'labels'),
        ('S', 'sessions'),
        ('H', 'hardwares'),
    )
    loaded = {}
    with h5py.File(file_path, "r") as source:
        for short_key, dataset_name in fields:
            # Slice while the file is open; the result is used after it closes.
            loaded[short_key] = source[dataset_name][:]
    return loaded

# Load the training set (a single file; nothing is concatenated here).
data_train = load_data("../Data/train_raw.h5")

# One permutation is applied to every array below, so they must all have the
# same number of samples; otherwise rows would be silently mis-paired.
n_samples = len(data_train['X'])
for key in data_train:
    if len(data_train[key]) != n_samples:
        raise ValueError(
            f"Array '{key}' has {len(data_train[key])} samples, expected {n_samples}"
        )

# Shuffle the training data with a fixed seed so that re-running the notebook
# from a fresh kernel reproduces exactly the same sample order.
SHUFFLE_SEED = 42
indices = np.random.default_rng(SHUFFLE_SEED).permutation(n_samples)
for key in data_train:
    data_train[key] = data_train[key][indices]

# Split data based on hardware
def split_and_save_data(data):
    """Write one HDF5 file per distinct hardware value in ``data['H']``.

    Parameters
    ----------
    data : dict
        Must contain the keys 'X', 'Y', 'S' and 'H'. Each value is indexed
        with a boolean mask derived from ``data['H']``.

    Side effects
    ------------
    For every distinct hardware value, creates (or overwrites)
    ``../Data/train_hardware_<name>.h5`` and prints a confirmation line.
    The datasets are named 'data', 'labels', 'sessions' and 'hardwares',
    i.e. the names load_data reads, so each output file can be loaded back
    with load_data. (The hardware dataset used to be written as 'H', which
    load_data cannot read.)
    """
    hardware_column = data['H']
    for hardware in np.unique(hardware_column):
        # Byte-string hardware names are decoded once for the path and the log line.
        hardware_name = hardware.decode('utf-8') if isinstance(hardware, bytes) else hardware

        # Filter data for this hardware
        mask = hardware_column == hardware
        filtered_data = {
            'data': data['X'][mask],
            'labels': data['Y'][mask],
            'sessions': data['S'][mask],
            'hardwares': data['H'][mask],
        }

        # Save filtered data to a new HDF5 file
        file_name = f"../Data/train_hardware_{hardware_name}.h5"
        with h5py.File(file_name, 'w') as f:
            for key, value in filtered_data.items():
                f.create_dataset(key, data=value)
        print(f"Data for hardware {hardware_name} saved to {file_name}")

# Split the shuffled training data into one
# ../Data/train_hardware_<name>.h5 file per hardware type.
split_and_save_data(data_train)


Data for hardware BioSemi saved to ../Data/train_hardware_BioSemi.h5
Data for hardware Geodisi saved to ../Data/train_hardware_Geodisi.h5
Data for hardware HydroCe saved to ../Data/train_hardware_HydroCe.h5


In [5]:
import h5py
import numpy as np
from collections import Counter

# Load the dataset.
# NOTE: despite the *_test suffix, these arrays are read from the training
# file (train_raw.h5); the names are kept so later cells keep working.
with h5py.File("../Data/train_raw.h5", "r") as f:
    X_test = f['data'][:]
    Y_test = f['labels'][:]
    S_test = f['sessions'][:]
    H_test = f['hardwares'][:]

# Initialize a new list to store indices to keep
indices_to_keep = []

# Find unique labels
unique_labels = np.unique(Y_test)

# For each label keep only the samples recorded on the hardware that occurs
# most often for that label; samples of the label from other hardware are dropped.
for label in unique_labels:
    # Get indices of the current label
    label_indices = np.where(Y_test == label)[0]
    # Extract hardware types for the current label
    hardware_for_label = H_test[label_indices]
    # Count occurrences of each hardware
    hardware_counts = Counter(hardware_for_label)
    # Find the hardware with the majority occurrences. On a tie, max() returns
    # the first maximal key, i.e. the hardware encountered first for this label.
    majority_hardware = max(hardware_counts, key=hardware_counts.get)
    # Keep indices for the majority hardware
    majority_indices = label_indices[hardware_for_label == majority_hardware]
    indices_to_keep.extend(majority_indices)

# Filter the dataset.
# Force an integer dtype: np.array([]) would otherwise be float64 and fail as
# an index when the input file contains no samples.
indices_to_keep = np.array(indices_to_keep, dtype=np.intp)
X_filtered = X_test[indices_to_keep]
Y_filtered = Y_test[indices_to_keep]
S_filtered = S_test[indices_to_keep]
H_filtered = H_test[indices_to_keep]

# Save the filtered dataset under the same dataset names as the input file.
with h5py.File("../Data/train_raw_unconnected.h5", "w") as f_out:
    f_out.create_dataset('data', data=X_filtered)
    f_out.create_dataset('labels', data=Y_filtered)
    f_out.create_dataset('sessions', data=S_filtered)
    f_out.create_dataset('hardwares', data=H_filtered)

print("Filtered dataset saved as train_raw_unconnected.h5.")


Filtered dataset saved as train_raw_unconnected.h5.


In [1]:
import h5py
import numpy as np

# Load data function
def load_data(file_path):
    """Return the contents of an HDF5 sample file as a dict of arrays.

    The file at ``file_path`` is opened read-only. Its 'data', 'labels',
    'sessions' and 'hardwares' datasets are returned under the keys
    'X', 'Y', 'S' and 'H' respectively; a missing dataset raises ``KeyError``.
    """
    short_keys = ('X', 'Y', 'S', 'H')
    dataset_names = ('data', 'labels', 'sessions', 'hardwares')
    with h5py.File(file_path, "r") as handle:
        # Each dataset is sliced with [:] before the file is closed.
        arrays = [handle[name][:] for name in dataset_names]
    return dict(zip(short_keys, arrays))

# Load the filtered training set (a single file; nothing is concatenated here).
# NOTE: the variable is called data_test, but it holds train_raw_unconnected.h5.
data_test = load_data("../Data/train_raw_unconnected.h5")

# One permutation is applied to every array below, so they must all have the
# same number of samples; otherwise rows would be silently mis-paired.
n_samples = len(data_test['X'])
for key in data_test:
    if len(data_test[key]) != n_samples:
        raise ValueError(
            f"Array '{key}' has {len(data_test[key])} samples, expected {n_samples}"
        )

# Shuffle with a fixed seed so that re-running the notebook from a fresh
# kernel reproduces exactly the same sample order.
SHUFFLE_SEED = 42
indices = np.random.default_rng(SHUFFLE_SEED).permutation(n_samples)
for key in data_test:
    data_test[key] = data_test[key][indices]

# Split data based on hardware
def split_and_save_data(data):
    """Write one HDF5 file per distinct hardware value in ``data['H']``.

    Parameters
    ----------
    data : dict
        Must contain the keys 'X', 'Y', 'S' and 'H'. Each value is indexed
        with a boolean mask derived from ``data['H']``.

    Side effects
    ------------
    For every distinct hardware value, creates (or overwrites)
    ``../Data/train_unconnected_hardware_<name>.h5`` with the datasets
    'data', 'labels', 'sessions' and 'hardwares' restricted to that
    hardware, and prints a confirmation line.
    """
    # Dataset name in the output file -> key of the source array in `data`.
    output_layout = {
        'data': 'X',
        'labels': 'Y',
        'sessions': 'S',
        'hardwares': 'H',
    }
    hardware_column = data['H']
    for hw in np.unique(hardware_column):
        # Byte-string hardware names are decoded once for the path and the log line.
        hw_label = hw.decode('utf-8') if isinstance(hw, bytes) else hw
        out_path = f"../Data/train_unconnected_hardware_{hw_label}.h5"

        # Select the rows belonging to this hardware before touching the disk.
        selected = hardware_column == hw
        subset = {name: data[src][selected] for name, src in output_layout.items()}

        with h5py.File(out_path, 'w') as out_file:
            for name, values in subset.items():
                out_file.create_dataset(name, data=values)
        print(f"Data for hardware {hw_label} saved to {out_path}")

# data_test holds the shuffled contents of train_raw_unconnected.h5 here;
# write one ../Data/train_unconnected_hardware_<name>.h5 file per hardware type.
split_and_save_data(data_test)


Data for hardware BioSemi saved to ../Data/train_unconnected_hardware_BioSemi.h5
Data for hardware Geodisi saved to ../Data/train_unconnected_hardware_Geodisi.h5
Data for hardware HydroCe saved to ../Data/train_unconnected_hardware_HydroCe.h5
