# Setting for training on TPUs

In [1]:
import os

os.environ['TPU_NAME'] = 'local'

os.environ['NEXT_PLUGGABLE_DEVICE_USE_C_API'] = 'true'
os.environ['TF_PLUGGABLE_DEVICE_LIBRARY_PATH'] = 'local'
os.environ['TF_PLUGGABLE_DEVICE_LIBRARY_PATH'] = '/lib/libtpu.so'

In [3]:
import tensorflow as tf
print("Tensorflow version " + tf.__version__)


Tensorflow version 2.8.0


In [4]:

cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver()


In [5]:
tf.config.experimental_connect_to_cluster(cluster_resolver)
tf.tpu.experimental.initialize_tpu_system(cluster_resolver)


INFO:tensorflow:Deallocate tpu buffers before initializing tpu system.
INFO:tensorflow:Initializing the TPU system: local
INFO:tensorflow:Finished initializing TPU system.


<tensorflow.python.tpu.topology.Topology at 0x7f0b5cef3160>

In [6]:
strategy = tf.distribute.TPUStrategy(cluster_resolver)

INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:0, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:1, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:2, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:3, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:4, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:5, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttrib

In [7]:
print("All devices: ", tf.config.list_logical_devices('TPU'))

All devices:  [LogicalDevice(name='/device:TPU:0', device_type='TPU'), LogicalDevice(name='/device:TPU:1', device_type='TPU'), LogicalDevice(name='/device:TPU:2', device_type='TPU'), LogicalDevice(name='/device:TPU:3', device_type='TPU'), LogicalDevice(name='/device:TPU:4', device_type='TPU'), LogicalDevice(name='/device:TPU:5', device_type='TPU'), LogicalDevice(name='/device:TPU:6', device_type='TPU'), LogicalDevice(name='/device:TPU:7', device_type='TPU')]


In [44]:
with strategy.scope():
#     model = tf.keras.Sequential([
#         tf.keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(10000, 2)),
#         tf.keras.layers.MaxPooling1D(pool_size=2),
#         tf.keras.layers.Conv1D(filters=128, kernel_size=3, activation='relu'),
#         tf.keras.layers.MaxPooling1D(pool_size=2),
#         tf.keras.layers.Flatten(),
#         tf.keras.layers.Dense(128, activation='relu'),
#         tf.keras.layers.Dropout(0.5),
#         tf.keras.layers.Dense(6, activation='softmax')
#     ])
    model = tf.keras.Sequential([
    tf.keras.layers.Conv1D(64, kernel_size=3, activation='relu', input_shape=(10000, 2)),
    tf.keras.layers.MaxPooling1D(pool_size=2),
    tf.keras.layers.Conv1D(128, kernel_size=3, activation='relu'),
    tf.keras.layers.MaxPooling1D(pool_size=2),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.LSTM(64, return_sequences=False),
    tf.keras.layers.Dense(128, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(6, activation='softmax')  # 6 classes for exercise intensity levels
    ])
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.001,
    decay_steps=10000,
    decay_rate=0.9)
    model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),  # Set your desired learning rate
    loss='categorical_crossentropy',
    metrics=['accuracy']

    )

In [12]:
import tensorflow as tf

# Check the device assignment for operations
for layer in model.layers:
    for weight in layer.weights:
        print(f"{weight.name} is assigned to {weight.device}")

conv1d/kernel:0 is assigned to /job:localhost/replica:0/task:0/device:TPU:0
conv1d/bias:0 is assigned to /job:localhost/replica:0/task:0/device:TPU:0
conv1d_1/kernel:0 is assigned to /job:localhost/replica:0/task:0/device:TPU:0
conv1d_1/bias:0 is assigned to /job:localhost/replica:0/task:0/device:TPU:0
dense/kernel:0 is assigned to /job:localhost/replica:0/task:0/device:TPU:0
dense/bias:0 is assigned to /job:localhost/replica:0/task:0/device:TPU:0
dense_1/kernel:0 is assigned to /job:localhost/replica:0/task:0/device:TPU:0
dense_1/bias:0 is assigned to /job:localhost/replica:0/task:0/device:TPU:0


# Process Data

In [19]:
import scipy.io
import numpy as np
import os
import pandas as pd
# Paths to files
mat_files_directory = "./physionet.org/files/ephnogram/1.0.0/MAT/"
pandas_csv_file_path = "./physionet.org/files/ephnogram/1.0.0/ECGPCGSpreadsheet.csv"

In [20]:
# Mapping from 'recording scenario' to label (1 to 6)
scenario_to_label = {
    'rest: laying on bed': 1,
    'rest: sitting on armchair': 2,
    'exercise: walking at constant speed': 3,
    'exercise: pedaling a stationary bicycle': 4,
    'exercise: bicycle stress test': 5,
    'exercise: bruce protocol treadmill stress test': 6,
}

In [21]:
# Read the CSV file into a pandas DataFrame
### Maping file to labels
df = pd.read_csv(pandas_csv_file_path)

# Create a dictionary mapping filenames to labels
file_to_label = {}
for index, row in df.iterrows():
    filename = row['Record Name']  # Adjust column name if necessary
    scenario = row['Recording Scenario']  # Adjust column name if necessary
    # Check if the scenario can be mapped to a label
    if type(scenario) == str:
        if scenario.lower() in scenario_to_label:
            label = scenario_to_label[scenario.lower()]
            file_to_label[filename] = label
    else:
        # Skip scenarios that don't map to a clear label
        continue

In [22]:
def preprocess_data(ecg_signal, pcg_signal):
    # Normalize ECG and PCG signals
    ecg_signal = (ecg_signal - np.min(ecg_signal)) / (np.max(ecg_signal) - np.min(ecg_signal))
    pcg_signal = (pcg_signal - np.min(pcg_signal)) / (np.max(pcg_signal) - np.min(pcg_signal))

    # Combine ECG and PCG signals into one dataset
    combined_signal = np.stack((ecg_signal, pcg_signal), axis=-1)

    return combined_signal

In [23]:
import os
from multiprocessing import Pool, cpu_count
def process_mat_file(file_path, label):
    # Load the .mat file
    mat_data = scipy.io.loadmat(file_path)
    
    # Extract first channel of ECG and the only channel of PCG
    ecg_data = mat_data['ECG'][0]  # Taking only the first channel of ECG
    pcg_data = mat_data['PCG'][0]  # Assuming only one channel for PCG
    
    # Preprocess the signals
    combined_signal = preprocess_data(ecg_data, pcg_data)
    
    # Number of points in the signal
    num_points = combined_signal.shape[0]
    
    # Split data into segments of 10,000 points
    samples = []
    for i in range(0, num_points, 10000):
        if i + 10000 <= num_points:
            sample = combined_signal[i:i+10000]
            samples.append((sample, label))
    
    return samples

In [12]:
# def process_single_file(args):
#     file_path, label = args
#     samples = process_mat_file(file_path, label)
#     return samples

# def process_files_in_parallel(mat_files_directory, file_to_label):
#     all_samples = []
#     mat_files = [f for f in os.listdir(mat_files_directory) if f.endswith('.mat')]
    
#     # Prepare the arguments for each file
#     args_list = []
#     for mat_file in mat_files:
#         file_path = os.path.join(mat_files_directory, mat_file)
#         name = mat_file.split('.')[0]
#         if name in file_to_label:
#             label = file_to_label[name]
#             args_list.append((file_path, label))

#     # Use multiprocessing to process files in parallel
#     with Pool(processes=70) as pool:
#         results = pool.map(process_single_file, args_list)
    
#     # Combine the results
#     for result in results:
#         all_samples.extend(result)
    
#     return all_samples

In [13]:
# Usage
# all_samples = process_files_in_parallel(mat_files_directory, file_to_label)

In [24]:
# Process each .mat file and assign labels
all_samples = []
mat_files = [f for f in os.listdir(mat_files_directory) if f.endswith('.mat')]
print(mat_files)
for mat_file in mat_files:
    file_path = os.path.join(mat_files_directory, mat_file)
    name = mat_file.split('.')[0]
    if name in file_to_label:
        label = file_to_label[name]

        samples = process_mat_file(file_path, label)
        all_samples.extend(samples)  # Store all samples in one list

['ECGPCG0006.mat', 'ECGPCG0013.mat', 'ECGPCG0023.mat', 'ECGPCG0016.mat', 'ECGPCG0053.mat', 'ECGPCG0039.mat', 'ECGPCG0015.mat', 'ECGPCG0010.mat', 'ECGPCG0028.mat', 'ECGPCG0051.mat', 'ECGPCG0008.mat', 'ECGPCG0046.mat', 'ECGPCG0067.mat', 'ECGPCG0011.mat', 'ECGPCG0063.mat', 'ECGPCG0040.mat', 'ECGPCG0068.mat', 'ECGPCG0002.mat', 'ECGPCG0050.mat', 'ECGPCG0009.mat', 'ECGPCG0029.mat', 'ECGPCG0032.mat', 'ECGPCG0037.mat', 'ECGPCG0066.mat', 'ECGPCG0027.mat', 'ECGPCG0014.mat', 'ECGPCG0054.mat', 'ECGPCG0026.mat', 'ECGPCG0060.mat', 'ECGPCG0062.mat', 'ECGPCG0024.mat', 'ECGPCG0055.mat', 'ECGPCG0004.mat', 'ECGPCG0030.mat', 'ECGPCG0031.mat', 'ECGPCG0045.mat', 'ECGPCG0012.mat', 'ECGPCG0047.mat', 'ECGPCG0069.mat', 'ECGPCG0044.mat', 'ECGPCG0057.mat', 'ECGPCG0005.mat', 'ECGPCG0025.mat', 'ECGPCG0003.mat', 'ECGPCG0042.mat', 'ECGPCG0052.mat', 'ECGPCG0049.mat', 'ECGPCG0018.mat', 'ECGPCG0038.mat', 'ECGPCG0034.mat', 'ECGPCG0048.mat', 'ECGPCG0017.mat', 'ECGPCG0056.mat', 'ECGPCG0019.mat', 'ECGPCG0001.mat', 'ECGPCG00

In [25]:
# Example: Print the number of samples and a sample label
example_sample = all_samples[0] if all_samples else None
if example_sample:
    print(f"Number of samples: {len(all_samples)}, Example sample label: {example_sample[1]}")
else:
    print("No samples were processed.")

Number of samples: 64992, Example sample label: 2


In [31]:
from sklearn.model_selection import train_test_split
import numpy as np
from tensorflow.keras.utils import to_categorical

def create_datasets(all_samples, test_size=0.2):
    # Separate signals and labels from the all_samples list
    signals = [sample[0] for sample in all_samples]
    labels = [sample[1] for sample in all_samples]
    
    # Convert lists to numpy arrays for better performance
    signals = np.array(signals)
    labels = np.array(labels)
    labels = to_categorical(labels - 1, num_classes=6)
    # Use train_test_split to split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(signals, labels, test_size=test_size, random_state=42)
    
    return X_train, X_test, y_train, y_test

# Example usage
X_train, X_test, y_train, y_test = create_datasets(all_samples, test_size=0.1)

# Output the shapes of the created datasets
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

X_train shape: (58492, 10000, 2)
X_test shape: (6500, 10000, 2)
y_train shape: (58492, 6)
y_test shape: (6500, 6)


In [38]:
X_train_sample ,y_train_sample = X_train[:1024], y_train[:1024]

In [62]:
# #### part of convert to training on TPUs
from tensorflow.keras.callbacks import TensorBoard
import datetime

In [56]:
# prepair dataset for Training on TPU
batch_size = 512
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(batch_size).prefetch(tf.data.AUTOTUNE)
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(batch_size).prefetch(tf.data.AUTOTUNE)

In [57]:
train_dataset

<PrefetchDataset element_spec=(TensorSpec(shape=(None, 10000, 2), dtype=tf.float64, name=None), TensorSpec(shape=(None, 6), dtype=tf.float32, name=None))>

In [58]:
for element in test_dataset:
    print(f"Dataset element is on: {element[0].device}")
    break  # Check the placement of just one batch

Dataset element is on: /job:localhost/replica:0/task:0/device:CPU:0


In [None]:
steps_per_epoch = 58492//batch_size
validation_steps = 6500//batch_size

model.fit(train_dataset,
          epochs=50,
#           steps_per_epoch=steps_per_epoch,
          validation_data=test_dataset, 
#           validation_steps=validation_steps
         )

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50

In [None]:
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)
model.fit(train_dataset,
          epochs=60,
#           steps_per_epoch=steps_per_epoch,
          validation_data=test_dataset,
          callbacks=[tensorboard_callback]
#           validation_steps=validation_steps
         )

Epoch 1/60

In [None]:
model.evaluate(test_dataset)
model.save('ecg_pcg_tpu_model.h5')

# Install some dependencies

In [None]:
!pip install scipy

In [None]:
!pip install pandas

In [None]:
!pip install scikit-learn

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, LSTM
from tensorflow.keras.optimizers import Adam

from sklearn.metrics import classification_report, accuracy_score