<a href="https://colab.research.google.com/github/tingweiwu17/colab_machine_learing/blob/main/ECG_Heartbeat_Categorization_using_CNN_and_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#### 資料集來源: https://www.kaggle.com/shayanfazeli/heartbeat
#### 參考資料與程式碼:
*   https://blog.csdn.net/cskywit/article/details/87460704
*   https://blog.csdn.net/weixin_39685674/article/details/110278175
*   https://www.kaggle.com/arpandas65/covid-19-projection-using-lstm
*   https://www.kaggle.com/amirrezaeian/time-series-data-analysis-using-lstm-tutorial
*   https://brohrer.mcknote.com/zh-Hant/how_machine_learning_works/how_rnns_lstm_work.html

In [None]:
!pip uninstall gdown -y && pip install gdown
import gdown

In [None]:
# Download from Google Drive
# (gdown is given a Drive file id; presumably it saves ecg-kaggle.zip — confirm)
!gdown 1Zc2otIItjH_QMIWmVmt3Ez-aFHdn9uYx
# Extract into the working directory: -o overwrites existing files without
# prompting, -q keeps the output quiet.
!unzip -o -q ecg-kaggle.zip

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import os

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, utils, callbacks, metrics

import cv2
tf.__version__

In [None]:
# Number of target classes; used for one-hot encoding and the output layer size.
num_classes = 5

# Load the training table from the extracted archive.
df = pd.read_csv('kaggle/train.csv')

In [None]:
# Preview the first rows of the loaded table as a quick sanity check.
df.head()

In [None]:
# Hold out 20% of the rows for validation.
# - random_state fixes the shuffle so the split (and every downstream metric)
#   is reproducible across kernel restarts.
# - stratify keeps the class proportions of the label (last column) the same
#   in both subsets.
df_train, df_val = train_test_split(
    df, test_size=0.2, random_state=42, stratify=df.iloc[:, -1]
)

In [None]:
def _signals_and_onehot(frame):
    """Split a frame into model inputs and one-hot targets.

    The last column holds the label; every other column is part of the ECG
    signal. The signals are returned with a trailing channel axis and the
    labels are one-hot encoded over ``num_classes`` categories.
    """
    signals = np.expand_dims(frame.iloc[:, :-1], axis=-1)
    onehot = utils.to_categorical(frame.iloc[:, -1], num_classes=num_classes)
    return signals, onehot


x_train, y_train = _signals_and_onehot(df_train)
x_val, y_val = _signals_and_onehot(df_val)

In [None]:
# Display the array shapes as a sanity check.
# x_* layout: (number of samples, signal length, channel)
x_train.shape, y_train.shape, x_val.shape, y_val.shape

In [None]:
# Pick one training example at random, print its one-hot label and draw its waveform.
idx = np.random.randint(len(x_train))
sample_signal, sample_label = x_train[idx], y_train[idx]
print('label: ', sample_label)
plt.plot(sample_signal)

In [None]:
# Per-class sample counts for each split: (class ids, counts) recovered from the one-hot labels.
train_class_counts = np.unique(np.argmax(y_train, axis=-1), return_counts=True)
val_class_counts = np.unique(np.argmax(y_val, axis=-1), return_counts=True)
train_class_counts, val_class_counts

#### CNN

In [None]:
def conv_bn(x, filters, strides=1):
    """Apply one Conv1D -> BatchNormalization -> ReLU stage to a tensor.

    Args:
        x: input tensor of the stage.
        filters: number of convolution filters.
        strides: convolution stride (defaults to 1).

    Returns:
        The tensor produced by the ReLU layer.
    """
    conv_out = layers.Conv1D(filters, kernel_size=3, strides=strides, padding='same')(x)
    normalized = layers.BatchNormalization()(conv_out)
    return layers.ReLU()(normalized)

def build_model(input_shape):
    """Assemble and compile the 1-D convolutional classifier.

    Seven ``conv_bn`` stages (two of them with stride 2) feed a global average
    pooling layer and a softmax layer with ``num_classes`` units. The model is
    compiled with the Adam optimizer, categorical cross-entropy loss and the
    accuracy metric.

    Args:
        input_shape: shape of a single sample, without the batch dimension.

    Returns:
        The compiled Keras model.
    """
    # (filters, strides) of every convolutional stage, in order.
    stage_specs = [
        (64, 1), (64, 1), (64, 2),
        (128, 1), (128, 2),
        (256, 1), (256, 1),
    ]

    inputs = layers.Input(shape=input_shape)
    features = inputs
    for n_filters, stride in stage_specs:
        features = conv_bn(features, n_filters, strides=stride)

    pooled = layers.GlobalAveragePooling1D()(features)
    outputs = layers.Dense(num_classes, activation='softmax')(pooled)

    classifier = models.Model(inputs, outputs)
    classifier.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return classifier

In [None]:
# Build the CNN for the per-sample shape (everything after the batch axis)
# and print its layer summary.
sample_shape = x_train.shape[1:]
model = build_model(input_shape=sample_shape)
model.summary()

In [None]:
# Stop once val_loss has not improved for 30 epochs.
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=30)
# Keep only the weights with the best val_loss seen so far on disk.
best_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath='model.h5',
    monitor='val_loss',
    save_best_only=True,
)
callback = [early_stop, best_checkpoint]

In [None]:
# Train the CNN, validating on the held-out split after every epoch.
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    batch_size=32,
    epochs=100,
    callbacks=callback,
)

In [None]:
def cls_report(path, x_val, y_val):
    """Load a saved model and print its classification metrics.

    Args:
        path: location of the saved model, passed to ``models.load_model``.
        x_val: inputs to predict on.
        y_val: per-class target array; the argmax over its last axis is taken
            as the true class.

    Prints the classification report followed by the confusion matrix.
    """
    saved_model = models.load_model(path)
    predicted_classes = np.argmax(saved_model.predict(x_val), axis=-1)
    true_classes = np.argmax(y_val, axis=-1)
    for summarize in (classification_report, confusion_matrix):
        print(summarize(true_classes, predicted_classes))

In [None]:
# Evaluate the CNN checkpoint saved during training (best val_loss) on the validation split.
cls_report('model.h5', x_val, y_val)

#### LSTM

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import LSTM
from tensorflow.keras.utils import plot_model
from tensorflow.keras import Input
from tensorflow.keras.layers import BatchNormalization

In [None]:
# Training hyper-parameters used when fitting the LSTM model.
epochs = 100
batch_size = 32

In [None]:
# Display the array shapes again before reshaping for the LSTM.
# x_* layout: (number of samples, signal length, channel)
x_train.shape, y_train.shape, x_val.shape, y_val.shape

In [None]:
# Reshaping the data for LSTM
def _as_single_step(batch):
    """Reshape ``batch`` to (samples, 1, batch.shape[1])."""
    n_samples, signal_len = batch.shape[0], batch.shape[1]
    return batch.reshape((n_samples, 1, signal_len))


# NOTE(review): with this layout every sample is a sequence of length 1 whose
# feature vector is the whole signal, so the LSTM never steps through time.
x_train_lstm = _as_single_step(x_train)
x_val_lstm = _as_single_step(x_val)
timesteps, n_features = x_train_lstm.shape[1:]
print(x_train_lstm.shape, y_train.shape, x_val_lstm.shape, y_val.shape)

In [None]:
# Show the dimensions that define the LSTM input shape and the output layer size.
print(timesteps, n_features, num_classes)

In [None]:
# Stacked LSTM Model
model_lstm = Sequential()
model_lstm.add(LSTM(50, activation='relu', input_shape=(timesteps, n_features),return_sequences=True))
model_lstm.add(LSTM(150, activation='relu'))
model_lstm.add(Dense(num_classes, activation='relu'))
model_lstm.summary()

In [None]:
# Compiling the model
model_lstm.compile(optimizer='adam', loss=tf.keras.losses.MeanSquaredLogarithmicError())
callbacks = [ReduceLROnPlateau(monitor='val_loss', patience=5, verbose=1, factor=0.6),
        EarlyStopping(monitor='val_loss', patience=10),
        ModelCheckpoint(filepath='model_lstm.h5', monitor='val_loss', save_best_only=True)]
# fit the model
hist=model_lstm.fit(x_train_lstm, y_train, epochs=epochs, batch_size=batch_size, validation_data=(x_val_lstm, y_val), verbose=2, 
         shuffle=True,callbacks=callbacks)

In [None]:
# summarize history for loss
# The second curve is the validation loss recorded by fit(), so it is labelled
# as validation rather than test.
plt.plot(hist.history['loss'], label='train')
plt.plot(hist.history['val_loss'], label='validation')
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(loc='upper right')
plt.show()

In [None]:
# Evaluate the LSTM checkpoint saved during training (best val_loss) on the validation split.
cls_report('model_lstm.h5', x_val_lstm, y_val)