In [None]:
import os
import numpy as np 
import pandas as pd 
from sklearn.preprocessing import minmax_scale
from sklearn.model_selection import train_test_split
from keras.layers import Input, Dense
from keras.models import Model
from numpy.random import seed

# **Task 01 : ECG data**

In [None]:
DF= pd.read_csv("/content/ECG-12Lead.csv")
DF.isnull().sum()
DF = DF.dropna()
mean = DF.mean()
std = DF.std()
New_DF = (DF-mean)/std
X = New_DF.iloc[:, :]
X_train, X_test= train_test_split(X, test_size = 0.2, random_state = 50)
ncol = X.shape[1]
print(X_train.shape, X_test.shape, ncol)
print(New_DF)

(13875, 12) (3469, 12) 12
            -49       -43         6  ...       -70       -85       -83
0     -0.497260 -0.312651  0.128602  ... -0.195806 -0.304358 -0.415875
1     -0.512798 -0.253659  0.268800  ... -0.181961 -0.279205 -0.373585
2     -0.559411 -0.283155  0.268800  ... -0.203717 -0.299327 -0.380633
3     -0.458415 -0.247760  0.210384  ... -0.209650 -0.299327 -0.373585
4     -0.427339 -0.218264  0.210384  ... -0.225472 -0.311904 -0.391206
...         ...       ...       ...  ...       ...       ...       ...
17339 -0.295266  0.000006  0.444048  ... -0.211628 -0.236444 -0.415875
17340 -0.279728  0.005905  0.444048  ... -0.215583 -0.243990 -0.412351
17341 -0.303035  0.017703  0.490781  ... -0.221517 -0.256567 -0.415875
17342 -0.318573  0.000006  0.467415  ... -0.203717 -0.228898 -0.394730
17343 -0.334111  0.035401  0.572564  ... -0.211628 -0.233929 -0.422923

[17344 rows x 12 columns]


# Part 1 Autoencoder

In [None]:
encoding_dim = 3
input_dim = Input(shape = (ncol, ))
# Encoder Layers
encoded1 = Dense(8, activation = 'relu')(input_dim)
encoded2 = Dense(6, activation = 'relu')(encoded1)
encoded3 = Dense(encoding_dim, activation = 'relu')(encoded2)
# Decoder Layers
decoded1 = Dense(6, activation = 'relu')(encoded3)
decoded2 = Dense(8, activation = 'relu')(decoded1)
decoded3 = Dense(ncol, activation = 'linear')(decoded2)
# Combine Encoder and Deocder layers
autoencoder = Model(inputs = input_dim, outputs = decoded3)

# Compile the Model
autoencoder.compile(optimizer = 'adadelta', loss = 'mse')
autoencoder.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 12)]              0         
_________________________________________________________________
dense (Dense)                (None, 8)                 104       
_________________________________________________________________
dense_1 (Dense)              (None, 6)                 54        
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 21        
_________________________________________________________________
dense_3 (Dense)              (None, 6)                 24        
_________________________________________________________________
dense_4 (Dense)              (None, 8)                 56        
_________________________________________________________________
dense_5 (Dense)              (None, 12)                108   

In [None]:
# Training a Auto Encoder
autoencoder.fit(X_train, X_train, epochs = 10, batch_size = 32, shuffle = False, validation_data = (X_test, X_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f240014f4d0>

In [None]:
# Getting the encoded values
encoder = Model(inputs = input_dim, outputs = encoded3)
encoded_input = Input(shape = (encoding_dim, ))
# Decoding the values back
deco = autoencoder.layers[-3](encoded_input)
deco = autoencoder.layers[-2](deco)
deco = autoencoder.layers[-1](deco)
# create the decoder model
decoder = Model(encoded_input, deco)

In [None]:
encoded_train = pd.DataFrame(encoder.predict(X_train))
encoded_test = pd.DataFrame(encoder.predict(X_test))
decoded_train = pd.DataFrame(decoder.predict(encoded_train))
decoded_test = pd.DataFrame(decoder.predict(encoded_test))
# Checking the dimensions
print(encoded_train.shape)
print(decoded_train.shape)
# Merging the decoded test and train data to compare with X
decoded_train = decoded_train.iloc[:, :]
decoded_test = decoded_test.iloc[:, :]
decoded = pd.concat([decoded_train, decoded_test])

(13875, 3)
(13875, 12)


# **Part 2 PCA**

In [None]:
import pandas as pd
import numpy as np
import sklearn
import sklearn.datasets
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical, plot_model
from PIL import Image as im
from termcolor import colored
from mpl_toolkits.mplot3d import Axes3D

In [None]:
from sklearn.decomposition import PCA
import cv2
pca = PCA(n_components=3)
pca.fit(X)
trans_pca = pca.transform(X)
arr = pca.inverse_transform(trans_pca)

# **Ques 1 : Error calculation**

In [None]:
# error calculation
pred = decoded.to_numpy()
pred_pca = arr[:,:]
true = X.to_numpy()
print(pred.shape)
print(pred_pca.shape)
print(true.shape)

from keras import backend as K

error = K.sqrt(K.mean(K.square(pred - true)))
error_pca = K.sqrt(K.mean(K.square(pred_pca - true)))
print("RMSE of autoencoder is",error)
print("RMSE of PCA is",error_pca)

(17344, 12)
(17344, 12)
(17344, 12)
RMSE of autoencoder is tf.Tensor(1.002682937395468, shape=(), dtype=float64)
RMSE of PCA is tf.Tensor(0.08638926897050965, shape=(), dtype=float64)


# **TASK 02 : Large Data**

In [None]:
DF1= pd.read_csv("/content/ECG-12Lead.csv")
print(DF1)

       -49  -43   6  47  -27  -18  131  -57  -97  -70  -85  -83
0      -64  -53  11  59  -38  -22  186  -75 -131  -99 -121 -118
1      -66  -43  23  54  -45   -9  169  -65 -111  -92 -111 -106
2      -72  -48  23  60  -47  -13  181  -77 -123 -103 -119 -108
3      -59  -42  18  50  -39  -12  172  -89 -129 -106 -119 -106
4      -55  -37  18  46  -36  -10  173 -106 -144 -114 -124 -111
...    ...  ...  ..  ..  ...  ...  ...  ...  ...  ...  ...  ...
17339  -38    0  38  18  -39   19   61  -63  -78 -107  -94 -118
17340  -36    1  38  17  -37   21   55  -47  -71 -109  -97 -117
17341  -39    3  42  19  -40   23   54  -53  -71 -112 -102 -118
17342  -41    0  40  20  -41   20   58  -51  -68 -103  -91 -112
17343  -43    6  49  19  -46   27   68  -57  -71 -107  -93 -120

[17344 rows x 12 columns]


In [None]:
mean1 = DF1.mean()
std1 = DF1.std()
New_DF1 = (DF1-mean1)/std1
#DF = DF.iloc[:,:80]
DF1 = New_DF1.fillna(0)
print(DF1.isnull().sum())
print(DF1)

-49    0
-43    0
6      0
47     0
-27    0
-18    0
131    0
-57    0
-97    0
-70    0
-85    0
-83    0
dtype: int64
            -49       -43         6  ...       -70       -85       -83
0     -0.497260 -0.312651  0.128602  ... -0.195806 -0.304358 -0.415875
1     -0.512798 -0.253659  0.268800  ... -0.181961 -0.279205 -0.373585
2     -0.559411 -0.283155  0.268800  ... -0.203717 -0.299327 -0.380633
3     -0.458415 -0.247760  0.210384  ... -0.209650 -0.299327 -0.373585
4     -0.427339 -0.218264  0.210384  ... -0.225472 -0.311904 -0.391206
...         ...       ...       ...  ...       ...       ...       ...
17339 -0.295266  0.000006  0.444048  ... -0.211628 -0.236444 -0.415875
17340 -0.279728  0.005905  0.444048  ... -0.215583 -0.243990 -0.412351
17341 -0.303035  0.017703  0.490781  ... -0.221517 -0.256567 -0.415875
17342 -0.318573  0.000006  0.467415  ... -0.203717 -0.228898 -0.394730
17343 -0.334111  0.035401  0.572564  ... -0.211628 -0.233929 -0.422923

[17344 rows x 12 columns]


In [None]:
X1 = DF1.iloc[:, :]
X1_train, X1_test= train_test_split(X1, test_size = 0.2, random_state = 50)
print(X1_train.shape, X1_test.shape)
ncol1 = X1.shape[1]
print(X1)

(13875, 12) (3469, 12)
            -49       -43         6  ...       -70       -85       -83
0     -0.497260 -0.312651  0.128602  ... -0.195806 -0.304358 -0.415875
1     -0.512798 -0.253659  0.268800  ... -0.181961 -0.279205 -0.373585
2     -0.559411 -0.283155  0.268800  ... -0.203717 -0.299327 -0.380633
3     -0.458415 -0.247760  0.210384  ... -0.209650 -0.299327 -0.373585
4     -0.427339 -0.218264  0.210384  ... -0.225472 -0.311904 -0.391206
...         ...       ...       ...  ...       ...       ...       ...
17339 -0.295266  0.000006  0.444048  ... -0.211628 -0.236444 -0.415875
17340 -0.279728  0.005905  0.444048  ... -0.215583 -0.243990 -0.412351
17341 -0.303035  0.017703  0.490781  ... -0.221517 -0.256567 -0.415875
17342 -0.318573  0.000006  0.467415  ... -0.203717 -0.228898 -0.394730
17343 -0.334111  0.035401  0.572564  ... -0.211628 -0.233929 -0.422923

[17344 rows x 12 columns]


# Part 1 Autoencoder

In [None]:
encoding_dim1 = 80
input_dim1 = Input(shape = (ncol1, ))
# Encoder Layers
encoded1 = Dense(200, activation = 'relu')(input_dim1)
encoded2 = Dense(128, activation = 'relu')(encoded1)
encoded3 = Dense(encoding_dim1, activation = 'relu')(encoded2)
# Decoder Layers
decoded1 = Dense(128, activation = 'relu')(encoded3)
decoded2 = Dense(200, activation = 'relu')(decoded1)
decoded3 = Dense(ncol1, activation = 'relu')(decoded2)
# Combine Encoder and Deocder layers
autoencoder1 = Model(inputs = input_dim1, outputs = decoded3)

# Compile the Model
autoencoder1.compile(optimizer = 'adadelta', loss = 'mse')
autoencoder1.summary()

Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 12)]              0         
_________________________________________________________________
dense_6 (Dense)              (None, 200)               2600      
_________________________________________________________________
dense_7 (Dense)              (None, 128)               25728     
_________________________________________________________________
dense_8 (Dense)              (None, 80)                10320     
_________________________________________________________________
dense_9 (Dense)              (None, 128)               10368     
_________________________________________________________________
dense_10 (Dense)             (None, 200)               25800     
_________________________________________________________________
dense_11 (Dense)             (None, 12)                2412

In [None]:
# Training a Auto Encoder
autoencoder1.fit(X1_train, X1_train, epochs = 10, batch_size = 32, shuffle = False, validation_data = (X1_test, X1_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f236fbf9c10>

In [None]:
# Getting the encoded values
encoder1 = Model(inputs = input_dim1, outputs = encoded3)
encoded_input = Input(shape = (encoding_dim1, ))
# Decoding the values back
deco = autoencoder1.layers[-3](encoded_input)
deco = autoencoder1.layers[-2](deco)
deco = autoencoder1.layers[-1](deco)
# create the decoder model
decoder1 = Model(encoded_input, deco)

In [None]:
encoded1_train = pd.DataFrame(encoder1.predict(X1_train))
encoded1_test = pd.DataFrame(encoder1.predict(X1_test))
decoded1_train = pd.DataFrame(decoder1.predict(encoded1_train))
decoded1_test = pd.DataFrame(decoder1.predict(encoded1_test))
# Checking the dimensions
print(encoded1_train.shape)
print(decoded1_train.shape)
# Merging the decoded test and train data to compare with X
decoded1_train = decoded1_train.iloc[:, :]
decoded1_test = decoded1_test.iloc[:, :]
decoded1 = pd.concat([decoded1_train, decoded1_test])

(13875, 80)
(13875, 12)


# **Part 2 PCA**

In [None]:
from sklearn.decomposition import PCA
import cv2
pca = PCA(n_components=12)
pca.fit(X1)
trans_pca = pca.transform(X1)
arr1 = pca.inverse_transform(trans_pca)
pred_pca1 = arr1[:,:]
#pred = decoded.to_numpy()
true1 = X1.to_numpy()
print(pred_pca1.shape)
print(true1.shape)

(17344, 12)
(17344, 12)


# **Ques 2 Erro calculation**

In [None]:
# error calculation
pred1 = decoded1.to_numpy()
true1 = X1.to_numpy()
print(pred1.shape)
print(true1.shape)

from keras import backend as K

error1 = K.sqrt(K.mean(K.square(pred1 - true1)))
error1_pca = K.sqrt(K.mean(K.square(pred_pca1 - true1)))
print("RMSE of autoencoder is",error1)
print("RMSE of PCA is",error1_pca)

(17344, 12)
(17344, 12)
RMSE of autoencoder is tf.Tensor(1.012610903252347, shape=(), dtype=float64)
RMSE of PCA is tf.Tensor(7.354611593023853e-16, shape=(), dtype=float64)
