# Importing Libraries

In [2]:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import Model
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Plot defaults for the whole notebook: 10x8 figures, grid switched off.
mpl.rcParams.update({
    'figure.figsize': (10, 8),
    'axes.grid': False,
})

In [45]:
# Load the two ECG5000 splits and stack them into a single DataFrame.
# Both files are whitespace-delimited text with no header row.
# `sep=r'\s+'` replaces `delim_whitespace=True`, which is deprecated in
# recent pandas releases and emits a FutureWarning on every call.
Tr_df = pd.read_csv('ECG5000_TRAIN.txt', sep=r'\s+', header=None)
Ts_df = pd.read_csv('ECG5000_TEST.txt', sep=r'\s+', header=None)

# Concatenate the DataFrames vertically (add rows); ignore_index=True
# renumbers the rows so the two splits do not share index labels.
df = pd.concat([Tr_df, Ts_df], ignore_index=True)
df


  Tr_df = pd.read_csv('ECG5000_TRAIN.txt', delim_whitespace=True, header=None)
  Ts_df = pd.read_csv('ECG5000_TEST.txt', delim_whitespace=True, header=None)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,131,132,133,134,135,136,137,138,139,140
0,1.0,-0.112522,-2.827204,-3.773897,-4.349751,-4.376041,-3.474986,-2.181408,-1.818286,-1.250522,...,0.160348,0.792168,0.933541,0.796958,0.578621,0.257740,0.228077,0.123431,0.925286,0.193137
1,1.0,-1.100878,-3.996840,-4.285843,-4.506579,-4.022377,-3.234368,-1.566126,-0.992258,-0.754680,...,0.560327,0.538356,0.656881,0.787490,0.724046,0.555784,0.476333,0.773820,1.119621,-1.436250
2,1.0,-0.567088,-2.593450,-3.874230,-4.584095,-4.187449,-3.151462,-1.742940,-1.490659,-1.183580,...,1.284825,0.886073,0.531452,0.311377,-0.021919,-0.713683,-0.532197,0.321097,0.904227,-0.421797
3,1.0,0.490473,-1.914407,-3.616364,-4.318823,-4.268016,-3.881110,-2.993280,-1.671131,-1.333884,...,0.491173,0.350816,0.499111,0.600345,0.842069,0.952074,0.990133,1.086798,1.403011,-0.383564
4,1.0,0.800232,-0.874252,-2.384761,-3.973292,-4.338224,-3.802422,-2.534510,-1.783423,-1.594450,...,0.966606,1.148884,0.958434,1.059025,1.371682,1.277392,0.960304,0.971020,1.614392,1.421456
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,4.0,-1.122969,-2.252925,-2.867628,-3.358605,-3.167849,-2.638360,-1.664162,-0.935655,-0.866953,...,0.205543,-0.472419,-1.310147,-2.029521,-3.221294,-4.176790,-4.009720,-2.874136,-2.008369,-1.808334
4996,2.0,-0.547705,-1.889545,-2.839779,-3.457912,-3.929149,-3.966026,-3.492560,-2.695270,-1.849691,...,1.218185,1.258419,1.907530,2.280888,1.895242,1.437702,1.193433,1.261335,1.150449,0.804932
4997,2.0,-1.351779,-2.209006,-2.520225,-3.061475,-3.065141,-3.030739,-2.622720,-2.044092,-1.295874,...,-0.896575,-1.512234,-2.076075,-2.586042,-3.322799,-3.627311,-3.437038,-2.260023,-1.577823,-0.684531
4998,2.0,-1.124432,-1.905039,-2.192707,-2.904320,-2.900722,-2.761252,-2.569705,-2.043893,-1.490538,...,-2.495989,-2.821782,-3.268355,-3.634981,-3.168765,-2.245878,-1.262260,-0.443307,-0.559769,0.108568


### The first column is the target column. Class 1 is normal heart activity; classes 2, 3, 4 and 5 are irregular heart activity.

In [46]:
# Show the column labels of the combined frame.
df.columns

Index([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
       ...
       131, 132, 133, 134, 135, 136, 137, 138, 139, 140],
      dtype='int64', length=141)

In [47]:
# Label the columns c0..c(n-1) by position. Unlike `df.add_prefix('c')`,
# which prepends another 'c' on every execution (c0 -> cc0 -> ...), this
# produces the same labels no matter how many times the cell is re-run.
df = df.set_axis([f'c{i}' for i in range(df.shape[1])], axis=1)

In [48]:
# Re-display the column labels to check the 'c' prefix was applied.
df.columns

Index(['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9',
       ...
       'c131', 'c132', 'c133', 'c134', 'c135', 'c136', 'c137', 'c138', 'c139',
       'c140'],
      dtype='object', length=141)

In [49]:
# Count how many rows carry each distinct value of column c0.
df['c0'].value_counts()

c0
1.0    2919
2.0    1767
4.0     194
3.0      96
5.0      24
Name: count, dtype: int64

In [50]:
# Per-column summary statistics (count, mean, std, min, quartiles, max).
df.describe()

Unnamed: 0,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,...,c131,c132,c133,c134,c135,c136,c137,c138,c139,c140
count,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,...,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0
mean,1.5274,-0.262476,-1.649511,-2.492211,-3.119443,-3.167438,-2.866308,-2.273126,-1.798127,-1.410124,...,0.146319,0.070973,-0.062914,-0.276206,-0.507667,-0.761043,-0.834741,-0.642263,-0.487242,-0.706008
std,0.760372,1.152369,1.445493,1.386409,1.302802,1.104382,0.906133,0.731627,0.6231,0.637149,...,1.171299,1.404842,1.646585,1.895134,2.059758,2.086218,2.05622,1.859296,1.830987,1.414465
min,1.0,-6.729499,-7.090374,-5.132459,-5.363241,-5.375715,-5.330194,-4.78224,-4.311288,-4.071361,...,-3.971715,-4.16704,-4.557576,-4.530488,-4.56327,-5.1501,-4.88046,-5.496107,-5.88089,-6.092227
25%,1.0,-1.004511,-2.701576,-3.668096,-4.227247,-4.00747,-3.480479,-2.779941,-2.165851,-1.774124,...,-0.655753,-1.0893,-1.588885,-2.147449,-2.73155,-3.030509,-2.868331,-2.153513,-1.657771,-1.596692
50%,1.0,-0.297541,-1.661892,-2.585677,-3.387934,-3.468718,-2.947061,-2.285578,-1.750157,-1.42257,...,0.473642,0.658318,0.717808,0.604747,0.334857,-0.030767,-0.195151,-0.194423,-0.160262,-0.565588
75%,2.0,0.500061,-0.67729,-1.513964,-2.235369,-2.530967,-2.398813,-1.823494,-1.484923,-1.063708,...,1.062134,1.16942,1.238687,1.264471,1.234433,1.034584,0.945237,0.951681,1.002692,0.272012
max,5.0,4.966414,3.479689,2.660597,1.899798,2.147015,1.614375,1.868728,1.804251,1.68373,...,2.960969,3.007606,2.85468,2.912394,2.937685,3.351437,3.432519,3.579448,4.425976,7.402103


In [52]:
# 80/20 split with a fixed seed. The first argument is the full value
# array, so x_* still contain column 0; the second argument is column 0
# alone, kept two-dimensional, and becomes y_*.
# NOTE(review): because column 0 travels inside x_*, confirm that later
# cells drop it before the data is fed to a model.
x_train, x_test, y_train, y_test = train_test_split(
    df.to_numpy(),
    df.iloc[:, :1].to_numpy(),
    test_size=0.2,
    random_state=42,
)

In [53]:
# Check the (rows, columns) shape of the training split.
x_train.shape

(4000, 141)

In [56]:
# Fit a min-max scaler on the training split only.
scaler = MinMaxScaler()
scaler.fit(x_train)
# NOTE: despite its name, `data_scaled` is the fitted scaler object
# (`fit` returns the estimator itself), not scaled data.
data_scaled = scaler

In [58]:
# Apply the scaler fitted on the training split to both splits.
train_data_scaled, test_data_scaled = (
    data_scaled.transform(split) for split in (x_train, x_test)
)

In [59]:
# Display the scaled training array.
train_data_scaled

array([[0.25      , 0.53813398, 0.59585633, ..., 0.11775889, 0.0569781 ,
        0.17124168],
       [0.25      , 0.53857438, 0.58850356, ..., 0.26561097, 0.25063599,
        0.22736195],
       [0.        , 0.317557  , 0.18654775, ..., 0.71455568, 0.70718614,
        0.51703033],
       ...,
       [0.        , 0.45442466, 0.35540716, ..., 0.77837157, 0.74294265,
        0.50709418],
       [0.25      , 0.40345124, 0.39132017, ..., 0.4868497 , 0.50193227,
        0.47770011],
       [0.        , 0.37134117, 0.25002207, ..., 0.71048947, 0.65626497,
        0.35023474]])