In [34]:
from google.colab import drive

# Mount your Google Drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [35]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.layers import SimpleRNN, Dense

In [36]:


# Define the file paths to your CSV files in Google Drive
# Assuming the folder is named "ECGData" and located in the root directory
ecg_data_folder = '/content/drive/My Drive/ECG Data/'  # Modify this path if your folder is in a different location

# Define the file names
file_names = ['mitbih_test.csv', 'mitbih_train.csv', 'ptbdb_normal.csv', 'ptbdb_abnormal.csv']

# Initialize an empty dictionary to store DataFrames
dataframes = {}

# Read each CSV file into a DataFrame and store it in the dictionary
for file_name in file_names:
    file_path = ecg_data_folder + file_name
    df = pd.read_csv(file_path,header=None)
    dataframes[file_name] = df




In [37]:
# Assuming you've already accessed the file paths as strings
normal_df = dataframes['mitbih_train.csv']  # Access the file path as a string
abnormal_df = dataframes['mitbih_test.csv']  # Access the file path as a string




normal_df.shape,abnormal_df.shape






((87554, 188), (21892, 188))

In [38]:
normal_df.isnull().sum()

0      0
1      0
2      0
3      0
4      0
      ..
183    0
184    0
185    0
186    0
187    0
Length: 188, dtype: int64

In [39]:
ECG_df=pd.concat([normal_df,abnormal_df],axis=0)

In [40]:
ECG_df.isnull().sum()

0      0
1      0
2      0
3      0
4      0
      ..
183    0
184    0
185    0
186    0
187    0
Length: 188, dtype: int64

In [41]:
ECG_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 109446 entries, 0 to 21891
Columns: 188 entries, 0 to 187
dtypes: float64(188)
memory usage: 157.8 MB


In [42]:
ECG_df.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,178,179,180,181,182,183,184,185,186,187
count,109446.0,109446.0,109446.0,109446.0,109446.0,109446.0,109446.0,109446.0,109446.0,109446.0,...,109446.0,109446.0,109446.0,109446.0,109446.0,109446.0,109446.0,109446.0,109446.0,109446.0
mean,0.89117,0.758909,0.424503,0.219602,0.201237,0.210298,0.205607,0.201617,0.19848,0.19661,...,0.004937,0.004568,0.004237,0.003914,0.003673,0.003469,0.00321,0.002956,0.002835,0.473439
std,0.239657,0.22119,0.227561,0.207248,0.177191,0.171965,0.178374,0.17702,0.171469,0.168028,...,0.043951,0.042109,0.040471,0.038801,0.037465,0.036552,0.035015,0.033413,0.03262,1.143232
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.922252,0.682648,0.251014,0.048853,0.082418,0.0883,0.073171,0.066093,0.064893,0.068615,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.991202,0.82656,0.430174,0.166355,0.147842,0.15864,0.145078,0.144465,0.150073,0.148849,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,1.0,0.910868,0.579832,0.342707,0.259045,0.2875,0.298343,0.295235,0.290623,0.283465,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0


In [43]:
ECG_df.shape

(109446, 188)

In [44]:
ECG_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,178,179,180,181,182,183,184,185,186,187
0,0.977941,0.926471,0.681373,0.245098,0.154412,0.191176,0.151961,0.085784,0.058824,0.049020,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.960114,0.863248,0.461538,0.196581,0.094017,0.125356,0.099715,0.088319,0.074074,0.082621,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.000000,0.659459,0.186486,0.070270,0.070270,0.059459,0.056757,0.043243,0.054054,0.045946,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.925414,0.665746,0.541436,0.276243,0.196133,0.077348,0.071823,0.060773,0.066298,0.058011,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.967136,1.000000,0.830986,0.586854,0.356808,0.248826,0.145540,0.089202,0.117371,0.150235,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21887,0.928736,0.871264,0.804598,0.742529,0.650575,0.535632,0.394253,0.250575,0.140230,0.102299,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
21888,0.802691,0.692078,0.587444,0.446936,0.318386,0.189836,0.118087,0.077728,0.112108,0.152466,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
21889,1.000000,0.967359,0.620178,0.347181,0.139466,0.089021,0.103858,0.100890,0.106825,0.100890,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
21890,0.984127,0.567460,0.607143,0.583333,0.607143,0.575397,0.575397,0.488095,0.392857,0.238095,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0


In [45]:
ECG_df.duplicated().sum()

0

In [46]:
ECG_df.drop_duplicates( keep="first", inplace=True)

In [54]:
X=ECG_df.iloc[:,:186]
y=ECG_df.iloc[:,187]

In [55]:
X=np.array(X)
mms=MinMaxScaler()
mms.fit(X)
mms.transform(X)
X=pd.DataFrame(X)

In [56]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.20,random_state=30)

In [57]:


print("Shape of X_train:", X_train.shape)
print("Shape of y_train:", y_train.shape)

Shape of X_train: (87556, 186)
Shape of y_train: (87556,)


In [51]:


# Convert DataFrame X to a NumPy array
X_array = X.values

# Convert Series y to a NumPy array
y_array = y.values

# Define the number of time steps and features
time_steps = X_array.shape[1]  # Number of time steps
num_features = 1  # Number of features per time step

# Define the number of classes (output categories)
num_classes = len(np.unique(y_array))

# Convert labels to one-hot encoding (categorical format)
y_encoded = keras.utils.to_categorical(y_array, num_classes)

# Reshape X to match the input shape expected by LSTM
X_reshaped = X_array.reshape(X_array.shape[0], time_steps, num_features)

# Define the LSTM model
model = keras.Sequential()
model.add(layers.LSTM(128, input_shape=(time_steps, num_features)))
model.add(layers.Dense(num_classes, activation='softmax'))

# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
LSTM=model.fit(X_reshaped, y_encoded, epochs=1, batch_size=32)


