In [5]:
import pandas as pd
import soundfile
import librosa
import numpy as np
import re
import os
import glob

## Feature Extraction

In [6]:
def feature_chromagram(waveform, sample_rate):
    # STFT computed here explicitly; mel spectrogram and MFCC functions do this under the hood
    stft_spectrogram=np.abs(librosa.stft(waveform))
    # Produce the chromagram for all STFT frames and get the mean of each column of the resulting matrix to create a feature array
    chromagram=np.mean(librosa.feature.chroma_stft(S=stft_spectrogram, sr=sample_rate).T,axis=0)
    return chromagram

def feature_melspectrogram(waveform, sample_rate, n_mels=128):
    # Produce the mel spectrogram for all STFT frames and get the mean of each column of the resulting matrix to create a feature array
    # Using 8khz as upper frequency bound should be enough for most speech classification tasks
    melspectrogram=np.mean(librosa.feature.melspectrogram(y=waveform, sr=sample_rate, n_mels=n_mels, fmax=8000).T,axis=0)
    return melspectrogram

def feature_mfcc(waveform, sample_rate, n_mfcc=28):
    # Compute the MFCCs for all STFT frames and get the mean of each column of the resulting matrix to create a feature array
    # 40 filterbanks = 40 coefficients
    mfc_coefficients=np.mean(librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mfcc=n_mfcc).T, axis=0) 
    return mfc_coefficients

In [7]:
n_chroma = 0
n_mels = 0
n_mfcc = 42

def get_features(file):
    # load an individual soundfile
     with soundfile.SoundFile(file) as audio:
        waveform = audio.read(dtype="float32")
        sample_rate = audio.samplerate # 4000
        # compute features of soundfile
        chromagram = feature_chromagram(waveform, sample_rate)
        melspectrogram = feature_melspectrogram(waveform, sample_rate, n_mels)
        mfc_coefficients = feature_mfcc(waveform, sample_rate, n_mfcc)

        feature_matrix=np.array([])
        # use np.hstack to stack our feature arrays horizontally to create a feature matrix
        feature_matrix = np.hstack((chromagram, melspectrogram, mfc_coefficients))
        
        return feature_matrix

In [8]:
dataset_info = pd.read_csv('assets/the-circor-digiscope-phonocardiogram-dataset-1.0.3/training_data.csv')

outcome_mapping = {'Normal': 1, 'Abnormal': 0}
dataset_info['Mapped_Outcome'] = dataset_info['Outcome'].map(outcome_mapping)
y_dict = dict(zip(dataset_info['Patient ID'], dataset_info['Mapped_Outcome']))

print(y_dict)

{2530: 0, 9979: 0, 9983: 0, 13918: 0, 14241: 0, 14998: 0, 23625: 0, 24160: 0, 29045: 0, 29378: 0, 31737: 0, 33151: 0, 36327: 0, 38337: 0, 39043: 0, 39403: 0, 39456: 0, 40058: 0, 40798: 0, 40840: 0, 43852: 1, 44514: 0, 45843: 0, 46065: 0, 46532: 1, 46579: 0, 46778: 0, 47002: 0, 49558: 0, 49561: 0, 49562: 0, 49568: 1, 49572: 1, 49574: 0, 49577: 1, 49585: 0, 49595: 0, 49598: 1, 49607: 0, 49610: 0, 49618: 0, 49622: 0, 49627: 0, 49628: 0, 49630: 0, 49631: 1, 49638: 0, 49641: 0, 49653: 1, 49659: 0, 49661: 1, 49669: 0, 49678: 1, 49683: 0, 49687: 0, 49691: 0, 49704: 0, 49712: 0, 49719: 1, 49729: 0, 49735: 0, 49745: 0, 49748: 0, 49751: 0, 49754: 0, 49761: 0, 49776: 0, 49808: 1, 49821: 0, 49823: 0, 49824: 0, 49829: 0, 49832: 1, 49838: 0, 49839: 1, 49842: 0, 49850: 0, 49853: 1, 49854: 0, 49873: 0, 49876: 0, 49896: 1, 49897: 1, 49900: 0, 49930: 1, 49931: 1, 49946: 0, 49952: 1, 49959: 1, 49960: 1, 49963: 0, 49966: 0, 49968: 1, 49969: 1, 49970: 1, 49974: 1, 49978: 0, 49979: 1, 49980: 0, 49983: 1, 49

In [9]:
def filter_files_by_keywords_and_extension(folder_path, keywords, extension):
    filtered_files = []
    for filename in os.listdir(folder_path):
        if any(keyword in filename for keyword in keywords) and filename.endswith(extension):
            filtered_files.append(filename)
    return filtered_files

folder_path = 'assets/the-circor-digiscope-phonocardiogram-dataset-1.0.3/training_data'
keywords = ['TV','AV','PV','MV']
extension = '.wav'

filtered_files = filter_files_by_keywords_and_extension(folder_path, keywords, extension)
filtered_files[:5]


['13918_AV.wav',
 '13918_MV.wav',
 '13918_PV.wav',
 '13918_TV.wav',
 '14241_AV.wav']

In [10]:
def load_data(filtered_files):
    X, y = [], []
    count = 0
    for file in filtered_files:
        file_path = os.path.join(folder_path, file)
        features = get_features(file_path) 
        file_number = int(re.match(r'^([^_]*)', file)[1])
        label = y_dict[file_number]
        X.append(features)
        y.append(label)
        count += 1
        print('\r' + f'Processed {count}/{len(filtered_files)} audio samples', end=' ')
    print()  # Print a newline after the loop completes
    return np.array(X), np.array(y)


features, labels = load_data(filtered_files)

Processed 3159/3159 audio samples 


In [11]:
pd.DataFrame(features).to_csv("./assets/feature(withoutOpenSmile).csv", header=False, index=False)


In [12]:
print('How many samples in total: ', len(labels))

print('How many samples are Normal: ', sum(labels))

How many samples in total:  3159
How many samples are Normal:  1632


In [13]:
print(f'\nAudio samples represented: {features.shape[0]}')
print(f'Numerical features extracted per sample: {features.shape[1]}')
features_df = pd.DataFrame(features) # make it pretty for display
features_df



print('n_chroma', n_chroma)
print('n_mels',n_mels)
print('mfcc', n_mfcc)



Audio samples represented: 3159
Numerical features extracted per sample: 54
n_chroma 0
n_mels 0
mfcc 42


## Feature Processing

In [14]:
# We would usually use df.describe(), but it provides a bit of a mess of information we don't need at the moment.
def print_features(df):
    # Check chromagram feature values
    features_df_chromagram = df.loc[:,:11]
    chroma_min = features_df_chromagram.min().min()
    chroma_max = features_df_chromagram.max().max()
    # stack all features into a single series so we don't get a mean of means or stdev of stdevs
    chroma_mean = features_df_chromagram.stack().mean()
    chroma_stdev = features_df_chromagram.stack().std()
    print(f'{n_chroma} Chromagram features:       \
    min = {chroma_min:.3f}, \
    max = {chroma_max:.3f}, \
    mean = {chroma_mean:.3f}, \
    deviation = {chroma_stdev:.3f}') 

    # Check mel spectrogram feature values
    features_df_melspectrogram = df.loc[:,n_chroma:n_chroma+n_mels-1]
    mel_min = features_df_melspectrogram.min().min()
    mel_max = features_df_melspectrogram.max().max()
    # stack all features into a single series so we don't get a mean of means or stdev of stdevs
    mel_mean = features_df_melspectrogram.stack().mean()
    mel_stdev = features_df_melspectrogram.stack().std()
    print(f'\n{n_mels} Mel Spectrogram features: \
    min = {mel_min:.3f}, \
    max = {mel_max:.3f}, \
    mean = {mel_mean:.3f}, \
    deviation = {mel_stdev:.3f}')

    # Check MFCC feature values
    features_df_mfcc = df.loc[:,n_chroma+n_mels:n_chroma+n_mels+n_mfcc-1]
    mfcc_min = features_df_mfcc.min().min()
    mfcc_max = features_df_mfcc.max().max()
    # stack all features into a single series so we don't get a mean of means or stdev of stdevs
    mfcc_mean = features_df_mfcc.stack().mean()
    mfcc_stdev = features_df_mfcc.stack().std()
    print(f'\n{n_mfcc} MFCC features:             \
    min = {mfcc_min:.3f},\
    max = {mfcc_max:.3f},\
    mean = {mfcc_mean:.3f},\
    deviation = {mfcc_stdev:.3f}')
    
print_features(features_df)

0 Chromagram features:           min = 0.164,     max = 0.997,     mean = 0.808,     deviation = 0.076

0 Mel Spectrogram features:     min = nan,     max = nan,     mean = nan,     deviation = nan

42 MFCC features:                 min = -344.715,    max = 197.198,    mean = -1.271,    deviation = 37.698


In [15]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

scaler = StandardScaler()
# keep our unscaled features just in case we need to process them alternatively
features_scaled = features 
features_scaled = scaler.fit_transform(features_scaled)

scaler = MinMaxScaler()
# keep our unscaled features just in case we need to process them alternatively
features_minmax = features
features_minmax = scaler.fit_transform(features_minmax)

In [16]:
print('\033[1m'+'Standard Scaling:\n'+'\033[0m')
features_scaled_df = pd.DataFrame(features_scaled)
print_features(features_scaled_df)

print('\n\n\033[1m'+'MinMax Scaling:\n'+'\033[0m')
features_minmax_df = pd.DataFrame(features_minmax)
print_features(features_minmax_df)

[1mStandard Scaling:
[0m
0 Chromagram features:           min = -12.385,     max = 3.475,     mean = 0.000,     deviation = 1.000

0 Mel Spectrogram features:     min = nan,     max = nan,     mean = nan,     deviation = nan

42 MFCC features:                 min = -12.385,    max = 10.532,    mean = -0.000,    deviation = 1.000


[1mMinMax Scaling:
[0m
0 Chromagram features:           min = 0.000,     max = 1.000,     mean = 0.784,     deviation = 0.105

0 Mel Spectrogram features:     min = nan,     max = nan,     mean = nan,     deviation = nan

42 MFCC features:                 min = 0.000,    max = 1.000,    mean = 0.579,    deviation = 0.189


In [17]:
from sklearn.model_selection import train_test_split

############ Unscaled test/train set #############
X_train, X_test, y_train, y_test = train_test_split(
    features, 
    labels, 
    test_size=0.2, 
    random_state=69
)

############ Standard Scaled test/train set ###########
# The labels/classes (y_train, y_test) never change, keep old values 
X_train_scaled, X_test_scaled, _, _ = train_test_split(
    features_scaled, 
    labels, 
    test_size=0.2, 
    random_state=69
)

############# MinMax Scaled test/train set ###############
# The labels/classes (y_train, y_test) never change, keep old values 
X_train_minmax, X_test_minmax, _, _ = train_test_split(
    features_minmax, 
    labels, 
    test_size=0.2, 
    random_state=69
)

In [18]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM
from tensorflow.keras.layers import Dense, Dropout, Flatten, BatchNormalization
from tensorflow.keras.optimizers import Adam

def build_model(input_shape, num_classes, model_type='cnn'):
    model = Sequential()
    
    if model_type == 'cnn':
        model.add(Conv1D(filters=64, kernel_size=5, activation='relu', input_shape=input_shape))
        model.add(BatchNormalization())
        model.add(MaxPooling1D(pool_size=2))
        model.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
        model.add(BatchNormalization())
        model.add(MaxPooling1D(pool_size=2))
        model.add(Flatten())
    elif model_type == 'lstm':
        model.add(LSTM(64, return_sequences=True, input_shape=input_shape))
        model.add(BatchNormalization())
        model.add(LSTM(128))
    else:
        raise ValueError("model_type should be 'cnn' or 'lstm'")
    
    # Common part for both architectures
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    
    # Output layer
    if num_classes == 2:
        model.add(Dense(1, activation='sigmoid'))
        model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
    elif num_classes > 2:
        model.add(Dense(num_classes, activation='softmax'))
        model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    else:
        raise ValueError("num_classes should be >= 2")
    
    return model


In [19]:
input_shape = (54, 1)  # Example input shape
num_classes = 2  # Binary classification
model = build_model(input_shape, num_classes, model_type='cnn')
model.summary()

  super().__init__(


In [32]:
X_train_new, X_val, y_train_new, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)
X_train_scaled_new, X_val_scaled, y_train_new, y_val = train_test_split(
    X_train_scaled, y_train, test_size=0.2, random_state=42
)

In [21]:
model.fit(X_train_new, y_train_new, epochs=60, batch_size=32, validation_data=(X_val, y_val), verbose=1)

Epoch 1/60
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.5248 - loss: 1.1820 - val_accuracy: 0.5079 - val_loss: 0.7976
Epoch 2/60
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5407 - loss: 0.6877 - val_accuracy: 0.6265 - val_loss: 0.6620
Epoch 3/60
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5753 - loss: 0.6771 - val_accuracy: 0.6067 - val_loss: 0.6645
Epoch 4/60
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5693 - loss: 0.6769 - val_accuracy: 0.5830 - val_loss: 0.6723
Epoch 5/60
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5835 - loss: 0.6767 - val_accuracy: 0.5711 - val_loss: 0.6710
Epoch 6/60
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5827 - loss: 0.6700 - val_accuracy: 0.6067 - val_loss: 0.6613
Epoch 7/60
[1m64/64[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x219d864f890>

In [23]:
from sklearn.metrics import confusion_matrix
import numpy as np

y_pred = model.predict(X_test)
y_pred = np.round(y_pred).astype(int)  # Convert probabilities to binary labels


precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
Precision: 0.6065573770491803
Recall: 0.6416184971098265
F1 Score: 0.6235955056179775


In [33]:
model.fit(X_train_scaled_new, y_train_new, epochs=60, batch_size=32, validation_data=(X_val_scaled, y_val), verbose=1)

Epoch 1/60
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5442 - loss: 3.5669 - val_accuracy: 0.4921 - val_loss: 0.8071
Epoch 2/60
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5904 - loss: 0.8250 - val_accuracy: 0.4921 - val_loss: 0.7220
Epoch 3/60
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6459 - loss: 0.6382 - val_accuracy: 0.5059 - val_loss: 0.6937
Epoch 4/60
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6756 - loss: 0.6160 - val_accuracy: 0.4980 - val_loss: 0.7147
Epoch 5/60
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6818 - loss: 0.5772 - val_accuracy: 0.5040 - val_loss: 0.7416
Epoch 6/60
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7116 - loss: 0.5527 - val_accuracy: 0.5178 - val_loss: 0.7429
Epoch 7/60
[1m64/64[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x219ea1a8500>

In [34]:
y_pred = model.predict(X_test)
y_pred = np.round(y_pred).astype(int)  # Convert probabilities to binary labels


precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Precision: 0.5630252100840336
Recall: 0.9682080924855492
F1 Score: 0.7120085015940489


In [24]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, AveragePooling1D
from tensorflow.keras.layers import Flatten, Dense, Dropout

# Create the model
model = Sequential()

# First Convolutional Block
model.add(Conv1D(filters=1024, kernel_size=7, strides=1, padding='same', activation='relu', input_shape=(54, 1))) # Adjust input_shape based on your data
model.add(AveragePooling1D(pool_size=5, strides=2, padding='same'))

# Second Convolutional Block
model.add(Conv1D(filters=512, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))

# Third Convolutional Block
model.add(Conv1D(filters=256, kernel_size=5, strides=2, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))

# Fourth Convolutional Block
model.add(Conv1D(filters=64, kernel_size=5, strides=2, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))

# Flatten the output of the convolutional layers
model.add(Flatten())

# Fully Connected Layers
model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.5))  # Assuming a dropout is desired for regularization
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))  # Assuming a dropout is desired for regularization
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))  # Assuming a dropout is desired for regularization
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # Output layer for 3 classes

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Model Summary
model.summary()


  super().__init__(


In [29]:
model.fit(X_train_new, y_train_new, epochs=30, batch_size=32, validation_data=(X_val, y_val), verbose=1)

Epoch 1/30
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6222 - loss: 0.6141 - val_accuracy: 0.6107 - val_loss: 0.6714
Epoch 2/30
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6275 - loss: 0.6224 - val_accuracy: 0.5889 - val_loss: 0.6706
Epoch 3/30
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6511 - loss: 0.6014 - val_accuracy: 0.5889 - val_loss: 0.6704
Epoch 4/30
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6446 - loss: 0.6109 - val_accuracy: 0.5988 - val_loss: 0.6651
Epoch 5/30
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6351 - loss: 0.6191 - val_accuracy: 0.5830 - val_loss: 0.6768
Epoch 6/30
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6546 - loss: 0.6104 - val_accuracy: 0.6107 - val_loss: 0.6661
Epoch 7/30
[1m64/64[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x219ea484b30>

In [31]:
# Make predictions
y_pred = model.predict(X_test)
y_pred = np.round(y_pred).astype(int)  # Convert probabilities to binary labels

precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Precision: 0.6049382716049383
Recall: 0.708092485549133
F1 Score: 0.6524633821571239


In [35]:
model.fit(X_train_scaled_new, y_train_new, epochs=60, batch_size=32, validation_data=(X_val_scaled, y_val), verbose=1)
y_pred = model.predict(X_test)
y_pred = np.round(y_pred).astype(int)  # Convert probabilities to binary labels


precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

Epoch 1/60
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9592 - loss: 0.1083 - val_accuracy: 0.6067 - val_loss: 1.4608
Epoch 2/60
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9654 - loss: 0.1056 - val_accuracy: 0.6047 - val_loss: 1.5023
Epoch 3/60
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9618 - loss: 0.1068 - val_accuracy: 0.5850 - val_loss: 1.5111
Epoch 4/60
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9659 - loss: 0.1011 - val_accuracy: 0.6186 - val_loss: 1.5026
Epoch 5/60
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9658 - loss: 0.0915 - val_accuracy: 0.6186 - val_loss: 1.5407
Epoch 6/60
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9572 - loss: 0.0983 - val_accuracy: 0.6186 - val_loss: 1.4450
Epoch 7/60
[1m64/64[0m [32m━━━━━━━━━━

In [43]:
from tensorflow.keras.callbacks import EarlyStopping
early_stop = EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min', restore_best_weights=True)

In [40]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization

model = Sequential()

# Convolutional layer
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.2))

# Another convolutional layer
model.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.2))

# Flattening followed by dense layers
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))  
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Model Summary
model.summary()


  super().__init__(


In [46]:
model.fit(X_train_new, y_train_new, epochs=120, batch_size=32, validation_data=(X_val, y_val), verbose=1, callbacks=[early_stop])


Epoch 1/120
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5906 - loss: 0.6594 - val_accuracy: 0.6126 - val_loss: 0.6531
Epoch 2/120
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5946 - loss: 0.6577 - val_accuracy: 0.6245 - val_loss: 0.6528
Epoch 3/120
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6126 - loss: 0.6532 - val_accuracy: 0.6304 - val_loss: 0.6529
Epoch 4/120
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6344 - loss: 0.6404 - val_accuracy: 0.6265 - val_loss: 0.6532
Epoch 5/120
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5965 - loss: 0.6565 - val_accuracy: 0.6285 - val_loss: 0.6532
Epoch 6/120
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5883 - loss: 0.6559 - val_accuracy: 0.6344 - val_loss: 0.6504
Epoch 7/120
[1m64/64[0m [32m━━━

<keras.src.callbacks.history.History at 0x219f1b0e000>

In [47]:
from sklearn.metrics import confusion_matrix
import numpy as np

# Make predictions
y_pred = model.predict(X_test)
y_pred = np.round(y_pred).astype(int)  # Convert probabilities to binary labels

from sklearn.metrics import precision_score, recall_score, f1_score

precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Precision: 0.5895196506550219
Recall: 0.7803468208092486
F1 Score: 0.6716417910447762


In [36]:
model.fit(X_train_scaled_new, y_train_new, epochs=60, batch_size=32, validation_data=(X_val_scaled, y_val), verbose=1)
y_pred = model.predict(X_test)
y_pred = np.round(y_pred).astype(int)  # Convert probabilities to binary labels


precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

Epoch 1/60
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9694 - loss: 0.0718 - val_accuracy: 0.6047 - val_loss: 1.7975
Epoch 2/60
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9641 - loss: 0.0897 - val_accuracy: 0.5988 - val_loss: 1.7267
Epoch 3/60
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9651 - loss: 0.0788 - val_accuracy: 0.6028 - val_loss: 1.7965
Epoch 4/60
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9718 - loss: 0.0694 - val_accuracy: 0.5771 - val_loss: 1.8422
Epoch 5/60
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9722 - loss: 0.0653 - val_accuracy: 0.6166 - val_loss: 1.8579
Epoch 6/60
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9748 - loss: 0.0720 - val_accuracy: 0.6265 - val_loss: 1.8573
Epoch 7/60
[1m64/64[0m [32m━━━━━━━━━━