# CNN-LSTM feature extraction

# 1- Load dataset

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import scipy.io
import os
from tensorflow.keras.preprocessing.sequence import pad_sequences


# Dataset location: the label CSV sits in the same directory as the .mat waveforms
waveform_dir = '/content/final_data_directory'
labels_csv_path = os.path.join(waveform_dir, 'class_labels.csv')


def load_data(waveform_dir, labels_csv_path):
    """Load every labelled waveform and return model-ready arrays.

    The label CSV is read without a header row: column 0 is the file name
    (without the ``.mat`` extension) and column 1 is the class label.

    Parameters
    ----------
    waveform_dir : str
        Directory containing one ``<file_name>.mat`` file per CSV row.
    labels_csv_path : str
        Path of the two-column label CSV.

    Returns
    -------
    tuple
        ``(waveforms, labels_one_hot, classes)``: the float32 waveform array,
        zero-padded at the end to the longest recording; the output of
        ``to_categorical`` on the integer-encoded labels; and
        ``LabelEncoder.classes_``.
    """
    table = pd.read_csv(labels_csv_path, header=None, names=['file_name', 'label'])

    # Row 0 of the 'val' matrix of each .mat file is taken as the signal
    # (assumes 'val' is the correct key, as in the original code)
    signals = [
        scipy.io.loadmat(os.path.join(waveform_dir, name + '.mat'))['val'][0]
        for name in table['file_name'].values
    ]

    # Pad at the end so that all rows share the length of the longest signal
    longest = max(len(sig) for sig in signals)
    padded = pad_sequences(signals, maxlen=longest, padding='post', dtype='float32')

    # String labels -> integer codes -> one-hot rows
    encoder = LabelEncoder()
    one_hot = to_categorical(encoder.fit_transform(table['label'].values))

    return np.array(padded), one_hot, encoder.classes_


# Load the dataset once: padded waveform array, one-hot labels, and the class
# names as reported by the label encoder (LabelEncoder.classes_)
waveforms, labels, class_names = load_data(waveform_dir, labels_csv_path)


# 2-Build CNN-LSTM model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, TimeDistributed, Dropout, Flatten

def create_cnn_lstm_model(input_shape, num_classes, n_conv_blocks=11):
    """Build and compile the CNN-LSTM classifier.

    The network is a stack of ``n_conv_blocks`` Conv1D(64, kernel 5, ReLU) +
    MaxPooling1D(2) blocks, followed by Dropout, an LSTM(200), Dropout, a
    128-unit Dense layer and a softmax output.

    Two input layouts are supported:

    * ``(time_steps, channels)`` - a plain waveform, which is what the
      training cell of this notebook passes. The convolutions run directly on
      the signal and the LSTM reads the down-sampled feature sequence.
      Wrapping the convolutions in ``TimeDistributed`` for this layout hands
      each Conv1D a 2-D slice, which Keras rejects (Conv1D needs 3-D input).
    * ``(frames, steps_per_frame, channels)`` - a pre-segmented waveform.
      Each frame goes through the convolution stack via ``TimeDistributed``
      and is flattened, and the LSTM reads the sequence of frames (the
      original architecture).

    Parameters
    ----------
    input_shape : tuple
        Shape of one sample, without the batch dimension (rank 2 or 3).
    num_classes : int
        Number of output classes (width of the softmax layer).
    n_conv_blocks : int, optional
        Number of Conv1D + MaxPooling1D blocks (default 11, as before).

    Returns
    -------
    tensorflow.keras.Model
        Model compiled with Adam, categorical cross-entropy and accuracy.

    Raises
    ------
    ValueError
        If ``input_shape`` is neither rank 2 nor rank 3.
    """
    input_shape = tuple(input_shape)
    if len(input_shape) not in (2, 3):
        raise ValueError(
            'input_shape must be (time_steps, channels) or '
            '(frames, steps_per_frame, channels), got ' + str(input_shape)
        )
    per_frame = len(input_shape) == 3

    conv_kwargs = dict(filters=64, kernel_size=5, activation='relu', padding='same')
    pool_kwargs = dict(pool_size=2, padding='same')

    layers = []
    for block_idx in range(n_conv_blocks):
        # Only the very first layer of a Sequential model declares the input shape
        first_kwargs = {'input_shape': input_shape} if block_idx == 0 else {}
        if per_frame:
            layers.append(TimeDistributed(Conv1D(**conv_kwargs), **first_kwargs))
            layers.append(TimeDistributed(MaxPooling1D(**pool_kwargs)))
        else:
            layers.append(Conv1D(**conv_kwargs, **first_kwargs))
            layers.append(MaxPooling1D(**pool_kwargs))

    if per_frame:
        # One flat feature vector per frame -> sequence of frames for the LSTM
        layers.append(TimeDistributed(Dropout(0.5)))
        layers.append(TimeDistributed(Flatten()))
    else:
        # Keep the (down-sampled time, filters) sequence for the LSTM
        layers.append(Dropout(0.5))

    layers.extend([
        LSTM(200, activation='relu'),
        Dropout(0.5),
        # Explicit names (equal to the auto-generated ones of a fresh session)
        # so that model.get_layer('dense') keeps working when the model is
        # rebuilt within the same session.
        Dense(128, activation='relu', name='dense'),
        Dense(num_classes, activation='softmax', name='dense_1'),
    ])

    model = Sequential(layers)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# 3- Train the model

In [None]:
# Conv1D expects (batch, time steps, channels): add a trailing channel axis
# when the waveforms are still a 2-D (samples, time steps) array
if waveforms.ndim == 2:
    waveforms = np.expand_dims(waveforms, axis=-1)

# Now check the shape
print("Waveforms shape:", waveforms.shape)

input_shape = waveforms.shape[1:]  # (time steps, 1) after the expansion above

# Take the class count from the one-hot labels rather than hard-coding it, so
# the output layer always matches the label matrix
num_classes = labels.shape[1]
model = create_cnn_lstm_model(input_shape, num_classes)

# NOTE(review): Keras takes the validation split from the LAST 20% of the
# samples, before any shuffling. If the label CSV is ordered by class the
# validation set will not be representative - confirm or shuffle beforehand.
model.fit(waveforms, labels, epochs=5, batch_size=32, validation_split=0.2)

# 4- Extract features from the penultimate (Dense) layer of the network

In [None]:
import numpy as np
import pandas as pd
import scipy.io
import os
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
import tensorflow as tf


# The features are the activations of the penultimate layer, i.e. the Dense
# layer that feeds the softmax classifier. The layer is looked up by position
# rather than by the literal name 'dense', because Keras auto-numbers layer
# names (dense_2, dense_3, ...) when the model is rebuilt in the same session.
feature_layer_name = model.layers[-2].name
feature_extraction_model = Model(inputs=model.input, outputs=model.get_layer(feature_layer_name).output)



def extract_features_and_save(waveform_dir, labels_csv_path, output_csv_path, max_length=None):
    """Run every listed waveform through the feature extractor and save a CSV.

    Uses the module-level ``feature_extraction_model``. The output CSV has a
    ``file_name`` column followed by ``feature1 .. featureN``, where N is the
    width of the extractor's output (no longer assumed to be 60).

    Parameters
    ----------
    waveform_dir : str
        Directory containing one ``<file_name>.mat`` file per CSV row.
    labels_csv_path : str
        Header-less CSV whose first column holds the file names.
    output_csv_path : str
        Destination of the feature CSV.
    max_length : int, optional
        Length the waveforms are padded (at the end) or truncated to. Defaults
        to the time-step dimension of the extractor's input, so the data
        always matches what the model was built for.
    """
    labels_df = pd.read_csv(labels_csv_path, header=None, names=['file_name', 'label'])
    file_names = labels_df['file_name'].values

    if max_length is None:
        # Axis 1 of the model input is the number of time steps
        max_length = feature_extraction_model.input_shape[1]

    # Row 0 of the 'val' matrix of each .mat file is the signal
    waveforms = [
        scipy.io.loadmat(os.path.join(waveform_dir, file_name + '.mat'))['val'][0]
        for file_name in file_names
    ]

    # (samples, time steps) -> (samples, time steps, 1), as used for training
    batch = pad_sequences(waveforms, maxlen=max_length, dtype='float32', padding='post')
    batch = np.expand_dims(batch, axis=-1)

    # A single batched predict call instead of one call per file
    features = feature_extraction_model.predict(batch)
    features = features.reshape(len(file_names), -1)

    # Name the columns after the actual number of extracted features
    headers = [f'feature{i+1}' for i in range(features.shape[1])]
    features_df = pd.DataFrame(features, columns=headers)

    # Insert the file names as the first column of the DataFrame
    features_df.insert(loc=0, column='file_name', value=file_names)

    # Save to CSV with headers
    features_df.to_csv(output_csv_path, index=False)

# Use the same dataset the model was trained on (section 1) and write the
# features where the merge step (section 6) reads them from.
waveform_dir = '/content/final_data_directory'
labels_csv_path = '/content/final_data_directory/class_labels.csv'
output_csv_path = '/content/final_data_directory/features/cnn_lstm_features.csv'

# to_csv does not create missing directories
os.makedirs(os.path.dirname(output_csv_path), exist_ok=True)

# Extract features and save them
extract_features_and_save(waveform_dir, labels_csv_path, output_csv_path)


# 5- Manual (hand-crafted) feature extraction from the paper

In [22]:
!pip install biosppy

Collecting biosppy
  Downloading biosppy-2.1.2-py2.py3-none-any.whl (142 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/142.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m142.2/142.2 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
Collecting shortuuid (from biosppy)
  Downloading shortuuid-1.0.11-py3-none-any.whl (10 kB)
Installing collected packages: shortuuid, biosppy
Successfully installed biosppy-2.1.2 shortuuid-1.0.11


In [12]:
# this code is from the paper : https://github.com/Seb-Good/ecg-features


from __future__ import absolute_import, division, print_function
import pywt
import numpy as np
import scipy as sp
from scipy import signal
import os
import time
import pandas as pd
import scipy.io as sio
from biosppy.signals import ecg
from biosppy.signals.tools import filter_signal

def hfd(a, k_max):

    """Compute Higuchi Fractal Dimension of a time series.

    For every delay ``k`` in ``1 .. k_max - 1`` the normalised curve length is
    averaged over the ``k`` possible start offsets. The fractal dimension is
    the slope of the least-squares line fitted to ``log(L(k))`` against
    ``log(1 / k)``.

    Parameters
    ----------
    a : numpy array
        One-dimensional time series.
    k_max : int
        Exclusive upper bound of the delays that are evaluated.

    Returns
    -------
    float
        Estimated Higuchi fractal dimension (slope of the fitted line).
    """

    # Initialize variables
    L = []
    x = []
    N = a.size

    for k in range(1, k_max):
        Lk = 0
        for m in range(0, k):
            # Indices of the sub-sampled series that starts at m with step k
            idxs = np.arange(1, int(np.floor((N - m) / k)), dtype=np.int32)
            Lmk = np.sum(np.abs(a[m + idxs * k] - a[m + k * (idxs - 1)]))
            Lmk = (Lmk * (N - 1) / (((N - m) / k) * k)) / k
            Lk += Lmk
        # Average over the k start offsets. The original divided by the leaked
        # loop variable (m + 1), which equals k once the inner loop finishes.
        L.append(np.log(Lk / k))
        x.append([np.log(1.0 / k), 1])

    # rcond=None selects NumPy's current default cut-off explicitly and avoids
    # the FutureWarning older NumPy versions emit when it is omitted
    p, _, _, _ = np.linalg.lstsq(x, L, rcond=None)

    return p[0]





class FullWaveformFeatures:

    """
    Generate a dictionary of full waveform statistics for one ECG signal.

    Parameters
    ----------
    ts : numpy array
        Full waveform time array.
    signal_raw : numpy array
        Raw full waveform.
    signal_filtered : numpy array
        Filtered full waveform.
    rpeaks : numpy array
        Array indices of R-Peaks
    templates_ts : numpy array
        Template waveform time array
    templates : numpy array
        Template waveforms
    fs : int, float
        Sampling frequency (Hz).

    Usage
    -----
    Call ``extract_full_waveform_features()`` and then read the result with
    ``get_full_waveform_features()``, which returns the feature dictionary.
    """

    def __init__(self, ts, signal_raw, signal_filtered, rpeaks, templates_ts, templates, fs):

        # Set parameters
        self.ts = ts
        self.signal_raw = signal_raw
        self.signal_filtered = signal_filtered
        self.rpeaks = rpeaks
        self.templates_ts = templates_ts
        self.templates = templates
        self.fs = fs

        # Feature dictionary
        self.full_waveform_features = dict()

    def get_full_waveform_features(self):
        """Return the feature dictionary (empty until features are extracted)."""
        return self.full_waveform_features

    def extract_full_waveform_features(self):
        """Compute every feature group and merge it into the feature dictionary."""
        self.full_waveform_features.update(self.calculate_basic_features())
        self.full_waveform_features.update(self.calculate_stationary_wavelet_transform_features())

    def calculate_basic_features(self):
        """Return summary statistics of the filtered waveform and its duration."""

        # Import the submodule explicitly: `import scipy as sp` alone does not
        # guarantee that `sp.stats` is loaded on every SciPy version.
        from scipy import stats

        waveform = self.signal_filtered

        # Calculate statistics
        basic_features = dict()
        basic_features['full_waveform_min'] = np.min(waveform)
        basic_features['full_waveform_max'] = np.max(waveform)
        basic_features['full_waveform_mean'] = np.mean(waveform)
        basic_features['full_waveform_median'] = np.median(waveform)
        basic_features['full_waveform_std'] = np.std(waveform)
        basic_features['full_waveform_skew'] = stats.skew(waveform)
        basic_features['full_waveform_kurtosis'] = stats.kurtosis(waveform)
        basic_features['full_waveform_duration'] = np.max(self.ts)

        return basic_features

    def calculate_stationary_wavelet_transform_features(self):
        """Return spectral and non-linear features of the SWT coefficients.

        For every entry of the decomposition and for both the detail ('d') and
        the approximation ('a') coefficients this computes:

        * the ratio of the maximum to the integrated Welch power in the low
          (3-10 Hz), medium (10-30 Hz) and high (30-45 Hz) bands,
        * the log-energy entropy,
        * the Higuchi fractal dimension (``hfd`` with ``k_max=10``).

        Feature names number the decomposition entries in the order in which
        ``pywt.swtn`` returns them (1 = first list entry).
        """

        # Empty dictionary
        stationary_wavelet_transform_features = dict()

        # Decomposition level
        decomp_level = 4

        # Stationary wavelet transform
        swt = self.stationary_wavelet_transform(self.signal_filtered, wavelet='db4', level=decomp_level)

        # Frequency bands in Hz: [lower bound, upper bound)
        freq_bands = (('low', (3, 10)), ('med', (10, 30)), ('high', (30, 45)))

        # np.trapz was renamed to np.trapezoid in NumPy 2.0; prefer the new name
        trapezoid = getattr(np, 'trapezoid', None) or np.trapz

        # Frequency domain: detail first, then approximation, for each entry
        for level, coefficients in enumerate(swt, start=1):
            for kind in ('d', 'a'):

                # Compute Welch periodogram
                fxx, pxx = signal.welch(x=coefficients[kind], fs=self.fs)

                for band_name, (band_start, band_stop) in freq_bands:

                    # Periodogram bins that fall inside the band
                    in_band = np.logical_and(fxx >= band_start, fxx < band_stop)

                    # Maximum power and integrated power over the band
                    max_power = np.max(pxx[in_band])
                    mean_power = trapezoid(y=pxx[in_band], x=fxx[in_band])

                    # Calculate max/mean power ratio
                    key = 'swt_' + kind + '_' + str(level) + '_' + band_name + '_power_ratio'
                    stationary_wavelet_transform_features[key] = max_power / mean_power

        # Non-linear features
        for level, coefficients in enumerate(swt, start=1):
            for kind in ('d', 'a'):
                prefix = 'swt_' + kind + '_' + str(level)

                # Log-energy entropy
                stationary_wavelet_transform_features[prefix + '_energy_entropy'] = \
                    np.sum(np.log10(np.power(coefficients[kind], 2)))

                # Higuchi fractal dimension
                stationary_wavelet_transform_features[prefix + '_higuchi_fractal'] = \
                    hfd(coefficients[kind], k_max=10)

        return stationary_wavelet_transform_features

    @staticmethod
    def calculate_decomposition_level(waveform_length, level):
        """Return the padded length used for an SWT of the given level.

        The result is the smallest multiple of ``waveform_length`` that is
        divisible by ``2**level``, i.e. their least common multiple. This is
        the value the original search loop over multiplication factors
        returned, computed in closed form.

        NOTE(review): the next multiple of ``2**level`` at or above
        ``waveform_length`` would need far less padding, but switching to it
        would change the extracted feature values, so the original rule is kept.
        """
        from math import gcd

        block = 2 ** level

        # Already the correct length for the proposed decomposition level
        if waveform_length % block == 0:
            return waveform_length

        return waveform_length * (block // gcd(waveform_length, block))

    @staticmethod
    def add_padding(waveform, waveform_length_updated):
        """Zero-pad ``waveform`` on both sides up to ``waveform_length_updated``.

        Returns the padded waveform plus the number of zeros added before and
        after it. When the padding is odd the extra zero goes after.
        """

        # Calculate required padding
        pad_count = np.abs(len(waveform) - waveform_length_updated)

        # Split the padding between the two sides
        pad_before = int(np.floor(pad_count / 2.0))
        pad_after = pad_count - pad_before

        # Add padding to waveform
        waveform_padded = np.append(np.zeros(pad_before), np.append(waveform, np.zeros(pad_after)))

        return waveform_padded, pad_before, pad_after

    def stationary_wavelet_transform(self, waveform, wavelet, level):
        """Return the SWT coefficients of ``waveform``, cropped to its length.

        The waveform is zero-padded to a length accepted by the transform,
        decomposed with ``pywt.swtn``, and the padding is removed again from
        the 'a' and 'd' coefficients of every entry.
        """

        # Minimum waveform length for SWT of this decomposition level
        waveform_length_updated = self.calculate_decomposition_level(len(waveform), level)

        # Add necessary padding to waveform
        waveform_padded, pad_before, pad_after = self.add_padding(waveform, waveform_length_updated)

        # Compute stationary wavelet transform
        swt = pywt.swtn(waveform_padded, wavelet=wavelet, level=level, start_level=0)

        # Remove the padding from every set of coefficients
        crop_stop = len(waveform_padded) - pad_after
        for coefficients in swt:
            coefficients['a'] = coefficients['a'][pad_before:crop_stop]
            coefficients['d'] = coefficients['d'][pad_before:crop_stop]

        return swt



class Features:

    """
    Extract hand-crafted ECG features from every .mat file in a directory.

    Parameters
    ----------
    file_path : str
        Directory that contains the .mat waveform files.
    fs : int, float
        Sampling frequency (Hz).
    labels : pandas DataFrame, optional
        Default label table (with a 'file_name' column) merged into the
        features when ``extract_features`` is called without ``labels``.
    """

    def __init__(self, file_path, fs, labels=None):

        # Set parameters
        self.file_path = file_path
        self.fs = fs
        self.labels = labels

        # Set attributes
        self.features = None

    def get_features(self):
        """Return the feature DataFrame (None until extract_features has run)."""
        return self.features

    def extract_features(self, filter_bandwidth, n_signals=None, show=False, labels=None,
                         normalize=True, polarity_check=True, template_before=0.2, template_after=0.4):
        """Extract the features of every waveform and store them as a DataFrame.

        Parameters
        ----------
        filter_bandwidth : list
            [low, high] cut-off frequencies (Hz) of the band-pass filter.
        n_signals : int, optional
            Only process the first ``n_signals`` files (all when None).
        show : bool
            Print the extraction time of every file.
        labels : pandas DataFrame, optional
            Label table merged on 'file_name'. Falls back to the labels given
            to the constructor when None.
        normalize : bool
            Scale amplitudes by the maximum of the median template.
        polarity_check : bool
            Invert waveforms whose median template is dominated by its minimum.
        template_before, template_after : float
            Template window before / after each R-peak, in seconds.

        Files whose processing raises ValueError are reported and skipped.
        """

        # The constructor's labels act as the default
        if labels is None:
            labels = self.labels

        # Get list of .mat files
        file_names = self._get_file_names(n_signals=n_signals)

        # Collect one record per file and build the DataFrame once at the end.
        # DataFrame.append was removed in pandas 2.0 and copied the whole
        # frame on every call.
        records = []

        # Loop through .mat files
        for file_name in file_names:

            try:

                # Get start time
                t_start = time.time()

                # Load .mat file
                signal_raw = self._load_mat_file(file_name=file_name)

                # Preprocess signal
                ts, signal_raw, signal_filtered, rpeaks, templates_ts, templates = self._preprocess_signal(
                    signal_raw=signal_raw, filter_bandwidth=filter_bandwidth, normalize=normalize,
                    polarity_check=polarity_check, template_before=template_before, template_after=template_after
                )

                # Extract features from waveform
                features = self._group_features(file_name=file_name, ts=ts, signal_raw=signal_raw,
                                                signal_filtered=signal_filtered, rpeaks=rpeaks,
                                                templates_ts=templates_ts, templates=templates,
                                                template_before=template_before, template_after=template_after)

                # Store feature vector (a dict keeps the feature order as columns)
                records.append(features.to_dict())

                # Get end time
                t_end = time.time()

                # Print progress
                if show:
                    print('Finished extracting features from ' + file_name + '.mat | Extraction time: ' +
                          str(np.round((t_end - t_start) / 60, 3)) + ' minutes')

            except ValueError:
                print('Error loading ' + file_name + '.mat')

        # Keep a 'file_name' column even when nothing could be processed, so
        # the label merge below still works
        if records:
            self.features = pd.DataFrame(records)
        else:
            self.features = pd.DataFrame(columns=['file_name'])

        # Add labels
        self._add_labels(labels=labels)

    def _add_labels(self, labels):
        """Add label to feature DataFrame."""
        if labels is not None:
            self.features = pd.merge(labels, self.features, on='file_name')

    def _get_file_names(self, n_signals):
        """Get the sorted list of .mat file names (without extension) in file path."""
        # splitext strips only the final extension, so names that contain dots
        # survive; sorting makes the n_signals subset reproducible.
        file_names = sorted(os.path.splitext(entry)[0] for entry in os.listdir(self.file_path)
                            if entry.endswith('.mat'))

        return self._get_n_signals(file_names=file_names, n_signals=n_signals)

    @staticmethod
    def _get_n_signals(file_names, n_signals):
        """Get list of file names equal to n_signals."""
        if n_signals is None:
            return file_names
        return file_names[0:n_signals]

    def _load_mat_file(self, file_name):
        """Loads ECG signal to numpy array from .mat file."""
        return sio.loadmat(os.path.join(self.file_path, file_name))['val'][0].astype('float')

    def _preprocess_signal(self, signal_raw, filter_bandwidth, normalize, polarity_check,
                           template_before, template_after):
        """Filter the signal, detect R-peaks and cut out the beat templates.

        Returns ``(ts, signal_raw, signal_filtered, rpeaks, templates_ts,
        templates)`` after the optional polarity check and normalization.

        Raises
        ------
        ValueError
            If no complete template fits inside the recording.
        """

        # Filter signal
        signal_filtered = self._apply_filter(signal_raw, filter_bandwidth)

        # Get BioSPPy ECG object
        ecg_object = ecg.ecg(signal=signal_raw, sampling_rate=self.fs, show=False)

        # Get BioSPPy output
        ts = ecg_object['ts']          # Signal time array
        rpeaks = ecg_object['rpeaks']  # rpeak indices

        # Get templates and template time array
        templates, rpeaks = self._extract_templates(signal_filtered, rpeaks, template_before, template_after)
        if templates.ndim != 2:
            # Without templates the array is 1-D and empty. Raising ValueError
            # lets extract_features skip the file instead of failing on
            # templates.shape[1].
            raise ValueError('No complete templates could be extracted from the signal')
        templates_ts = np.linspace(-template_before, template_after, templates.shape[1], endpoint=False)

        # Polarity check
        signal_raw, signal_filtered, templates = self._check_waveform_polarity(polarity_check=polarity_check,
                                                                               signal_raw=signal_raw,
                                                                               signal_filtered=signal_filtered,
                                                                               templates=templates)
        # Normalize waveform
        signal_raw, signal_filtered, templates = self._normalize_waveform_amplitude(normalize=normalize,
                                                                                    signal_raw=signal_raw,
                                                                                    signal_filtered=signal_filtered,
                                                                                    templates=templates)
        return ts, signal_raw, signal_filtered, rpeaks, templates_ts, templates

    @staticmethod
    def _check_waveform_polarity(polarity_check, signal_raw, signal_filtered, templates):

        """Invert waveform polarity if necessary.

        The inputs are returned unchanged when ``polarity_check`` is False.
        """
        if not polarity_check:
            return signal_raw, signal_filtered, templates

        # Get extremes of median templates
        median_template = np.median(templates, axis=1)
        templates_min = np.min(median_template)
        templates_max = np.max(median_template)

        if np.abs(templates_min) > np.abs(templates_max):
            return signal_raw * -1, signal_filtered * -1, templates * -1
        return signal_raw, signal_filtered, templates

    @staticmethod
    def _normalize_waveform_amplitude(normalize, signal_raw, signal_filtered, templates):
        """Normalize waveform amplitude by the median R-peak amplitude.

        The inputs are returned unchanged when ``normalize`` is False.
        """
        if not normalize:
            return signal_raw, signal_filtered, templates

        # Get median templates max
        templates_max = np.max(np.median(templates, axis=1))

        return signal_raw / templates_max, signal_filtered / templates_max, templates / templates_max

    def _extract_templates(self, signal_filtered, rpeaks, before, after):
        """Cut a fixed window around every R-peak.

        ``before`` and ``after`` are given in seconds. R-peaks whose window
        would start before the signal are skipped, and extraction stops at the
        first window that runs past its end.

        Returns the templates as a (window samples, kept R-peaks) array and
        the array of kept R-peak indices.
        """

        # convert delimiters to samples
        before = int(before * self.fs)
        after = int(after * self.fs)

        # Get number of sample points in waveform
        length = len(signal_filtered)

        # Templates and the R-peaks they belong to
        templates = []
        rpeaks_kept = []

        # Loop through R-Peaks in ascending order
        for rpeak in np.sort(rpeaks):

            # Before R-Peak
            start = rpeak - before
            if start < 0:
                continue

            # After R-Peak
            stop = rpeak + after
            if stop > length:
                break

            templates.append(signal_filtered[start:stop])
            rpeaks_kept.append(rpeak)

        # Convert lists to numpy arrays (one template per column)
        templates = np.array(templates).T
        rpeaks_new = np.array(rpeaks_kept, dtype=int)

        return templates, rpeaks_new

    def _apply_filter(self, signal_raw, filter_bandwidth):
        """Apply FIR bandpass filter to waveform."""
        signal_filtered, _, _ = filter_signal(signal=signal_raw, ftype='FIR', band='bandpass',
                                              order=int(0.3 * self.fs), frequency=filter_bandwidth,
                                              sampling_rate=self.fs)
        return signal_filtered

    def _group_features(self, file_name, ts, signal_raw, signal_filtered, rpeaks,
                        templates_ts, templates, template_before, template_after):

        """Get a pandas Series of all ECG features, keyed by feature name.

        ``template_before`` and ``template_after`` are accepted for interface
        compatibility and are not used here.
        """

        # Empty features dictionary
        features = dict()

        # Set ECG file name
        features['file_name'] = file_name

        # Extract features
        full_waveform_features = FullWaveformFeatures(ts=ts, signal_raw=signal_raw,
                                                      signal_filtered=signal_filtered, rpeaks=rpeaks,
                                                      templates_ts=templates_ts, templates=templates,
                                                      fs=self.fs)
        full_waveform_features.extract_full_waveform_features()

        # Update feature dictionary
        features.update(full_waveform_features.get_full_waveform_features())

        return pd.Series(data=features)

In [7]:

import warnings
# NOTE(review): this silences ALL warnings for the rest of the session,
# including deprecation notices from numpy / pandas / scipy.
warnings.filterwarnings('ignore')

# Sampling frequency (Hz)
fs = 300


# Data paths
label_path =  "/content/final_data_directory"
waveform_path = "/content/final_data_directory"
feature_path = "/content/final_data_directory/features"

# Read labels CSV (no header row: file name, class label)
labels = pd.read_csv(os.path.join(label_path, 'class_labels.csv'), names=['file_name', 'label'])

# Instantiate
ecg_features = Features(file_path=waveform_path, fs=fs)

# Calculate ECG features
ecg_features.extract_features(
    filter_bandwidth=[3, 45], n_signals=None, show=True,
    labels=labels, normalize=True, polarity_check=True,
    template_before=0.25, template_after=0.4
)


# Get features DataFrame
features = ecg_features.get_features()

# to_csv does not create missing directories
os.makedirs(feature_path, exist_ok=True)
features.to_csv(os.path.join(feature_path, 'features.csv'), index=False)

# View DataFrame (kept as the last expression so the notebook displays it)
features.head(10)

# 6- Merge all features

In [None]:
import pandas as pd

# Paths to your CSV files
cnn_lstm_features_path = '/content/final_data_directory/features/cnn_lstm_features.csv'
features_path = '/content/final_data_directory/features/features.csv'
merged_csv_path = '/content/merged_features.csv'

# Load the data
df_cnn_lstm_features = pd.read_csv(cnn_lstm_features_path)
df_features = pd.read_csv(features_path)

# Join the hand-crafted and the CNN-LSTM features of each recording. The key
# is spelled out: without `on`, pandas joins on every column name the two
# tables happen to share.
merged_df = pd.merge(df_features, df_cnn_lstm_features, on='file_name')

merged_df.to_csv(merged_csv_path, index=False, header=True)

# Preview (kept as the last expression so the notebook displays it)
merged_df.head()