# Fourier Transform script

In [1]:
import re

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

import scipy.signal
import sklearn
from sklearn import preprocessing

In [2]:
def FourierTransform(signal, time, sample_rate=1000):
    """
    A function that performs the one-sided (real-input) Fourier Transform of a signal.

    The transform length is N = sample_rate * time. NumPy zero-pads the signal
    when it is shorter than N samples and truncates it when it is longer.

    Parameters
    ----------
    signal: array_like.
    ECG data.

    time: int or float.
    ECG signal recording time in seconds. Usually obtained from initial data.

    sample_rate: int. Default=1000.
    Sample rate of used digital ECG system. Usually obtained from initial data.

    Returns
    -------
    fourier: numpy array of complex numbers.
    Fourier coefficients for the non-negative frequencies (N // 2 + 1 values).

    frequency: numpy array of floats.
    Frequencies corresponding to the returned coefficients, in cycles per second.

    Raises
    ------
    ValueError
    If sample_rate * time corresponds to less than one sample.
    """
    # numpy.fft needs an integer number of points; a float duration
    # (e.g. 60.0) would otherwise make rfftfreq reject N.
    N = int(round(sample_rate * time))
    if N < 1:
        raise ValueError(
            "sample_rate * time must give at least one sample, got %r" % (N,)
        )

    fourier = np.fft.rfft(a=signal, n=N)
    frequency = np.fft.rfftfreq(n=N, d=1/sample_rate)

    return fourier, frequency

In [3]:
def zeros_padding(data, share):
    """
    A function that adds zeros to the beginning and end of the transmitted data.
    The number of zeros added on each side is int(len(data) * share / 2).
    The transmitted data itself is never modified.

    Parameters
    ----------
    data: 1D array_like.
    Data to be padded.

    share: int or float.
    Reflects the total fraction of zeros relative to the length of the transmitted array,
    which will be added to the beginning and end by half, respectively.
    For example, share=1 adds len(data)/2 zeros on each side.

    Returns
    -------
    zeros: list (or the transmitted data unchanged if share is 0).
    A new list with zeros at the beginning and end.
    """
    if share == 0:
        return data

    # Build a new list rather than extending `data` in place, so the caller's
    # sequence is left untouched and any sequence type (tuple, array) works.
    padding = [0] * int(len(data) * share / 2)
    return padding + list(data) + padding

In [4]:
def processing(data, ds_factor, Wn):
    """
    A function that downsamples the given data and then low-pass filters it.

    Parameters
    ----------
    data: array_like.
    Given data to be processed.

    ds_factor: int.
    Downsampling factor passed to scipy.signal.decimate. For example, 2 means
    that every second sample is kept.

    Wn: float or array_like.
    The critical frequency or frequencies passed to scipy.signal.butter.

    Returns
    -------
    filtered: array_like.
    Decimated and filtered data.
    """
    # Step 1: reduce the sampling rate (decimate applies an IIR anti-aliasing filter).
    downsampled = scipy.signal.decimate(data, ds_factor, ftype='iir')

    # Step 2: 3rd-order Butterworth filter, applied forward and backward,
    # using Gustafsson's method for the edges.
    numerator, denominator = scipy.signal.butter(3, Wn)
    # NOTE: normalizing the filtered signal was considered here but is not applied.
    return scipy.signal.filtfilt(numerator, denominator, downsampled, method='gust')

In [5]:
def plotter(ecg_data, fourier, frequencies):
    """
    A function that plots three figures: the original signal, its spectrum,
    and the inverse Fourier Transform laid over the original signal.

    Parameters
    ----------
    ecg_data: array_like.
    Some ECG data before transformation.

    fourier: array_like.
    Fourier transformation of the data (one-sided, as returned by np.fft.rfft).

    frequencies: array_like.
    Frequencies of waves obtained from Fourier Transform.

    Returns
    -------
    None.
    """
    # Original signal plotting. This is the raw series of samples,
    # i.e. the time domain, not the frequency domain.
    plt.figure(figsize=(15, 8))
    plt.title("Original Signal in Time Domain")
    plt.xlabel("Number of measurements")
    plt.plot(ecg_data)
    plt.show()

    # Obtained spectrum plotting (magnitudes only, x-axis limited to 0..40)
    plt.figure(figsize=(10, 10))
    plt.title("Spectrum")
    plt.plot(frequencies, np.abs(fourier), color='darkblue')
    plt.xlim(0, 40)
    plt.show()

    # To check if inverse Fourier works well.
    # NOTE: irfft without `n` returns 2 * (len(fourier) - 1) samples, so the
    # reconstruction is one sample short if the transform length was odd.
    inverse_fourier = np.fft.irfft(fourier)
    plt.figure(figsize=(20, 10))
    plt.plot(ecg_data, color='red', label='ECG data')
    plt.plot(inverse_fourier, '--b', label='Inverse Fourier Transform data', alpha=0.7)
    plt.title("The inverse Fourier Transform and Original data")
    plt.legend(loc='upper left')
    plt.show()

In [6]:
def _parse_ecg(raw, number_pattern):
    """Extract every number from a comma-separated string as a list of floats."""
    return [
        float(token)
        for piece in raw.split(',')
        for token in number_pattern.findall(piece)
    ]


def main(data, max_frequency=40):
    """
    Some kind of main function which calls everything else: parses each ECG
    record, applies the Fourier Transform and keeps the low-frequency part.

    Parameters
    ----------
    data: pandas DataFrame.
    Data to process. Must contain the columns 'cid', 'pid', 'durarion'
    (spelled this way in the input files), 'diseasediabetes', 'diseaseibs',
    'respiratory_disease' and 'data' (comma-separated ECG samples as a string).

    max_frequency: int or float. Default=40.
    Upper bound of the frequencies that are kept in the result.

    Returns
    -------
    df: pandas DataFrame.
    One row per input record with the columns 'cid', 'pid', 'duration',
    'disease_diabetes', 'disease_ibs', 'respiratory_disease',
    'fourier_transform' and 'frequencies' (the last two hold lists).
    """
    columns = ['cid', 'pid', 'duration', 'disease_diabetes',
               'disease_ibs', 'respiratory_disease', 'fourier_transform', 'frequencies']
    # Compiled once instead of on every loop iteration.
    number_pattern = re.compile(r'[-+]?[.]?[\d]+(?:,\d\d\d)*[\.]?\d*(?:[eE][-+]?\d+)?')

    rows = []
    # iterrows walks the rows by position, so it also works when the index of
    # `data` is not 0..n-1 (e.g. after filtering).
    for _, record in data.iterrows():
        ecg = _parse_ecg(record['data'], number_pattern)

        # TODO: possible future preprocessing via zeros padding, filtering and
        # normalization (zeros_padding / processing). Needs to be tested more.

        fourier, frequency = FourierTransform(ecg, record['durarion'])
        # rfftfreq is ascending, so the frequencies within the limit form a
        # prefix and the same prefix length is valid for the coefficients.
        frequency_cut = frequency[frequency <= max_frequency]

        # plotter(ecg, fourier, frequency) # If plotting is needed

        rows.append({
            'cid': record['cid'],
            'pid': record['pid'],
            'duration': record['durarion'],
            'disease_diabetes': record['diseasediabetes'],
            'disease_ibs': record['diseaseibs'],
            'respiratory_disease': record['respiratory_disease'],
            'fourier_transform': fourier[:len(frequency_cut)].tolist(),
            'frequencies': frequency_cut.tolist(),
        })

    # DataFrame.append was removed in pandas 2.0 and copied the whole frame on
    # every call; build the frame once from the collected rows instead.
    return pd.DataFrame(rows, columns=columns)

In [7]:
# Load the raw recordings and strip the curly braces wrapped around each
# serialized ECG array in the 'data' column.
data = pd.read_csv('/home/artem/Electrocardiography/200 диабет 1 минута.csv').assign(
    data=lambda frame: frame['data'].str.strip('{}')
)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 7 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   cid                  200 non-null    int64 
 1   pid                  200 non-null    int64 
 2   durarion             200 non-null    int64 
 3   diseasediabetes      200 non-null    int64 
 4   diseaseibs           200 non-null    int64 
 5   respiratory_disease  200 non-null    int64 
 6   data                 200 non-null    object
dtypes: int64(6), object(1)
memory usage: 11.1+ KB


In [8]:
# `share` is only referenced by the commented-out zeros_padding call inside main.
share = 0
# Run the Fourier pipeline over every record of the loaded table.
results = main(data)
# Display the resulting table as the cell output.
results

Unnamed: 0,cid,pid,duration,disease_diabetes,disease_ibs,respiratory_disease,fourier_transform,frequencies
0,103965,8808,60,1,0,0,"[(2.2867630972690116+0j), (0.6107276765506018-...","[0.0, 0.016666666666666666, 0.0333333333333333..."
1,105441,9173,60,1,0,0,"[(2.3357332893979748+0j), (0.7453159030548033+...","[0.0, 0.016666666666666666, 0.0333333333333333..."
2,105472,9173,60,1,0,0,"[(2.201221597189363+0j), (0.9576531975077545-0...","[0.0, 0.016666666666666666, 0.0333333333333333..."
3,105474,9173,60,1,0,0,"[(1.1427164987494312+0j), (-0.2542415483564003...","[0.0, 0.016666666666666666, 0.0333333333333333..."
4,105480,9173,60,1,0,0,"[(0.9939175014062398+0j), (-0.0767612833304215...","[0.0, 0.016666666666666666, 0.0333333333333333..."
...,...,...,...,...,...,...,...,...
195,137248,996,60,1,0,0,"[(-36.83621892833861+0j), (-34.49957473727327-...","[0.0, 0.016666666666666666, 0.0333333333333333..."
196,137249,996,60,1,0,0,"[(-37.21795005740715+0j), (-34.82104439967837-...","[0.0, 0.016666666666666666, 0.0333333333333333..."
197,137250,996,60,1,0,0,"[(-38.48253722006038+0j), (-36.11913653463371-...","[0.0, 0.016666666666666666, 0.0333333333333333..."
198,137251,996,60,1,0,0,"[(-40.07958831868074+0j), (-37.71394440717863-...","[0.0, 0.016666666666666666, 0.0333333333333333..."


In [30]:
# Save the results. index=True writes the row index as an unnamed first column,
# which shows up as 'Unnamed: 0' when the file is read back.
results.to_csv("/home/artem/Electrocardiography/Фурье до 40 герц/Фурье Обрезанное Диабет 1 минута.csv", sep=',', index=True, header=True)

## Combining all result files into one

This part was run in Google Colab, so the file names differ slightly and no output is shown here. It ran successfully there.

In [31]:
# Result files to combine, one per patient group and recording length.
# All of them share the same directory and file-name prefix.
files = [
    f'~/Electrocardiography/Фурье до 40 герц/Фурье Обрезанное {group}.csv'
    for group in (
        'Диабет 1 минута',
        'Здоровые 1 минута',
        'Здоровые 3 минуты',
        'ИБС 5 минут',
        'Респираторные 3 минуты',
        'Сердечники 1 минута',
    )
]

In [38]:
# Read every result file and tag its rows with the file they came from.
frames = []
for file in files:
    data = pd.read_csv(file)
    data['filename'] = file
    frames.append(data)

# Concatenate once at the end: calling pd.concat inside the loop re-copies all
# previously read rows on every iteration and starts from an empty DataFrame.
combined = pd.concat(frames)
# 'Unnamed: 0' is the row index that was written when the per-file results were saved.
combined.drop(columns=['Unnamed: 0'], inplace=True)
combined.to_csv('Combined.csv', sep=',', header=True, index=False)