In [None]:
# Wearable sensing
# Our goal: Predict exacerbations of COPD before they happen
# COPD: Chronic Obstructive Pulmonary Disease
# Exacerbations are episodes in the disease where symptoms are more severe and medical intervention is typically required

In [None]:
# If a patient is wearing a smartwatch, can we tell whether their respiratory symptoms are worse than usual or at their baseline?
# We ran a study (with ethics approval)
# We gave people with COPD a smartwatch, and smartphone
# The smartwatch would continuously record their heart rate, their movement (IMU), as well as their audio (microphone)
# Microphone is a very sensitive modality -> Muffle any speech that occurred in the signal
# We could extract features from unmuffled audio (before muffling), and save those
# We could also use features to reconstruct the audio, and compute new features, as long as we didn't listen to or store the reconstructed audio

In [None]:
# FFT (Fast Fourier Transform) -> Converts signals from the time domain to the frequency domain
# Reversible
# We can reconstruct the audio using the FFT

In [8]:
# pip install pandas
import pandas as pd
import numpy as np
import timeit
import os

In [9]:
# Half of the software was developed by us
# The other half was developed by Samsung Digital Health

In [10]:
# Total number of audio files to convert. The count is hard-coded here rather
# than computed from a directory listing; it is the multiplier used by the
# run-time estimates further down.
number_of_files = 122472 #files, each file was 2 minutes of audio
number_of_files

122472

In [11]:
# See what the file looks like
# Load one sample file; the first CSV column becomes the row index.
# NOTE(review): the displayed column labels (e.g. "4096", "0.1", "-10.26") look
# like data values carrying pandas' duplicate-name suffixes, which suggests the
# file has no header row and its first record is being consumed as the header.
# If so, `header=None` is needed here — confirm against the raw file.
df = pd.read_csv("test.csv", index_col=0)
df

Unnamed: 0,0,4096,-1046,0.1,-4,-648,-40,-17,28,-254,...,-10.26,-22.16,4.27,-20.15,19.13,-19.23,19.14,2.34,13.20,1.35
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1279,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1280,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1281,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1282,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [14]:
# Convert the fft representation of the audio back to the waveform
def fft_to_audio(fft_df, fs = 44100):
    """Rebuild a waveform from a table of FFT frames using an explicit Python loop.

    This is the intentionally unvectorised baseline that the notebook times
    against ``fft_to_audio_optimized``; the per-element loop is kept on purpose.

    Parameters
    ----------
    fft_df : pandas.DataFrame
        One row per FFT frame. The first two columns (by position) are
        discarded; the remaining columns are read as interleaved
        ``real, imag, real, imag, ...`` values. Rows with fewer than 4096
        values are zero-padded on the right.
    fs : int, optional
        Unused; kept so existing callers that pass it keep working.

    Returns
    -------
    numpy.ndarray
        1-D array holding the inverse real FFT of every row, back to back.
        An input with no rows yields an empty array.

    Raises
    ------
    ValueError
        If a row carries more than 4096 values after the first two columns
        are removed.
    """
    frame_width = 4096  # interleaved values per frame -> 2048 complex bins

    # Select by position: a label-based drop would also remove any later
    # column that happens to share a label with one of the first two.
    values = fft_df.iloc[:, 2:].to_numpy()
    n_frames, n_values = values.shape
    if n_values > frame_width:
        raise ValueError(
            "expected at most %d FFT values per row, got %d" % (frame_width, n_values)
        )
    if n_frames == 0:
        # Nothing to invert; irfft cannot be applied to an empty spectrum.
        return np.zeros(0)

    arr = np.zeros((n_frames, frame_width))
    arr[:, :n_values] = values
    lst = []

    for ar in arr:
        lst1 = []
        # Pair consecutive values into one complex bin: (real, imag).
        for i in range(0, len(ar), 2):
            lst1.append(complex(ar[i], ar[i+1]))
        lst.append(lst1)

    fft = np.array(lst)
    # NOTE(review): with the default length, irfft returns 2*(2048-1) = 4094
    # samples per frame, not 4096 — confirm this matches how the FFT was made.
    data = np.fft.irfft(fft)
    # Flatten frame-by-frame into a single waveform.
    return data.reshape(-1)

In [15]:
# Run the loop-based conversion once on the sample frame `df` and show the result.
fft_to_audio(df)

array([0., 0., 0., ..., 0., 0., 0.])

In [16]:
# Wall-clock seconds for a single call (number=1) of the loop-based version on
# the sample frame; one run only, so the figure is a rough estimate.
# Note: the name `time` would shadow the standard-library module of the same
# name if that were ever imported in this notebook.
time = timeit.timeit('fft_to_audio(df)', number=1, globals=globals())
time

1.3984144469995954

In [17]:
# Extrapolate the single-file timing to the whole data set and report it in
# progressively larger units.
estimated_seconds = number_of_files * time
print(str(estimated_seconds) + " seconds")
for seconds_per_unit, unit_suffix in ((60, " minutes"), (3600, " hours"), (86400, " days")):
    print(str(estimated_seconds / seconds_per_unit) + unit_suffix)

171266.61415293446 seconds
2854.4435692155744 minutes
47.57405948692624 hours
1.9822524786219267 days


In [18]:
def fft_to_audio_optimized(fft_df, fs = 44100):
    """Rebuild a waveform from a table of FFT frames using vectorised NumPy ops.

    Parameters
    ----------
    fft_df : pandas.DataFrame
        One row per FFT frame. The first two columns (by position) are
        discarded; the remaining columns are read as interleaved
        ``real, imag, real, imag, ...`` values. Rows with fewer than 4096
        values are zero-padded on the right.
    fs : int, optional
        Unused; kept so existing callers that pass it keep working.

    Returns
    -------
    numpy.ndarray
        1-D array holding the inverse real FFT of every row, back to back.
        An input with no rows yields an empty array.

    Raises
    ------
    ValueError
        If a row carries more than 4096 values after the first two columns
        are removed.
    """
    frame_width = 4096  # interleaved values per frame -> 2048 complex bins

    # Select by position: a label-based drop would also remove any later
    # column that happens to share a label with one of the first two.
    values = fft_df.iloc[:, 2:].to_numpy()
    n_frames, n_values = values.shape
    if n_values > frame_width:
        raise ValueError(
            "expected at most %d FFT values per row, got %d" % (frame_width, n_values)
        )
    if n_frames == 0:
        # Nothing to invert; return an empty waveform rather than failing later.
        return np.zeros(0)

    arr = np.zeros((n_frames, frame_width))
    arr[:, :n_values] = values

    # Even columns are the real parts, odd columns the imaginary parts.
    fft = arr[:, ::2] + 1j * arr[:, 1::2]

    # NOTE(review): with the default length, irfft returns 2*(2048-1) = 4094
    # samples per frame, not 4096 — confirm this matches how the FFT was made.
    data = np.fft.irfft(fft)
    # Flatten frame-by-frame into a single waveform.
    return data.reshape(-1)

In [20]:
# Run the vectorised conversion once on the sample frame `df` and show the result.
fft_to_audio_optimized(df)

array([0., 0., 0., ..., 0., 0., 0.])

In [21]:
# Wall-clock seconds for a single call (number=1) of the vectorised version on
# the sample frame. This overwrites the `time` value measured for the
# loop-based version, so the estimates below refer to the optimized function.
time = timeit.timeit('fft_to_audio_optimized(df)', number=1, globals=globals())
time

0.18974981199971808

In [22]:
# Extrapolate the latest single-file timing to the whole data set and report
# it in progressively larger units.
estimated_seconds = number_of_files * time
print(str(estimated_seconds) + " seconds")
for seconds_per_unit, unit_suffix in ((60, " minutes"), (3600, " hours"), (86400, " days")):
    print(str(estimated_seconds / seconds_per_unit) + unit_suffix)

23239.038975229472 seconds
387.3173162538245 minutes
6.455288604230409 hours
0.2689703585096004 days


In [None]:
# 32 cores -> 64 threads, 128 GB of RAM

In [21]:
# Sketch of fanning the conversion out over 64 joblib workers. It is
# deliberately disabled with `if False:`, so running this cell does nothing.
# NOTE(review): `all_files` is not defined in the visible cells. Each item is
# passed straight to fft_to_audio_optimized, so it presumably has to be an
# iterable of per-file DataFrames — confirm before enabling.
if False:
    from joblib import Parallel, delayed
    res = Parallel(n_jobs=64)(delayed(fft_to_audio_optimized)(i) for i in all_files)# won't o


In [23]:
# Same extrapolation as before, divided evenly across 64 workers. This is an
# idealised figure: it assumes the work splits perfectly with no overhead.
parallel_seconds = (number_of_files * time) / 64
print(str(parallel_seconds) + " seconds")
for seconds_per_unit, unit_suffix in ((60, " minutes"), (3600, " hours"), (86400, " days")):
    print(str(parallel_seconds / seconds_per_unit) + unit_suffix)

363.1099839879605 seconds
6.051833066466008 minutes
0.10086388444110014 hours
0.004202661851712506 days


In [26]:
import os

# os.system("cat /proc/cpuinfo")
# 4 -> 8 threads


cat: /proc/cpuinfo: No such file or directory


256