<a href="https://colab.research.google.com/github/supertime1/Floyer_Analysis/blob/main/Cosinuss.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Set up dependencies

In [None]:
import sys
sys.path.append('C:/Users/57lzhang.US04WW4008/PycharmProjects/cosinuss')

In [None]:
from data_container import config, DataFile
from data_container.api_db_sync import DBSync
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datetime
import pickle

# 2. Connect to the Cosinuss database

Start redis-server.exe before running configure_api().

In [None]:
def configure_api(db_name, username, update_local=False,
                  server='https://telecovid.earconnect.de',
                  prj_hash_id='M9KH'):
    """
    Configure api as the data handler

    params:
    db_name: name passed to data_container.config.init
    username: account name used to create the DBSync handler
    update_local: when True, pull all data files of the project from the
                  server before listing the local files
    server: base url of the server the DBSync handler talks to
    prj_hash_id: hash id of the project whose files are pulled / listed

    outputs:
    the DBSync handler, or None when the configuration step failed
    """
    # NOTE: never write the account password into this file; supply it at
    # run time (the login step appears to prompt for it - confirm).
    try:
        config.init(db_name=db_name)
        api = DBSync(username=username, server=server)
    except Exception as exc:
        # best effort, as before: report the failure and hand back None, but
        # show the reason and let KeyboardInterrupt / SystemExit propagate
        print('Configure API failed:', repr(exc))
        return None

    if update_local:
        # download data from the server
        api.pull_all_dfs(prj_hash_id=prj_hash_id)

    df_list_local = api.df_list(prj_hash_id=prj_hash_id)
    print('There are', len(df_list_local), 'files in local database')
    return api


api = configure_api('sonova_analysis', 'sonova.fremont.api')

2021-03-01 08:37:01 - ERROR -       dc_config:  52 - data_container.config.init() can only be called once
2021-03-01 08:37:02 -  INFO -       api_login: 241 - login: sonova.fremont.api @ https://telecovid.earconnect.de


··········


2021-03-01 08:37:09 -  INFO -       api_login: 253 - successful login, it expires 2021-03-01 20:37:09 expiring in 720.0 min


There are 163 files in local database


In [None]:
def generate_table(date):
    """
    Convert local database into a Pandas dataframe, for ease of data analysis

    params:
    date: only records whose 'when' value is greater than this are kept.
          The comparison is done on the raw 'when' strings (before they are
          parsed), so pass a 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM:SS' string.

    outputs:
    an overview table in the form of pandas dataframe, with
    - 'device' lower-cased, and every name starting with 'polar' set to 'polar'
    - 'when' parsed to timestamps
    - 'duration' parsed from 'HH:MM:SS' (stored as a timestamp on 1900-01-01)
    - 'end' added as start time plus duration
    """
    # the first row returned by the api holds the column names
    table = api.overview_dfs()
    overview_df = pd.DataFrame(table[1:], columns=table[0])

    # change to lower case for the device name
    overview_df['device'] = overview_df['device'].str.lower()
    # na=False: a missing device name must not produce a NaN mask, which
    # .loc would reject
    is_polar = overview_df['device'].str.match(r'^polar', na=False)
    overview_df.loc[is_polar, 'device'] = 'polar'

    # .copy() so the column assignments below write to an independent frame
    # instead of a slice of the unfiltered one (avoids SettingWithCopyWarning)
    overview_df = overview_df[overview_df.when > date].copy()

    overview_df['when'] = pd.to_datetime(overview_df['when'],
                                         format='%Y-%m-%d %H:%M:%S')
    # a bare '%H:%M:%S' value is parsed onto the date 1900-01-01
    overview_df['duration'] = pd.to_datetime(overview_df['duration'],
                                             format='%H:%M:%S')
    # subtract that 1900-01-01 anchor to get the elapsed time, then add it to
    # the start time
    elapsed = overview_df['duration'] - pd.Timestamp(1900, 1, 1)
    overview_df['end'] = overview_df['when'] + elapsed
    return overview_df

# build the overview table of records whose 'when' is greater than
# '2020-12-31', report how many there are and preview the first rows
table = generate_table('2020-12-31')
print('There are', len(table), 'test records')
table.head()

There are 63 test records


Unnamed: 0,person,when,device,df id,duration,samples,cols,end
100,M9KH.BZYG (RaHe),2021-02-08 14:15:09,polar,1C2RATHBVH.C4CDEZ,1900-01-01 00:34:48,2.1 K,heart_rate,2021-02-08 14:49:57
101,M9KH.BZYG (RaHe),2021-02-08 14:15:14,cshell,1C2RATHBVH.38CD8K,1900-01-01 00:34:42,1.7 M,"ppg_ir, ppg_ir_2, ppg_ir_3, ppg_...",2021-02-08 14:49:56
102,M9KH.BZYG (RaHe),2021-02-08 14:15:23,cshell,1C2RATHBVH.LEMWKL,1900-01-01 00:34:36,1.7 M,"ppg_ir, ppg_ir_2, ppg_ir_3, ppg_...",2021-02-08 14:49:59
103,M9KH.BZYG (RaHe),2021-02-08 14:15:35,garmin,8QEK89R9B4.NZH5XL,1900-01-01 00:45:27,2.7 K,heart_rate,2021-02-08 15:01:02
104,M9KH.ME9A (LeLo),2021-02-08 15:23:00,polar,1C2RATHBVH.XCMFW1,1900-01-01 00:34:34,2.1 K,heart_rate,2021-02-08 15:57:34


In [None]:
 def correct_label(table):
    """
    fix the labeling problem of cshell and biometric

    Every row currently labelled 'cshell' or 'biometric' is pulled through
    the api and its label is re-derived from the data file. The table is
    modified in place and also returned.

    params:
    table: overview table from generate_table function (needs the 'device',
           'df id' and 'person' columns)

    outputs:
    updated table with correct cshell and biometric labels

    """
    # samples of ppg_ir_2 that are averaged to tell the two devices apart
    ppg_probe = slice(600, 650)
    # a probe mean below this value is labelled 'cshell', otherwise 'biometric'
    ppg_mean_threshold = 500

    def get_device_model(df):
        """
        get device

        params:
        df - output of api.pull_df

        outputs:
        correct device label for df: 'polar', 'cshell', 'biometric', 'na'
        (ppg_ir_2 exists but has no samples in the probed range) or None
        when the device model is none of the known ones
        """
        # get device
        if df.device:
            device_model = df.device.device_model
        else:
            device_model = df.device_model
        model = device_model.lower()

        if 'polar' in model:
            return 'polar'

        # 'biomeric' is the misspelling the earlier version searched for; it
        # is kept next to the correct spelling in case stored models use it
        if not any(name in model for name in ('biometric', 'biomeric', 'cshell')):
            return None

        # without a ppg_ir_2 column the file is treated as biometric
        if 'ppg_ir_2' not in list(df.cols):
            return 'biometric'

        some_data = df.c.ppg_ir_2.y[ppg_probe]
        if len(some_data) == 0:
            return 'na'
        if np.mean(some_data) < ppg_mean_threshold:
            return 'cshell'
        return 'biometric'

    def get_correct_label(hash_ids, target_device):
        """
        Re-derive the label of every data file in hash_ids.

        params:
        hash_ids - 'df id' values, indexed by the row labels of table
        target_device - label these rows currently carry (used in messages)

        outputs:
        list of labels, one per entry of hash_ids, in the same order
        """
        # output list of corrected labels
        corrected_name = []
        # a counter to record how many labels have been corrected after processing
        cnt = 0
        print(f'There are in total {len(hash_ids)} files with {target_device} label')
        for i, (row_idx, hash_id) in enumerate(hash_ids.items()):
            current_label = table.loc[row_idx, 'device']
            try:
                print(f'api reading {i}th file...')
                df = api.pull_df(hash_id)
            # in case reading file failed: keep the current label
            except Exception as exc:
                print(f'api read {i}th file failed! ({exc!r})')
                corrected_name.append(current_label)
                continue
            # get the new label by api reading the file
            new_label = get_device_model(df)
            if new_label is None:
                # unknown device model: do not overwrite the label with None
                new_label = current_label
            corrected_name.append(new_label)
            if new_label != current_label:
                person = table.loc[row_idx, 'person']
                print(f'Person {person} and {target_device} label '
                      f'has been corrected to {new_label}')
                cnt += 1
        print(f'There are in total {cnt} files been corrected')
        return corrected_name

    # get the hash ids (indexed by row label) of cshell and biometric in the
    # original table; both selections are taken before any label is changed,
    # so a row relabelled in the first pass is not processed again
    selections = [
        (device, table.loc[table.device == device, 'df id'])
        for device in ('cshell', 'biometric')
    ]

    # update the original table with corrected labels of cshell and biometric
    for device, hash_ids in selections:
        labels = get_correct_label(hash_ids, device)
        if labels:
            # single .loc call on the frame: writes to table itself rather
            # than to a possibly temporary column object
            table.loc[hash_ids.index, 'device'] = labels

    return table

table = correct_label(table)

In [None]:
# save the table to a local pickle file
# NOTE(review): the path is an absolute, machine-specific location
table.to_pickle("C:/Users/57lzhang.US04WW4008/PycharmProjects/cosinuss/local_db.pkl")
# load table from local directory
table = pd.read_pickle("C:/Users/57lzhang.US04WW4008/PycharmProjects/cosinuss/local_db.pkl")
table.head()

Unnamed: 0,person,when,device,df id,duration,samples,cols,end
100,M9KH.BZYG (RaHe),2021-02-08 14:15:09,polar,1C2RATHBVH.C4CDEZ,1900-01-01 00:34:48,2.1 K,heart_rate,2021-02-08 14:49:57
101,M9KH.BZYG (RaHe),2021-02-08 14:15:14,cshell,1C2RATHBVH.38CD8K,1900-01-01 00:34:42,1.7 M,"ppg_ir, ppg_ir_2, ppg_ir_3, ppg_...",2021-02-08 14:49:56
102,M9KH.BZYG (RaHe),2021-02-08 14:15:23,biometric,1C2RATHBVH.LEMWKL,1900-01-01 00:34:36,1.7 M,"ppg_ir, ppg_ir_2, ppg_ir_3, ppg_...",2021-02-08 14:49:59
103,M9KH.BZYG (RaHe),2021-02-08 14:15:35,garmin,8QEK89R9B4.NZH5XL,1900-01-01 00:45:27,2.7 K,heart_rate,2021-02-08 15:01:02
104,M9KH.ME9A (LeLo),2021-02-08 15:23:00,polar,1C2RATHBVH.XCMFW1,1900-01-01 00:34:34,2.1 K,heart_rate,2021-02-08 15:57:34


In [None]:
def find_pairs_row_index(table, floyer_device='cshell'):
    """
    Find polar / floyer recordings of the same person that overlap in time

    params:
    table: overview table with 'person', 'device', 'when' and 'end' columns
    floyer_device: device label that is paired against 'polar'

    outputs:
    a list of [polar_row_label, floyer_row_label] pairs; two recordings are
    paired when neither one starts at or after the end of the other
    """
    candidates = table[table.device.isin(['polar', floyer_device])]

    matches = []
    # find paired sample ids, one person at a time
    for subject in candidates['person'].unique():
        rows = candidates[candidates.person == subject]
        polar_rows = rows[rows.device == 'polar'].index
        floyer_rows = rows[rows.device == floyer_device].index

        # every polar recording is compared with every floyer recording
        for polar_row in polar_rows:
            polar_start = rows['when'].loc[polar_row]
            polar_end = rows['end'].loc[polar_row]
            matches.extend(
                [polar_row, floyer_row]
                for floyer_row in floyer_rows
                if not (rows['when'].loc[floyer_row] >= polar_end
                        or polar_start >= rows['end'].loc[floyer_row])
            )

    return matches

paired_idx_lst = find_pairs_row_index(table, floyer_device='cshell')
paired_idx_lst

[[100, 101],
 [104, 106],
 [117, 116],
 [120, 121],
 [126, 124],
 [130, 128],
 [142, 141],
 [144, 146],
 [148, 150],
 [160, 162]]

In [None]:
def generate_paired_samples(paired_idx_lst, table, hr_algo_version):
    """
    Build one PairedSample per pair of row labels

    params:
    paired_idx_lst: list of [polar_row_label, floyer_row_label] pairs, as
                    returned by find_pairs_row_index
    table: Pandas dataframe with a 'df id' column holding the hash id of each
           row (the output of correct_label())
    hr_algo_version: passed unchanged to every PairedSample

    outputs:
    A list of PairedSample instances, in the order of paired_idx_lst
    """
    return [
        PairedSample(
            table['df id'].loc[polar_row],
            table['df id'].loc[floyer_row],
            hr_algo_version,
        )
        for polar_row, floyer_row in paired_idx_lst
    ]

# build the PairedSample list with heart-rate algorithm version 'v0'
paired_sample_lst = generate_paired_samples(paired_idx_lst, table, 'v0')
# save the paired sample list to save time on next run
# NOTE(review): the path is an absolute, machine-specific location
with open("C:/Users/57lzhang.US04WW4008/PycharmProjects/cosinuss/paired_sample.pkl", "wb") as fp:
    pickle.dump(paired_sample_lst, fp)

2021-03-01 15:24:25 -  INFO -     api_db_sync: 429 - Data file 1C2RATHBVH.C4CDEZ completely downloaded + database_entries: Scope, Config, Projects (4), Receivers (3), Persons (27), Devices (4)User (6), 
2021-03-01 15:25:11 -  INFO -     api_db_sync: 429 - Data file 1C2RATHBVH.38CD8K completely downloaded + database_entries: Scope, Config, Projects (4), Receivers (3), Persons (27), Devices (4)User (6), 
2021-03-01 15:25:14 -  INFO -     api_db_sync: 429 - Data file 1C2RATHBVH.XCMFW1 completely downloaded + database_entries: Scope, Config, Projects (4), Receivers (3), Persons (27), Devices (4)User (6), 
2021-03-01 15:25:57 -  INFO -     api_db_sync: 429 - Data file 1C2RATHBVH.G2HHR3 completely downloaded + database_entries: Scope, Config, Projects (4), Receivers (3), Persons (27), Devices (4)User (6), 
2021-03-01 15:26:01 -  INFO -     api_db_sync: 429 - Data file 1C2RATHBVH.2UJU2G completely downloaded + database_entries: Scope, Config, Projects (4), Receivers (3), Persons (27), Devices

In [None]:
# reload the pickled paired sample list instead of rebuilding it
# NOTE(review): unpickling needs the PairedSample class to be defined
# already; in this notebook it is defined in a later cell - confirm the
# intended cell order
with open("C:/Users/57lzhang.US04WW4008/PycharmProjects/cosinuss/paired_sample.pkl", "rb") as fp:
    paired_sample_lst = pickle.load(fp)

## Continue here

In [None]:
# run heart-rate algorithm 'v0' on one data file and plot heart rate over time
cshell = api.one3_hr_algo('v0', '1C2RATHBVH.LBYCML')
cs_hr_x = np.asarray(cshell['heart_rate_t'])
cs_hr_y = cshell['heart_rate']
# NOTE(review): `cd_q` is not read in any visible cell; the name looks like
# a typo for `cs_q` - confirm before using it
cd_q = cshell['quality']
plt.plot(cs_hr_x, cs_hr_y)

## Process the df

In [None]:
# work on the second paired sample of the list
paired_sample = paired_sample_lst[1]

# heart rate of the floyer recording, computed by the api
floyer = api.one3_hr_algo(paired_sample.hr_algo_version, paired_sample.floyer_hash_id)
floyer_hr_x = np.asarray(floyer['heart_rate_t'])
floyer_hr_y = floyer['heart_rate']

In [None]:
# heart rate of the polar recording, read from the stored data file
polar = DataFile.objects(_hash_id=paired_sample.polar_hash_id).first()
polar_hr_x = polar.c.heart_rate.x
polar_hr_y = polar.c.heart_rate.y

In [None]:
paired_sample.header_floyer.date_time_start

datetime.datetime(2021, 2, 8, 15, 23, 15, 471000, tzinfo=<DstTzInfo 'Europe/Berlin' CET+1:00:00 STD>)

In [None]:
paired_sample.header_polar.date_time_start

datetime.datetime(2021, 2, 8, 15, 23, 0, 73000, tzinfo=<DstTzInfo 'Europe/Berlin' CET+1:00:00 STD>)

In [None]:
paired_sample.header_polar

<DataFile: DataFile(_hash_id=1C2RATHBVH.XCMFW1, date_time_start=2021-02-08 15:23:00.073000+01:00, duration=2074.36, columns=1, slices=2, samples=2073, file_size=8.1 KB)>

In [None]:
paired_sample.header_floyer

<DataFile: DataFile(_hash_id=1C2RATHBVH.G2HHR3, date_time_start=2021-02-08 15:23:15.471000+01:00, duration=2061.51, columns=8, slices=10, samples=1639344, file_size=4.2 MB)>

In [None]:
len(floyer_hr_x)

2043

In [None]:
len(polar_hr_x)

2073

In [None]:
np.all(np.diff(floyer_hr_x) == 1)

True

In [None]:
int_polar = [int(i) for i in polar_hr_x]

In [None]:
len(int_polar)

2087

In [None]:
np.all(np.diff(int_polar) == 1)

False

In [None]:
# start-time difference in seconds: floyer start minus polar start
# (positive when the floyer recording started after the polar recording)
offset = (paired_sample.header_floyer.date_time_start - 
          paired_sample.header_polar.date_time_start).total_seconds()

In [None]:
offset

15.398

In [None]:
# the common test window starts when the later of the two recordings starts
test_start_time = max(paired_sample.header_floyer.date_time_start,
                      paired_sample.header_polar.date_time_start)
# ... and ends when the earlier of the two recordings ends.
# NOTE(review): this assignment was left unfinished (`test_end_time = `),
# which is a syntax error. The completion assumes each header exposes
# `duration` in seconds, as its repr suggests - confirm.
test_end_time = min(
    header.date_time_start + datetime.timedelta(seconds=header.duration)
    for header in (paired_sample.header_floyer, paired_sample.header_polar)
)
test_start_time

datetime.datetime(2021, 2, 8, 15, 23, 15, 471000, tzinfo=<DstTzInfo 'Europe/Berlin' CET+1:00:00 STD>)

In [None]:
class PairedSample:
    """
    One polar recording paired with one floyer recording.

    Attributes set by __init__:
    polar_hash_id, floyer_hash_id: hash ids of the two data files
    header_polar, header_floyer: objects returned by api.pull_df for them
    hr_algo_version: version passed to api.one3_hr_algo

    Attributes set by process():
    offset: floyer start minus polar start, in seconds
    floyer_hr_x, floyer_hr_y, polar_hr_x, polar_hr_y: heart-rate series after
    the leading samples of the earlier recording have been dropped
    """

    def __init__(self, polar_hash_id, floyer_hash_id, hr_algo_version):
        self.polar_hash_id = polar_hash_id
        self.floyer_hash_id = floyer_hash_id
        # header
        self.header_polar = api.pull_df(polar_hash_id)
        self.header_floyer = api.pull_df(floyer_hash_id)
        self.hr_algo_version = hr_algo_version

    def process(self):
        """
        Load both heart-rate series and trim them to a common start.

        The earlier draft of this method read names that were never defined
        (`floyer`, `paired_sample`, `self.df_polar`, `df_cshell`), sliced with
        a float, and replaced the mape / icc / da methods by their results.
        The metrics are not computed here until those methods are implemented.
        """
        # calculate floyer heart rate
        df_floyer = api.one3_hr_algo(self.hr_algo_version, self.floyer_hash_id)
        floyer_hr_x = np.asarray(df_floyer['heart_rate_t'])
        floyer_hr_y = df_floyer['heart_rate']

        # get polar heart rate
        df_polar = DataFile.objects(_hash_id=self.polar_hash_id).first()
        po_hr_x = df_polar.c.heart_rate.x
        po_hr_y = df_polar.c.heart_rate.y

        # calculate offsets (i.e. floyer and polar may not start recording at
        # the same time)
        # NOTE(review): the earlier draft added a hard-coded 2098 to this
        # value; it is dropped here because it is not derived from either
        # recording - confirm
        offset = (self.header_floyer.date_time_start -
                  self.header_polar.date_time_start).total_seconds()

        # make sure two devices start at the same time by dropping the head
        # of whichever recording started first
        # NOTE(review): slicing by whole seconds assumes one sample per
        # second in both series - confirm
        shift = int(round(abs(offset)))
        if offset > 0:
            # floyer started later: skip the polar samples recorded before it
            po_hr_x = po_hr_x[shift:]
            po_hr_y = po_hr_y[shift:]
        elif offset < 0:
            # polar started later: skip the floyer samples recorded before it
            floyer_hr_x = floyer_hr_x[shift:]
            floyer_hr_y = floyer_hr_y[shift:]

        self.offset = offset
        self.floyer_hr_x = floyer_hr_x
        self.floyer_hr_y = floyer_hr_y
        self.polar_hr_x = po_hr_x
        self.polar_hr_y = po_hr_y

        # TODO: create a new dataframe with three columns: time (s), p_hr,
        # f_hr, then compute the metrics from it

    def mape(self):
        """Placeholder for the MAPE metric (not implemented yet)."""
        raise NotImplementedError('PairedSample.mape is not implemented yet')

    def icc(self):
        """Placeholder for the ICC metric (not implemented yet)."""
        raise NotImplementedError('PairedSample.icc is not implemented yet')

    def da(self):
        """Placeholder for the DA metric (not implemented yet)."""
        raise NotImplementedError('PairedSample.da is not implemented yet')

    def line_plot(self):
        """Placeholder for a line plot; currently does nothing."""
        return

    def ba_plot(self):
        """Placeholder for a second plot; currently does nothing."""
        return

In [None]:
#import polar data
df_polar = api.pull_df('003RN9.36G4ZN')

df_polar = DataFile.objects(_hash_id='003RN9.36G4ZN').first()
po_hr_x = df_polar.c.heart_rate.x
po_hr_y = df_polar.c.heart_rate.y
offset = (df_cshell.date_time_start - 
          df_polar.date_time_start).total_seconds() + 2098

2021-03-01 10:16:30 -  INFO -     api_db_sync: 429 - Data file 003RN9.36G4ZN completely downloaded + database_entries: Scope, Config, Projects (4), Receivers (3), Persons (27), Devices (4)User (6), 


NameError: ignored

In [None]:
df_polar = api.pull_df('003RN9.36G4ZN')


2021-03-01 10:17:20 -  INFO -     api_db_sync: 429 - Data file 003RN9.36G4ZN completely downloaded + database_entries: Scope, Config, Projects (4), Receivers (3), Persons (27), Devices (4)User (6), 


In [None]:
# extract win from data dx (time) and dy (values) of equidistant x_step, but limit from time range x_start to x_end
def extract_win(dx, dy, x_step, x_start=None, x_end=None, mean='median'):
    """
    Aggregate the samples (dx, dy) into consecutive windows of width x_step.

    The first window starts at x_start; each following window starts x_step
    after the previous one. A window covers [start, start + x_step) and is
    emitted as long as its start is <= x_end, so at least one window is
    always returned. dx is expected in ascending order.

    params:
    dx: sample positions (time)
    dy: sample values, same length as dx
    x_step: window width, must be > 0
    x_start: start of the first window (default: dx[0])
    x_end: last allowed window start (default: dx[-1])
    mean: how the values of one window are combined:
          'median', 'mean' or 'max'

    outputs:
    (wx, wy): list of window starts and list of aggregated values; a window
    without samples gets np.nan. Both lists are empty when dx is empty and
    x_start / x_end are not both given.

    raises:
    ValueError for an unknown `mean`, a non-positive x_step or a NaN bound
    """
    aggregators = {'median': np.median, 'mean': np.mean, 'max': np.max}
    if mean not in aggregators:
        # previously an unknown name appended nothing for non-empty windows,
        # leaving wx and wy with different lengths
        raise ValueError(f"mean must be one of {sorted(aggregators)}, got {mean!r}")
    if not x_step > 0:
        # a zero or negative step would never move past x_end
        raise ValueError(f'x_step must be > 0, got {x_step!r}')

    n = len(dx)
    if n == 0 and (x_start is None or x_end is None):
        # no data to derive the missing bound(s) from
        return [], []

    # if x_start and x_end not defined take the whole input data
    if x_start is None:
        x_start = dx[0]
    if x_end is None:
        x_end = dx[-1]
    if x_start != x_start or x_end != x_end:
        # NaN compares unequal to itself; a NaN bound would never end the loop
        raise ValueError('x_start and x_end must not be NaN')

    aggregate = aggregators[mean]

    # x and y data windows
    wx = []
    wy = []

    i = 0
    x = x_start
    # loop the data
    while True:
        upper = x + x_step
        y_tmp = []
        # loop in one window; `not (>=)` rather than `<` so that a NaN
        # position is stepped over instead of stalling the scan
        while i < n and not dx[i] >= upper:
            if dx[i] >= x:
                y_tmp.append(dy[i])
            i += 1
        wx.append(x)
        wy.append(aggregate(y_tmp) if y_tmp else np.nan)
        x = upper
        # stop at the very end
        if x > x_end:
            break

    return wx, wy

In [None]:
# Window the polar heart rate in 6-unit steps, then trim one of the two
# series so both start together.
# NOTE(review): this cell is unfinished:
# - x_start and x_end are read by this call, but x_start is only assigned
#   further down and x_end is not assigned in any visible cell
# - polar_hr_x is passed as both dx and dy; presumably dy should be
#   polar_hr_y - confirm
polar_hr_x_w, polar_hr_y_w = extract_win(polar_hr_x, polar_hr_x, 6, x_start=x_start, x_end=x_end, mean='median')

# NOTE(review): polar_start_time and floyer_start_time are not defined in
# any visible cell
offset = int(polar_start_time - floyer_start_time)

# if polar started later
if offset >= 0: 
    # NOTE(review): this adds offset to the whole floyer_hr_x array, so
    # x_start becomes an array rather than a single start value - confirm
    x_start = floyer_hr_x + offset
    floyer_hr_x_w = floyer_hr_x[offset:]
    floyer_hr_y_w = floyer_hr_y[offset:]
    polar_hr_x_w = polar_hr_x_w
    polar_hr_y_w = polar_hr_y_w

# if floyer started later:
if offset < 0:
    x_start = floyer_hr_x
    floyer_hr_x_w = floyer_hr_x
    floyer_hr_y_w = floyer_hr_y
    # NOTE(review): offset is negative here, so [offset:] keeps the LAST
    # |offset| elements instead of dropping the first ones; presumably
    # [-offset:] was meant - confirm
    polar_hr_x_w = polar_hr_x_w[offset:]
    polar_hr_y_w = polar_hr_y_w[offset:]
    
