In [1]:
class ARGAnalysis:
    """Load, synchronise and summarise ARG participant recordings.

    The participant folder is expected to contain one sub-folder per
    participant named ``ARG_*``.  Each sub-folder holds the game event log
    (``*MainEvents.csv``), six questionnaire XML files and the raw physio
    export ``<ID>/<ID>.txt``.

    Constructing the object has side effects: the process working directory
    is changed to ``directory`` (every path used by this class is relative to
    it) and a pickle of the physio data is created for every participant that
    does not have one yet.

    Attributes:
        directory: The folder passed to the constructor.
        fileNameDF: DataFrame indexed by participant ID with one column per
            expected file suffix, holding the relative file paths.
        eventDict: Maps a short event key to a tuple
            ``(event label in MainEvents.csv, duration in seconds,
            QC column name)``.
    """

    # Questionnaire files expected in every participant folder.
    _QUESTIONNAIRE_FILES = ('UserData.xml', 'BlockShooterData.xml', 'JetPackData.xml', 'JumperData.xml',
                            'RacerData.xml', 'WizardsData.xml')
    # All per-participant files tracked in fileNameDF (column order matters).
    _FILE_SUFFIXES = ('MainEvents.csv',) + _QUESTIONNAIRE_FILES
    # Event label used to align the game clock with the physio clock.
    _SYNC_EVENT = 'Filename timestamp sync'
    _REDCAP_URL = 'https://redcap.hollandbloorview.ca/api/'

    def __init__(self, directory):
        """Index the participant folders and make sure every physio pickle exists.

        Args:
            directory (str): Folder containing the ``ARG_*`` participant
                folders.  The working directory is changed to this folder.
        """
        import os
        self.directory = directory
        os.chdir(directory)
        self.fileNameDF = self.FileNameDF()
        self.eventDict = {
            'Sync': (self._SYNC_EVENT, 1, ''),
            'BL1': ('Start pre movie pressed', 900, 'qc_ecg_noise_bl1'),
            'BubblesMod': ('Bubble Lab: Start positive modulation', 120, 'qc_ecg_noise_bubblesmod'),
            'JetpackMod': ('Jetpack Bootle: Start negative modulation', 120, 'qc_ecg_noise_jetpackmod'),
            'AstroMod': ('Astro Bootle: Start positive modulation', 120, 'qc_ecg_noise_astromod'),
            'KartMod': ('Bootle Kart: Start negative modulation', 120, 'qc_ecg_noise_kartmod'),
            'WizardMod': ("Wizard's Adventure: Start positive modulation", 120, 'qc_ecg_noise_wizardmod'),
            'BubblesPre': ('Bubble Lab: Game time started', 30, 'qc_ecg_noise_bubblespre'),
            'JetpackPre': ('Jetpack Bootle: Game time started', 30, 'qc_ecg_noise_jetpackpre'),
            'AstroPre': ('Astro Bootle: Game time started', 30, 'qc_ecg_noise_astropre'),
            'KartPre': ('Bootle Kart: Game time started', 30, 'qc_ecg_noise_kartpre'),
            'WizardPre': ("Wizard's Adventure: Game time started", 30, 'qc_ecg_noise_wizardpre'),
            'BubblesPost': ('Bubble Lab: Stop positive modulation', 30, 'qc_ecg_noise_bubblespost'),
            'JetpackPost': ('Jetpack Bootle: Stop negative modulation', 30, 'qc_ecg_noise_jetpackpost'),
            # NOTE(review): previously spelled 'Atro Bootle', unlike every other
            # Astro entry -- confirm the spelling against a real MainEvents.csv.
            'AstroPost': ('Astro Bootle: Stop positive modulation', 30, 'qc_ecg_noise_astropost'),
            # NOTE(review): previously 'Start negative modulation', which made the
            # post window identical to KartMod; the other *Post entries use 'Stop'.
            'KartPost': ('Bootle Kart: Stop negative modulation', 30, 'qc_ecg_noise_kartpost'),
            'WizardPost': ("Wizard's Adventure: Stop positive modulation", 30, 'qc_ecg_noise_wizardpost'),
            'BL2': ('Start post movie pressed', 900, 'qc_ecg_noise_bl2'),
        }
        self.checkPickleAll()

    def FileNameDF(self):
        """Build the table of per-participant file paths from the folder layout.

        Every ``ARG_*`` entry in the current working directory is treated as a
        participant folder; for each expected file suffix the first matching
        path inside that folder is recorded.

        Returns:
            pd.DataFrame: Indexed by ``ID`` with one column per file suffix.

        Raises:
            IndexError: If a participant folder lacks one of the expected files.
        """
        import glob
        import pandas as pd

        fileNameDF = pd.DataFrame(columns=['ID'])
        fileNameDF['ID'] = glob.glob('ARG_*')
        for suffix in self._FILE_SUFFIXES:
            fileNameDF[suffix] = [glob.glob(folder + '/*' + suffix)[0] for folder in fileNameDF['ID']]
        self.fileNameDF = fileNameDF.set_index('ID')
        # self.fileNameDF.to_excel("FileNameOutput.xlsx", sheet_name='FileName')  # uncomment to export
        return self.fileNameDF

    def getFileNames(self):
        """Return the table of per-participant file paths."""
        return self.fileNameDF

    def getQuestionnaires(self, excel="False"):
        """Collect the answers of all questionnaire XML files into one table.

        Args:
            excel: Unused; kept so existing callers keep working.

        Returns:
            pd.DataFrame: Indexed by ``ID`` with columns ``<file>_A1`` ..
            ``<file>_A3`` for every questionnaire file.  Values are the raw
            XML text (strings).
        """
        # TODO: Convert numerical columns into numbers
        import xml.etree.ElementTree as ET
        import pandas as pd

        def getQuestionnaireData(questionnaire):
            prefix = questionnaire[:-4]  # file suffix without the '.xml' extension
            questionnaireDF = pd.DataFrame(columns=['{}_A{}'.format(prefix, i) for i in range(1, 4)])
            for ID in self.fileNameDF.index:
                root = ET.parse(self.getFilePath(ID, questionnaire)).getroot()
                # One answer per direct child of the root element.
                questionnaireDF.loc[ID] = [child.text for child in root]
            questionnaireDF.index.name = 'ID'
            return questionnaireDF

        mergedDF = getQuestionnaireData(self._QUESTIONNAIRE_FILES[0])
        for questionnaire in self._QUESTIONNAIRE_FILES[1:]:
            mergedDF = pd.merge(mergedDF, getQuestionnaireData(questionnaire), how='outer', left_index=True,
                                right_index=True)
        # mergedDF.to_excel("QuestionnaireDataOutput.xlsx", sheet_name='QuestionnaireData')  # uncomment to export
        return mergedDF

    def remove(self, l):
        """Drop participants from the file table so later steps skip them.

        Args:
            l: Iterable of participant IDs to skip.  Unknown IDs are reported
                and ignored.
        """
        for ID in l:
            if self.checkID(ID):
                self.fileNameDF = self.fileNameDF.drop(ID)
                print('The ID: {} is has been removed'.format(ID))

    def getFilePath(self, ID, fileName):
        """Return the stored path of ``fileName`` (a column of fileNameDF) for ``ID``."""
        return self.fileNameDF.loc[ID, fileName]

    def getARGEventTime(self, ID, event):
        """Look up the game-clock time of an event in the participant's MainEvents.csv.

        When the event occurs several times, the first occurrence is used for
        the sync event and the last occurrence for every other event.

        Args:
            ID: Participant ID.
            event (str): Event label exactly as written in the ``Event`` column.

        Returns:
            The value of the first non-``Event`` column for the selected row.

        Raises:
            KeyError: If the event does not occur in the file.
        """
        import pandas as pd
        events = pd.read_csv(self.getFilePath(ID, 'MainEvents.csv')).set_index('Event')
        matches = events.loc[event]
        if isinstance(matches, pd.DataFrame):
            # Several rows share this label: pick first (sync) or last (others).
            row = matches.iloc[0] if event == self._SYNC_EVENT else matches.iloc[-1]
        else:
            row = matches
        return row.iloc[0]

    def parsePhysio(self, ID):
        """Read the raw physio export ``<ID>/<ID>.txt``.

        Returns:
            pd.DataFrame with columns Time, ECG, SYNC, EDA, TEMP, RESP, or
            None when ``ID`` is not in the file table.
        """
        import pandas as pd
        if not self.checkID(ID):
            return None
        physio = pd.read_csv('{}/{}.txt'.format(ID, ID), header=None, skiprows=8)
        physio.columns = ["Time", "ECG", "SYNC", "EDA", 'TEMP', 'RESP']
        return physio

    def picklePhysio(self, ID):
        """Parse the raw physio export and cache it as ``<ID>/<ID>.pkl``.

        Does nothing when ``ID`` is not in the file table.
        """
        physio = self.parsePhysio(ID)
        if physio is None:
            return
        physio.to_pickle('{}/{}.pkl'.format(ID, ID))

    def checkPickle(self, ID):
        """Create the physio pickle for ``ID`` if it does not exist yet."""
        import os
        if os.path.exists('{}/{}.pkl'.format(ID, ID)):
            return
        if not self.checkID(ID):
            return  # nothing to parse for an unknown participant
        self.picklePhysio(ID)
        print('Pickle for ID: {} created'.format(ID))

    def checkPickleAll(self):
        """Make sure every participant in the file table has a physio pickle."""
        for ID in self.fileNameDF.index:
            self.checkPickle(ID)

    def checkID(self, ID, show=True):
        """Return True if ``ID`` is in the file table; optionally report a miss."""
        if ID in self.fileNameDF.index:
            return True
        if show:
            print('The ID: {} is not in fileNameDF.'.format(ID))
        return False

    def checkEvent(self, event, show=True):
        """Return True if ``event`` is a key of eventDict; optionally report a miss."""
        if event in self.eventDict:
            return True
        if show:
            print('The Event: {} is not valid.'.format(event))
        return False

    def unpicklePhysio(self, ID):
        """Load the cached physio table, creating the cache first if needed.

        Returns:
            pd.DataFrame, or None when ``ID`` is not in the file table.
        """
        import pandas as pd
        # Validate first: building a pickle for an unknown ID cannot succeed.
        if not self.checkID(ID):
            return None
        self.checkPickle(ID)
        # The pickle is produced by this class from local data; do not point
        # this at pickle files from untrusted sources.
        return pd.read_pickle('{}/{}.pkl'.format(ID, ID))

    def plotECG(self, ID):
        """Plot the full raw ECG trace of one participant."""
        import matplotlib.pyplot as plt

        if self.checkID(ID):
            physio = self.unpicklePhysio(ID)
            fig = plt.figure()
            plt.plot(physio['Time'], physio['ECG'])
            fig.suptitle('{} ECG Raw'.format(ID))
            plt.xlabel('Time (s)')
            plt.ylabel('Amplitude')

    def getOffset(self, ID, physio):
        """Compute the offset between the physio clock and the game clock.

        The offset is the physio time of the first SYNC peak above 1500 minus
        the game-clock time of the sync event.

        Args:
            ID: Participant ID.
            physio (pd.DataFrame): Physio table of that participant.

        Returns:
            The offset, or None when ``ID`` is not in the file table.

        Raises:
            IndexError: If the SYNC channel has no peak above the threshold.
        """
        from scipy.signal import find_peaks
        if not self.checkID(ID):
            return None
        peaks, _ = find_peaks(physio['SYNC'].values, height=1500)
        # Peak indices are positions in the value array, hence iloc.
        physioSyncTime = physio['Time'].iloc[peaks[0]]
        # Uses the filename sync event rather than the first "sync on" event.
        argSyncTime = self.getARGEventTime(ID, self._SYNC_EVENT)
        return physioSyncTime - argSyncTime

    def getOffsetDF(self):
        """Return a table (indexed by ID) with the clock offset of every participant."""
        import pandas as pd
        self.offsetDF = pd.DataFrame(self.fileNameDF.index)
        # getOffset needs the participant's physio table to locate the sync peak.
        self.offsetDF['Offset'] = [self.getOffset(ID, self.unpicklePhysio(ID)) for ID in self.fileNameDF.index]
        self.offsetDF = self.offsetDF.set_index('ID')
        return self.offsetDF

    def getARGEventTimesDF(self):
        """Return a table (indexed by ID) with the game-clock time of every event."""
        import pandas as pd
        # Start from a table whose only column is the participant ID.
        self.ARGEventTimesDF = pd.DataFrame(self.fileNameDF.index)
        for event, (label, _duration, _qcColumn) in self.eventDict.items():
            self.ARGEventTimesDF[event] = [self.getARGEventTime(ID, label) for ID in self.ARGEventTimesDF['ID']]
        self.ARGEventTimesDF = self.ARGEventTimesDF.set_index('ID')
        return self.ARGEventTimesDF

    def getOffsetEventTime(self, ID, event, physio):
        """Return the physio-clock time of ``event``: game-clock time plus offset."""
        ARGEventTime = self.getARGEventTime(ID, self.eventDict[event][0])
        offset = self.getOffset(ID, physio)
        return ARGEventTime + offset

    def getOffsetEventTimesDF(self):
        """Return a table (indexed by ID) with the physio-clock time of every event."""
        import pandas as pd
        self.getOffsetDF()
        self.getARGEventTimesDF()
        self.offsetEventTimesDF = pd.merge(self.ARGEventTimesDF, self.offsetDF, how='outer', left_index=True,
                                           right_index=True)
        for col in self.offsetEventTimesDF:
            if col != 'Offset':
                self.offsetEventTimesDF[col] = self.offsetEventTimesDF[col] + self.offsetEventTimesDF['Offset']
        self.offsetEventTimesDF = self.offsetEventTimesDF.drop(columns=['Offset'])
        return self.offsetEventTimesDF

    def getPhysioEventIndex(self, ID, event, physio, duration=0, getIndex=True):
        """Locate the first physio sample at or after an event (plus ``duration``).

        Args:
            ID: Participant ID.
            event (str): Key of eventDict.
            physio (pd.DataFrame): Physio table; ``Time`` must be ascending.
            duration: Seconds added to the event time before searching.
            getIndex (bool): Return the row position when True, otherwise the
                ``Time`` value of that row.

        Returns:
            int row position, or the sample time when ``getIndex`` is False.
        """
        offsetEventTime = self.getOffsetEventTime(ID, event, physio) + duration
        # Binary search on the raw array: positional and much faster than
        # bisecting the pandas Series element by element.
        physioEventIndex = int(physio['Time'].values.searchsorted(offsetEventTime, side='left'))
        if getIndex:
            return physioEventIndex
        return physio.iloc[physioEventIndex]["Time"]

    def cutPhysio(self, ID, event):
        """Return the physio rows from the event start to start + event duration.

        Also stores the participant's full physio table in ``self.physio``.
        """
        self.physio = self.unpicklePhysio(ID)
        start = self.getPhysioEventIndex(ID, event, self.physio)
        end = self.getPhysioEventIndex(ID, event, self.physio, self.eventDict[event][1], True)
        return self.physio.iloc[start:end]

    def plotEvent(self, ID, event, signal='ECG'):
        """Plot one physio channel during one event window and show the figure."""
        import matplotlib.pyplot as plt
        # matplotlib.use('TkAgg')
        plt.interactive(True)
        if self.checkID(ID) and self.checkEvent(event):
            fig = plt.figure()
            physio = self.cutPhysio(ID, event)
            plt.plot(physio['Time'], physio[signal])
            fig.suptitle('{} {} during {}'.format(ID, signal, event))
            plt.xlabel('Time (s)')
            plt.ylabel('Amplitude')
            plt.show()

    def processECG(self, ID, event, sampling_rate=2024):
        """Run the biosppy ECG pipeline on one event window.

        Args:
            ID: Participant ID.
            event (str): Key of eventDict.
            sampling_rate: Sampling rate in Hz handed to biosppy.
                NOTE(review): 2024 is the historical hard-coded value --
                confirm it matches the recording device.

        Returns:
            dict: The biosppy result converted with ``as_dict()``.
        """
        from biosppy.signals import ecg
        physio = self.cutPhysio(ID, event)
        out = ecg.ecg(signal=physio['ECG'].values, sampling_rate=sampling_rate, show=False)
        return out.as_dict()

    def getHeartRate(self, ID, event):
        """Return the mean of the biosppy ``heart_rate`` series for one event window."""
        ecgDict = self.processECG(ID, event)
        return ecgDict['heart_rate'].mean()

    def getHeartRateDF(self):
        """Return a table (indexed by ID) with the mean heart rate of every event.

        The ``Sync`` entry is skipped.  Each finished event key is printed as
        a progress indicator.
        """
        import pandas as pd
        self.heartRateDF = pd.DataFrame(self.fileNameDF.index)
        for event in self.eventDict:
            if event != 'Sync':
                self.heartRateDF[event] = [self.getHeartRate(ID, event) for ID in self.heartRateDF['ID']]
                print(event)
        self.heartRateDF = self.heartRateDF.set_index('ID')
        return self.heartRateDF

    def getQC(self, filePath):
        """Load the QC table from a CSV export and keep only known participants.

        Args:
            filePath (str): CSV file with a ``participant_id`` column and the
                QC columns named in eventDict.

        Returns:
            pd.DataFrame: Indexed by ``participant_id``; the QC columns are
            read as strings so interval lists such as ``"10-20,30-45"`` survive.
        """
        import pandas as pd
        qcColumns = {spec[2]: str for spec in self.eventDict.values() if spec[2]}
        qcTable = pd.read_csv(filePath, keep_default_na=True, dtype=qcColumns).set_index("participant_id")
        # Drop participants that do not yet have physio data (not in fileNameDF).
        self.qcTable = qcTable[qcTable.index.isin(self.fileNameDF.index)]
        return self.qcTable

    def calcNoise(self, ID, event):
        """Sum the noisy seconds recorded in the QC table for one event.

        The QC cell is expected to hold a comma separated list of ``a-b``
        intervals; entries that are not of that form are ignored.

        Returns:
            0 when the cell is ``"0"``, ``0`` or empty; ``"TBD"`` when the cell
            is missing (NaN); otherwise the summed interval lengths.
            ``"Error"`` when the cell cannot be looked up or parsed.
        """
        import pandas as pd
        try:
            noiseStr = self.qcTable.loc[ID, self.eventDict[event][2]]
            # pd.isna replaces the former np.NaN comparison (np.NaN was removed
            # in NumPy 2.0, which silently turned every missing cell into "Error").
            if not isinstance(noiseStr, str) and pd.isna(noiseStr):
                return "TBD"
            if noiseStr == "0" or noiseStr == 0 or noiseStr == "":
                return 0
            totalNoise = 0
            for interval in noiseStr.replace(' ', '').strip(',').split(','):
                bounds = interval.split('-')
                if len(bounds) == 2:
                    totalNoise += float(bounds[1]) - float(bounds[0])
            return totalNoise
        except (AttributeError, KeyError, TypeError, ValueError):
            # Unknown ID/event, QC table not loaded, or a malformed cell.
            return "Error"

    def getQCDF(self, token):
        """Download the QC table from REDCap and summarise the noise per event.

        Args:
            token (str): REDCap API token.

        Returns:
            pd.DataFrame: One row per participant with ``<event>Time`` (noisy
            seconds) and ``<event>PercentNoise`` columns.
        """
        import pandas as pd
        self.getQCRedcap(token)
        events = [event for event in self.eventDict if event != "Sync"]
        self.qcDF = pd.DataFrame(columns=[event + "Time" for event in events] +
                                         [event + "PercentNoise" for event in events])
        for ID in self.qcTable.index.values:
            self.qcDF.loc[ID] = ([self.calcNoise(ID, event) for event in events] +
                                 [self.percentNoise(ID, event) for event in events])
        return self.qcDF

    def testParticipant(self, ID):
        """Print the computed noise of every event (except Sync) for one participant."""
        for event in self.eventDict:
            if event == "Sync":
                continue
            print(event)
            print(self.calcNoise(ID, event))

    def percentNoise(self, ID, event):
        """Return the noise of an event as a percentage of the event duration.

        Returns:
            The percentage, or the ``"TBD"`` / ``"Error"`` marker produced by
            calcNoise.
        """
        noise = self.calcNoise(ID, event)
        if isinstance(noise, str):  # "TBD" or "Error" marker
            return noise
        return noise / self.eventDict[event][1] * 100

    def getParticipantIDs(self):
        """Return the participant IDs of the file table as an array."""
        return self.fileNameDF.index.values

    def toExcel(self, table, directory, fileName):
        """Write ``table`` to ``<directory>/<fileName>.xlsx`` (extension is appended)."""
        table.to_excel("{}/{}.xlsx".format(directory, fileName))

    def getQCRedcap(self, token):
        """Download the ``qc_ecg`` form of all known participants from REDCap.

        Args:
            token (str): REDCap API token.

        Returns:
            pd.DataFrame: Indexed by ``participant_id``.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        import requests
        import pandas as pd
        from io import StringIO
        data = {
            'token': token,
            'content': 'record',
            'format': 'csv',
            'type': 'flat',
            'forms[0]': 'qc_ecg',
            'fields[0]': 'participant_id',
            'rawOrLabel': 'raw',
            'rawOrLabelHeaders': 'raw',
            'exportCheckboxLabel': 'false',
            'exportSurveyFields': 'false',
            'exportDataAccessGroups': 'false',
            'returnFormat': 'csv'
        }
        for i, record in enumerate(self.fileNameDF.index.values):
            data["records[{}]".format(i)] = record
        r = requests.post(self._REDCAP_URL, data)
        # Fail loudly instead of trying to parse an error page as the QC table.
        r.raise_for_status()
        self.qcTable = pd.read_csv(StringIO(r.text))
        self.qcTable = self.qcTable.set_index('participant_id')

        return self.qcTable

# Example session. Constructing ARGAnalysis changes the working directory to
# the given folder and creates any missing physio pickles, so this line does
# real file I/O as soon as the cell runs.
# ARG = ARGAnalysis('Z:/ARG (CTO 1610)/Volunteer_Work')
ARG = ARGAnalysis('/Users/jenny/Documents/Work/2019-HollandBloorviewRA/AzadehKushki/ARG/Sample Data')
# NOTE: toExcel appends ".xlsx" itself, so passing "test.xlsx" writes "test.xlsx.xlsx".
# ARG.toExcel(ARG.getQuestionnaires(), "/Users/jenny/Desktop", "test.xlsx")
# ARG.plotEvent("ARG_002", "BL1")
# ARG.getQC('/Users/jenny/Downloads/ARGCTO1610_DATA_2019-08-28_0731.csv')

# SECURITY: what looked like a live REDCap API token was hard-coded in the call
# below. It has been redacted here; revoke that token and supply a fresh one at
# run time (e.g. from an environment variable) instead of committing it.
#table = ARG.getQCDF('<REDCAP_API_TOKEN>')
#ARG.toExcel(table, "/Users/jenny/Desktop", "test.xlsx")


#from ARGAnalysis import ARGAnalysis





In [2]:
# Display the per-participant file table (one row per ARG_* folder, one column per expected file).
ARG.getFileNames()

Unnamed: 0_level_0,MainEvents.csv,UserData.xml,BlockShooterData.xml,JetPackData.xml,JumperData.xml,RacerData.xml,WizardsData.xml
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ARG_999,ARG_999/20190711-094718-MainEvents.csv,ARG_999/201907110947-UserData.xml,ARG_999/201907111014-BlockShooterData.xml,ARG_999/201907111018-JetPackData.xml,ARG_999/201907111021-JumperData.xml,ARG_999/201907111025-RacerData.xml,ARG_999/201907111028-WizardsData.xml
ARG_002,ARG_002/20190726-100624-MainEvents.csv,ARG_002/201907261008-UserData.xml,ARG_002/201907261039-BlockShooterData.xml,ARG_002/201907261043-JetPackData.xml,ARG_002/201907261046-JumperData.xml,ARG_002/201907261050-RacerData.xml,ARG_002/201907261053-WizardsData.xml
