In [3]:
from datetime import datetime, timedelta
from pathlib import Path
import os
import sys
import numpy as np
import pandas as pd
import re
import AMBRA_Backups
from bs4 import BeautifulSoup
import AMBRA_Utils
from redcap_funcs import comp_schema_cap_db, get_project_schema

db_name = 'CAPTIVA'
db = AMBRA_Backups.database.Database(db_name)
project_name = 'CAPTIVA DC'
project = AMBRA_Backups.redcap_funcs.get_redcap_project(project_name) 

def q(query, record=None):
    if any(substring in query for substring in ['UPDATE', 'INSERT', 'DELETE']):
        return db.run_insert_query(query, record)
    elif 'SELECT' in query:
        return pd.DataFrame(db.run_select_query(query, record, column_names=True))
    else:
        print('You dont have UPDATE, INSERT, DELETE or SELECT in your query my guy')

def display_md(df):
    return print(df.to_markdown())

In [16]:
db.run_select_query("""SELECT redcap_variable FROM CRF_Data_RedCap WHERE id_crf = %s""", ['10793'])

[('follow_up_brain_crf_complete',),
 ('fu_brain_qc',),
 ('q3001',),
 ('q3001a',),
 ('q3002',),
 ('q3003',),
 ('q3004',),
 ('q3006',),
 ('q3008_col1(ind)',),
 ('q3008_col1(l_car_it)',),
 ('q3008_col1(others)',),
 ('q3008_col1(r_car_it)',),
 ('q3008_col1(ven)',),
 ('q3008_col1(vert_bas_it)',),
 ('q3010',),
 ('q3011_a_col1(vert_bas_it)',),
 ('q3011_c_col1(cort_r_pca)',),
 ('q3011_c_col1(r_sup_cerebeller)',),
 ('q3011_c_col2_2(cort_l_pca)',),
 ('q3015',),
 ('q3018',),
 ('q3019',)]

In [3]:
project = AMBRA_Backups.redcap_funcs.get_redcap_project('CAPTIVA DC')
project_name = project.export_project_info()['project_title'].strip()
db_name = 'CAPTIVA'
db = AMBRA_Backups.database.Database(db_name)
db.run_insert_query('UPDATE backup_info_RedCap SET last_backup = %s WHERE project_name = %s', [datetime.now(), project_name])

0

In [6]:
## schema comp sanity check

db = AMBRA_Backups.database.Database(db_name)
project = AMBRA_Backups.redcap_funcs.get_redcap_project(project_name)

forms = [f['instrument_name'] for f in project.export_instruments()]


master_discreps = ''

for crf_name in forms:

    # redcap_variable discrepancies
    unique_data_vars = pd.DataFrame(db.run_select_query("""SELECT DISTINCT(redcap_variable) 
        FROM CRF_RedCap
        JOIN CRF_Data_RedCap
            ON CRF_RedCap.id = CRF_Data_RedCap.id_crf
        WHERE crf_name = %s""", [crf_name], column_names=True))
    var_discrep_string = ''
    if not unique_data_vars.empty:
        unique_data_vars = unique_data_vars['redcap_variable']
        schema_vars = pd.DataFrame(db.run_select_query("""SELECT redcap_variable FROM CRF_Schema_RedCap
            WHERE crf_name = %s""", [crf_name], column_names=True))['redcap_variable']

        var_discreps = unique_data_vars[~unique_data_vars.isin(schema_vars)].to_list()
        if var_discreps:
            # redcap_variables inside the data table might not have a schema variable to coorispond to, but might have an active crf_id
            # So the non-included redcap_variable will be attached to a csv report if not taken out of the data table, or have the schema corrected. Case by case 
            var_discrep_string = f"\nThe following CRF_Data_RedCap.redcap_variable's are not in CRF_schema_RedCap.redcap_variable's:\n{var_discreps}\n\n"

    print('redcap_variables')
    print('CRF_Data_RedCap')
    display(unique_data_vars.to_frame())
    print('CRF_Schema_RedCap')
    display(schema_vars.to_frame())



    # question text discrepancies
    schema_questions = pd.DataFrame(db.run_select_query("""SELECT question_text, redcap_variable FROM CRF_Schema_RedCap
                                    WHERE crf_name = %s AND question_text IS NOT NULL""", [crf_name], column_names=True))
    schema_questions['variable-value'] = schema_questions['redcap_variable']+schema_questions['question_text']

    api_questions = pd.DataFrame(project.export_metadata())
    api_questions = api_questions[api_questions['form_name'] == crf_name]
    field_names = pd.DataFrame(project.export_field_names())
    field_names.rename(columns={'original_field_name': 'field_name'}, inplace=True)
    # api_questions = pd.merge(field_names, api_questions, on='field_name')
    api_questions = pd.merge(api_questions, field_names, on='field_name', how='left')

    # replacing
    def val_to_text(row):
        if row['field_type'] == 'checkbox':
            dic = {op.split(',')[0].strip() : op.split(',')[1].strip() for op in row['select_choices_or_calculations'].split('|')}
            return dic[row['choice_value']]
        else:
            return row['select_choices_or_calculations']
    api_questions['select_choices_or_calculations'] = api_questions.apply(val_to_text, axis=1)

    def replace_seperators(row):
        if row['field_type'] == 'radio':
            return row['select_choices_or_calculations'].replace(',', '=')
        else:
            return row['select_choices_or_calculations']
    api_questions['select_choices_or_calculations'] = api_questions.apply(replace_seperators, axis=1)

    api_questions.loc[(api_questions['field_type'] == 'checkbox') | 
                    (api_questions['field_type'] == 'radio') | 
                    (api_questions['field_type'] == 'yesno'), 'data_type'] = 'int'
    api_questions.loc[api_questions['field_type'] == 'text', 'data_type'] = 'string'

    api_questions.loc[api_questions['export_field_name'].str.contains('___', na=False), 'export_field_name'] = api_questions['export_field_name'].apply(lambda x: 
                                                                                                        x.split('___')[0] if isinstance(x, str) else x)+'('+api_questions['choice_value']+')'
    api_questions['redcap_variable'] = api_questions['export_field_name']
    api_questions

    def only_html(row):
        soup = BeautifulSoup(row['field_label'], 'html.parser')
        if bool(soup.find()):
            return row['field_label']
    master_html = ''.join(api_questions.apply(only_html, axis=1).dropna().values.tolist())
    api_questions = api_questions[(api_questions['field_type'] != 'descriptive') &
                                    ~(api_questions['field_label'].str.contains('record', case=False)) &
                                (~api_questions['field_name'].apply(lambda x: x in master_html))][['redcap_variable','field_label']]
    api_questions['variable-value'] = api_questions['redcap_variable']+api_questions['field_label']

    question_discreps = api_questions[~api_questions['variable-value'].isin(schema_questions['variable-value'])]
    ques_discrep_string = ''
    if not question_discreps.empty:
        discrep_dict = {v[0]:v[1] for v in question_discreps.values}
        ques_discrep_string = f"\nThe following api-metadata question_text's are not in CRF_Schema_RedCap.question_text's:\n{{redcap_variable : question_text}}\n\n{discrep_dict}\n\n"  

    print('question_text')
    print('schema_questions')
    display(schema_questions)
    print('api_questions')
    display(api_questions.reset_index())


    # radio button option discrepancies
    schema_radio_options = pd.DataFrame(db.run_select_query("""SELECT * FROM CRF_Schema_RedCap
                                WHERE crf_name = %s AND question_type = 'radio'""", [crf_name], column_names=True))
    radio_discrep_string = ''
    if not schema_radio_options.empty:
        schema_radio_options = schema_radio_options['data_labels']
        def schema_rep_seps(string_ops):
            return '|'.join([ss.split('=')[0].strip()+'='+'='.join(ss.split('=')[1:]).strip() for ss in string_ops.split('|')])
        schema_radio_options = schema_radio_options.apply(schema_rep_seps)

        api_radio_options = pd.DataFrame(project.metadata)
        api_radio_options = api_radio_options[(api_radio_options['form_name'] == crf_name) & 
                                            (api_radio_options['field_type'] == 'radio')][['field_name', 'select_choices_or_calculations']]

        def api_rep_seps(string_ops):
            return '|'.join([ss.split(',')[0].strip()+'='+','.join(ss.split(',')[1:]).strip() for ss in string_ops.split('|')])
        api_radio_options['select_choices_or_calculations'] = api_radio_options['select_choices_or_calculations'].apply(api_rep_seps)

        radio_discreps = api_radio_options[~api_radio_options['select_choices_or_calculations'].isin(schema_radio_options)]
        if not radio_discreps.empty:
            discrep_dict = {v[0]:v[1] for v in radio_discreps.values}
            radio_discrep_string = f"The following api-metadata radio button options's are not in CRF_Schema_RedCap.data_labels's(radio button options):\n{{redcap_variable : select_choices_or_calculations}}\n\n{discrep_dict}\n"

    print('radio button options')
    print('schema_radio_options')
    display(schema_radio_options.to_frame())
    print('api_radio_options')
    display(api_radio_options.reset_index())

    form_discrepancies = var_discrep_string + ques_discrep_string + radio_discrep_string
    if form_discrepancies:
        master_discreps += f'\n{crf_name:-^{40}}\n{form_discrepancies}'


if master_discreps:
    print('====================================================================')
    print('====================================================================')
    print(master_discreps)
    print('====================================================================')
    print('====================================================================')
    raise Exception('Please handle the above discrepancies')

redcap_variables
CRF_Data_RedCap


Unnamed: 0,redcap_variable
0,ct_comments
1,ct_complete
2,ct_init
3,ct_status
4,l_aica_ct(1)
...,...
209,r_prox_nondom_m2_ct(1)
210,r_sca_ct(1)
211,r_top_bas_ct(1)
212,r_vert_art_ct(1)


CRF_Schema_RedCap


Unnamed: 0,redcap_variable
0,ct_comments
1,ct_complete
2,ct_init
3,ct_status
4,l_aica_ct(1)
...,...
210,r_sca_ct(1)
211,r_top_bas_ct(1)
212,r_vert_art_ct(1)
213,r_vert_bas_jun_ct(1)


question_text
schema_questions


  soup = BeautifulSoup(row['field_label'], 'html.parser')


Unnamed: 0,question_text,redcap_variable,variable-value
0,CRP comments,ct_comments,ct_commentsCRP comments
1,CRP initials,ct_init,ct_initCRP initials
2,CRF status,ct_status,ct_statusCRF status
3,AICA,l_aica_ct(1),l_aica_ct(1)AICA
4,Anterior Temporal Artery,l_ant_temp_art_ct(1),l_ant_temp_art_ct(1)Anterior Temporal Artery
...,...,...,...
207,SCA,r_sca_ct(1),r_sca_ct(1)SCA
208,Top of Basilar (substantial segment of Basilar...,r_top_bas_ct(1),r_top_bas_ct(1)Top of Basilar (substantial seg...
209,Vertebral Artery,r_vert_art_ct(1),r_vert_art_ct(1)Vertebral Artery
210,Vertebral Basilar Junction (proximal to the AICA),r_vert_bas_jun_ct(1),r_vert_bas_jun_ct(1)Vertebral Basilar Junction...


api_questions


Unnamed: 0,index,redcap_variable,field_label,variable-value
0,1,q1001,Timepoint,q1001Timepoint
1,2,q1002(1),Scan Type,q1002(1)Scan Type
2,3,q1002(2),Scan Type,q1002(2)Scan Type
3,4,q1002(3),Scan Type,q1002(3)Scan Type
4,5,q1002(4),Scan Type,q1002(4)Scan Type
...,...,...,...,...
79,213,q1099,Signature of Central Reader,q1099Signature of Central Reader
80,214,q1100,Reviewer's general comments,q1100Reviewer's general comments
81,215,ct_status,CRF status,ct_statusCRF status
82,216,ct_comments,CRP comments,ct_commentsCRP comments


radio button options
schema_radio_options


Unnamed: 0,data_labels
0,1=Unread|2=Query generated|3=Read - Unverified...
1,1=Baseline SOC
2,0=Abnormal|1=Normal
3,0=Abnormal|1=Normal
4,0=Abnormal|1=Normal
5,0=Abnormal|1=Normal
6,0=Abnormal|1=Normal
7,0=Abnormal|1=Normal
8,0=Abnormal|1=Normal
9,0=Abnormal|1=Normal


api_radio_options


Unnamed: 0,index,field_name,select_choices_or_calculations
0,1,q1001,1=Baseline SOC
1,61,q1019_cau,0=Abnormal|1=Normal
2,62,q1020_len_nuc,0=Abnormal|1=Normal
3,63,q1021_ins_rib,0=Abnormal|1=Normal
4,64,q1022_int_cap,0=Abnormal|1=Normal
5,65,q1023_m1,0=Abnormal|1=Normal
6,66,q1024_m2,0=Abnormal|1=Normal
7,67,q1025_m3,0=Abnormal|1=Normal
8,68,q1026_m4,0=Abnormal|1=Normal
9,69,q1027_m5,0=Abnormal|1=Normal


redcap_variables
CRF_Data_RedCap


Unnamed: 0,redcap_variable
0,l_aica_mr(1)
1,l_ant_temp_art(1)
2,l_callosomarginal(1)
3,l_car_bif_mr(1)
4,l_cavernous_ica(1)
...,...
356,r_vert_art(1)
357,r_vert_bas_jun(1)
358,q2018(-10)
359,q2029_mr(-10)


CRF_Schema_RedCap


Unnamed: 0,redcap_variable
0,l_aica_mr(1)
1,l_ant_temp_art(1)
2,l_callosomarginal(1)
3,l_car_bif_mr(1)
4,l_cavernous_ica(1)
...,...
353,r_prox_nondom_m2(1)
354,r_sca_mr(1)
355,r_top_bas(1)
356,r_vert_art(1)


question_text
schema_questions


  soup = BeautifulSoup(row['field_label'], 'html.parser')


Unnamed: 0,question_text,redcap_variable,variable-value
0,AICA,l_aica_mr(1),l_aica_mr(1)AICA
1,Anterior Temporal Artery,l_ant_temp_art(1),l_ant_temp_art(1)Anterior Temporal Artery
2,Callosomarginal,l_callosomarginal(1),l_callosomarginal(1)Callosomarginal
3,Carotid Bifurcation,l_car_bif_mr(1),l_car_bif_mr(1)Carotid Bifurcation
4,Cavernous ICA,l_cavernous_ica(1),l_cavernous_ica(1)Cavernous ICA
...,...,...,...
352,Prox Non-Dominant M2,r_prox_nondom_m2(1),r_prox_nondom_m2(1)Prox Non-Dominant M2
353,SCA,r_sca_mr(1),r_sca_mr(1)SCA
354,Top of Basilar (substantial segment of Basilar...,r_top_bas(1),r_top_bas(1)Top of Basilar (substantial segmen...
355,Vertebral Artery,r_vert_art(1),r_vert_art(1)Vertebral Artery


api_questions


Unnamed: 0,index,redcap_variable,field_label,variable-value
0,0,q2001,Time point,q2001Time point
1,1,q2002(1),Scan Type,q2002(1)Scan Type
2,2,q2002(2),Scan Type,q2002(2)Scan Type
3,3,q2002(3),Scan Type,q2002(3)Scan Type
4,4,q2002(4),Scan Type,q2002(4)Scan Type
...,...,...,...,...
103,366,q2129,Signature of Central Reader,q2129Signature of Central Reader
104,367,q2131,Reviewer's general comments,q2131Reviewer's general comments
105,368,mr_status,CRF status,mr_statusCRF status
106,369,mri_comments,CRP comments,mri_commentsCRP comments


radio button options
schema_radio_options


Unnamed: 0,data_labels
0,1=Unread|2=Query generated|3=Read - Unverified...
1,1=Hematoma|2=Petechiae hemorrhage|3=Other|4=None
2,1=GRE|2=SWI|3=SWAN|4=Other
3,0=No|1=Yes|2=Yes (TNTC)|-9=N/A (No SWI sequenc...
4,1=Baseline SOC|2=Follow-Up|3=Unscheduled
5,1=Yes|2=Limited Sequences|3=Limited Sequences-...
6,1=Yes|0=No|-9=NA - DWI Sequence not obtained
7,1=Subcortical|2=Cortical|3=Both
8,1=Single|2=Multiple
9,0=Abnormal|1=Normal


api_radio_options


Unnamed: 0,index,field_name,select_choices_or_calculations
0,195,q2001,1=Baseline SOC|2=Follow-Up|3=Unscheduled
1,202,q2008,1=Yes|2=Limited Sequences|3=Limited Sequences-...
2,239,q2016,1=Yes|0=No|-9=NA - DWI Sequence not obtained
3,240,q2017,1=Subcortical|2=Cortical|3=Both
4,241,q2018a,1=Single|2=Multiple
5,283,q2020_cau,0=Abnormal|1=Normal
6,284,q2021_len_nuc,0=Abnormal|1=Normal
7,285,q2022_ins_rib,0=Abnormal|1=Normal
8,286,q2023_int_cap,0=Abnormal|1=Normal
9,287,q2024_m1,0=Abnormal|1=Normal


redcap_variables
CRF_Data_RedCap


Unnamed: 0,redcap_variable
0,evt_comments
1,evt_complete
2,evt_init
3,evt_status
4,l_aica_evt_10(1)
...,...
1220,r_vert_bas_jun_evt_6(1)
1221,r_vert_bas_jun_evt_7(1)
1222,r_vert_bas_jun_evt_8(1)
1223,r_vert_bas_jun_evt_9(1)


CRF_Schema_RedCap


Unnamed: 0,redcap_variable
0,evt_comments
1,evt_complete
2,evt_init
3,evt_status
4,l_aica_evt_10(1)
...,...
1220,r_vert_bas_jun_evt_6(1)
1221,r_vert_bas_jun_evt_7(1)
1222,r_vert_bas_jun_evt_8(1)
1223,r_vert_bas_jun_evt_9(1)


question_text
schema_questions


  soup = BeautifulSoup(row['field_label'], 'html.parser')


Unnamed: 0,question_text,redcap_variable,variable-value
0,CRP comments,evt_comments,evt_commentsCRP comments
1,CRP initials,evt_init,evt_initCRP initials
2,CRF status,evt_status,evt_statusCRF status
3,Please select site of intracranial dissecition:,l_aica_evt_10(1),l_aica_evt_10(1)Please select site of intracra...
4,Please select site of bleeding/contrast extrav...,l_aica_evt_11(1),l_aica_evt_11(1)Please select site of bleeding...
...,...,...,...
1219,Please select site of intracranial dissection ...,r_vert_bas_jun_evt_6(1),r_vert_bas_jun_evt_6(1)Please select site of i...
1220,Please select proximal site of occlusion(s):,r_vert_bas_jun_evt_7(1),r_vert_bas_jun_evt_7(1)Please select proximal ...
1221,Please select site of confirmed emboli to new ...,r_vert_bas_jun_evt_8(1),r_vert_bas_jun_evt_8(1)Please select site of c...
1222,Please select site of bleeding/contrast extrav...,r_vert_bas_jun_evt_9(1),r_vert_bas_jun_evt_9(1)Please select site of b...


api_questions


Unnamed: 0,index,redcap_variable,field_label,variable-value
0,0,q3002,Date of first image acquired,q3002Date of first image acquired
1,1,q3003,"Ipsilateral to ischemic hemisphere, does initi...","q3003Ipsilateral to ischemic hemisphere, does ..."
2,2,q3005,"If ""Yes,"" etiology","q3005If ""Yes,"" etiology"
3,3,q3006,"If ""Other,"" specify","q3006If ""Other,"" specify"
4,4,q3007(1),Indicate segment(s) containing the maximum deg...,q3007(1)Indicate segment(s) containing the max...
5,5,q3007(2),Indicate segment(s) containing the maximum deg...,q3007(2)Indicate segment(s) containing the max...
6,6,q3007(3),Indicate segment(s) containing the maximum deg...,q3007(3)Indicate segment(s) containing the max...
7,7,q3007(4),Indicate segment(s) containing the maximum deg...,q3007(4)Indicate segment(s) containing the max...
8,8,q3007(5),Indicate segment(s) containing the maximum deg...,q3007(5)Indicate segment(s) containing the max...
9,9,q3007(6),Indicate segment(s) containing the maximum deg...,q3007(6)Indicate segment(s) containing the max...


radio button options
schema_radio_options


Unnamed: 0,data_labels
0,1=Unread|2=Query generated|3=Read - Unverified...
1,1=< 50%|2=50-70%|3=71-99%|4=100%|0=No
2,1=Atheromatous disease|2=Dissection|3=Other
3,0=None|1=< 50%|2=50-70%|3=71-99%|4=100%
4,0=0|1=1|2=2A|3=2B|4=2C|5=3
5,1=Iatrogenic|2=Underlying|8=Unknown|0=No
6,1=Iatrogenic|2=Underlying|8=Unknown|0=No
7,1=Iatrogenic|2=Underlying|8=Unknown|0=No
8,0=0|1=1|2=2A|3=2B 50|4=2B 67|5=2C|6=3
9,1=Iatrogenic|2=Underlying|8=Unknown


api_radio_options


Unnamed: 0,index,field_name,select_choices_or_calculations
0,528,q3003,1=< 50%|2=50-70%|3=71-99%|4=100%|0=No
1,529,q3005,1=Atheromatous disease|2=Dissection|3=Other
2,533,q3009,0=None|1=< 50%|2=50-70%|3=71-99%|4=100%
3,535,q3011,0=0|1=1|2=2A|3=2B|4=2C|5=3
4,917,q3027,1=Iatrogenic|2=Underlying|8=Unknown|0=No
5,1222,q3042,1=Iatrogenic|2=Underlying|8=Unknown|0=No
6,1376,q3051,1=Iatrogenic|2=Underlying|8=Unknown|0=No
7,1453,q3056,0=0|1=1|2=2A|3=2B 50|4=2B 67|5=2C|6=3
8,1682,q3067,1=Iatrogenic|2=Underlying|8=Unknown
9,1760,evt_status,1=Unread|2=Query generated|3=Read - Unverified...


redcap_variables
CRF_Data_RedCap


Unnamed: 0,redcap_variable
0,acu_hem_fu
1,acu_hem_loc(1)
2,acu_hem_loc(10)
3,acu_hem_loc(2)
4,acu_hem_loc(3)
5,acu_hem_loc(4)
6,acu_hem_loc(5)
7,acu_hem_loc(6)
8,acu_hem_loc(7)
9,acu_hem_loc(8)


CRF_Schema_RedCap


Unnamed: 0,redcap_variable
0,acu_hem_fu
1,acu_hem_loc(1)
2,acu_hem_loc(10)
3,acu_hem_loc(2)
4,acu_hem_loc(3)
5,acu_hem_loc(4)
6,acu_hem_loc(5)
7,acu_hem_loc(6)
8,acu_hem_loc(7)
9,acu_hem_loc(8)


question_text
schema_questions


Unnamed: 0,question_text,redcap_variable,variable-value
0,Acute hemorrhage seen on image?,acu_hem_fu,acu_hem_fuAcute hemorrhage seen on image?
1,"If ""Yes,"" check all that apply",acu_hem_loc(1),"acu_hem_loc(1)If ""Yes,"" check all that apply"
2,"If ""Yes,"" check all that apply",acu_hem_loc(10),"acu_hem_loc(10)If ""Yes,"" check all that apply"
3,"If ""Yes,"" check all that apply",acu_hem_loc(2),"acu_hem_loc(2)If ""Yes,"" check all that apply"
4,"If ""Yes,"" check all that apply",acu_hem_loc(3),"acu_hem_loc(3)If ""Yes,"" check all that apply"
5,"If ""Yes,"" check all that apply",acu_hem_loc(4),"acu_hem_loc(4)If ""Yes,"" check all that apply"
6,"If ""Yes,"" check all that apply",acu_hem_loc(5),"acu_hem_loc(5)If ""Yes,"" check all that apply"
7,"If ""Yes,"" check all that apply",acu_hem_loc(6),"acu_hem_loc(6)If ""Yes,"" check all that apply"
8,"If ""Yes,"" check all that apply",acu_hem_loc(7),"acu_hem_loc(7)If ""Yes,"" check all that apply"
9,"If ""Yes,"" check all that apply",acu_hem_loc(8),"acu_hem_loc(8)If ""Yes,"" check all that apply"


api_questions


Unnamed: 0,index,redcap_variable,field_label,variable-value
0,0,q4001,Date of Imaging,q4001Date of Imaging
1,1,q4002,Time of Imaging,q4002Time of Imaging
2,2,q4003(1),Scan Type,q4003(1)Scan Type
3,3,q4003(2),Scan Type,q4003(2)Scan Type
4,4,q4003(3),Scan Type,q4003(3)Scan Type
5,5,q4003(4),Scan Type,q4003(4)Scan Type
6,6,q4003(5),Scan Type,q4003(5)Scan Type
7,7,q4003(6),Scan Type,q4003(6)Scan Type
8,8,q4003(7),Scan Type,q4003(7)Scan Type
9,9,q4004,"If ""Other,"" specify","q4004If ""Other,"" specify"


radio button options
schema_radio_options


Unnamed: 0,data_labels
0,0=No|1=Yes|3=Possible (hemorrhage vs contrast ...
1,0=Abnormal|1=Normal
2,1=Unread|2=Query generated|3=Read - Unverified...
3,0=Abnormal|1=Normal
4,0=Abnormal|1=Normal
5,0=Abnormal|1=Normal
6,0=Abnormal|1=Normal
7,0=Abnormal|1=Normal
8,0=Abnormal|1=Normal
9,0=Abnormal|1=Normal


api_radio_options


Unnamed: 0,index,field_name,select_choices_or_calculations
0,1780,cau_hea,0=Abnormal|1=Normal
1,1781,len_nuc,0=Abnormal|1=Normal
2,1782,ins_rib,0=Abnormal|1=Normal
3,1783,int_cap,0=Abnormal|1=Normal
4,1784,m1,0=Abnormal|1=Normal
5,1785,m2,0=Abnormal|1=Normal
6,1786,m3,0=Abnormal|1=Normal
7,1787,m4,0=Abnormal|1=Normal
8,1788,m5,0=Abnormal|1=Normal
9,1789,m6,0=Abnormal|1=Normal


redcap_variables
CRF_Data_RedCap


Unnamed: 0,redcap_variable
0,ambra_img_url
1,is_phantom
2,is_test
3,lab_use_only_complete


CRF_Schema_RedCap


Unnamed: 0,redcap_variable
0,ambra_img_url
1,is_phantom
2,is_test
3,lab_use_only_complete


question_text
schema_questions


Unnamed: 0,question_text,redcap_variable,variable-value
0,Ambra image URL,ambra_img_url,ambra_img_urlAmbra image URL
1,Is a phantom?,is_phantom,is_phantomIs a phantom?
2,Is a test scan?,is_test,is_testIs a test scan?


api_questions


Unnamed: 0,index,redcap_variable,field_label,variable-value
0,0,ambra_img_url,Ambra image URL,ambra_img_urlAmbra image URL
1,1,is_test,Is a test scan?,is_testIs a test scan?
2,2,is_phantom,Is a phantom?,is_phantomIs a phantom?


radio button options
schema_radio_options


Unnamed: 0,data_labels
0,0=No|1=Yes
1,0=No|1=Yes


api_radio_options


Unnamed: 0,index,field_name,select_choices_or_calculations
0,1800,is_test,0=No|1=Yes
1,1801,is_phantom,0=No|1=Yes



-------------------mr-------------------

The following CRF_Data_RedCap.redcap_variable's are not in CRF_schema_RedCap.redcap_variable's:
['q2018(-10)', 'q2029_mr(-10)', 'q1036_mr(3)']


The following api-metadata question_text's are not in CRF_Schema_RedCap.question_text's:
{redcap_variable : question_text}

{'q2018(-10)': 'White matter hyperintensity (WMH) Periventricular?', 'q2029_mr(-10)': 'White matter hyperintensity (WMH) deep white matter (DWM)?'}




Exception: Please handle the above discrepancies

In [45]:
crf_name = 'mr'

schema_questions = pd.DataFrame(db.run_select_query("""SELECT question_text, redcap_variable FROM CRF_Schema_RedCap
                                WHERE crf_name = %s AND question_text IS NOT NULL""", [crf_name], column_names=True))
schema_questions['variable-value'] = schema_questions['redcap_variable']+schema_questions['question_text']

api_questions = pd.DataFrame(project.export_metadata())
api_questions = api_questions[api_questions['form_name'] == crf_name]
field_names = pd.DataFrame(project.export_field_names())
field_names.rename(columns={'original_field_name': 'field_name'}, inplace=True)
# api_questions = pd.merge(field_names, api_questions, on='field_name')
api_questions = pd.merge(api_questions, field_names, on='field_name', how='left')
api_questions

Unnamed: 0,field_name,form_name,section_header,field_type,field_label,select_choices_or_calculations,field_note,text_validation_type_or_show_slider_number,text_validation_min,text_validation_max,identifier,branching_logic,required_field,custom_alignment,question_number,matrix_group_name,matrix_ranking,field_annotation,choice_value,export_field_name
0,q2001,mr,,radio,Time point,"1, Baseline SOC | 2, Follow-Up | 3, Unscheduled",,,,,,,,RH,,,,"@DEFAULT=""1""",,q2001
1,q2002,mr,,checkbox,Scan Type,"1, MRI | 2, MRA | 3, MRP | 4, Others",,,,,,,,RH,,,,,1,q2002___1
2,q2002,mr,,checkbox,Scan Type,"1, MRI | 2, MRA | 3, MRP | 4, Others",,,,,,,,RH,,,,,2,q2002___2
3,q2002,mr,,checkbox,Scan Type,"1, MRI | 2, MRA | 3, MRP | 4, Others",,,,,,,,RH,,,,,3,q2002___3
4,q2002,mr,,checkbox,Scan Type,"1, MRI | 2, MRA | 3, MRP | 4, Others",,,,,,,,RH,,,,,4,q2002___4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
366,q2129,mr,,text,Signature of Central Reader,,,,,,,,,RH,,,,,,q2129
367,q2131,mr,,notes,Reviewer's general comments,,,,,,,,,RH,,,,,,q2131
368,mr_status,mr,"<div class=""rich-text-field-label""><p><strong>...",radio,CRF status,"1, Unread | 2, Query generated | 3, Read - Unv...",,,,,,,,,,,,@IF([user-role-label] = 'Admin' or user-role-l...,,mr_status
369,mri_comments,mr,,notes,CRP comments,,,,,,,,,,,,,@IF([user-role-label] = 'Admin' or user-role-l...,,mri_comments


In [63]:

patient_name = '11003'
patient_id = '6037'
instance = 1
verified = ''

form_df = AMBRA_Backups.redcap_funcs.get_form_df(project, patient_name, crf_name, instance)
# if form_df.empty: # if empty, means there is no live data for this patient and a deleted log should appear later
#     continue 
# crf_id = db.run_insert_query(f"""INSERT INTO CRF_RedCap (id_patient, crf_name, instance, verified, deleted) VALUES 
#                                 (%s, %s, {'NULL' if instance is None else instance}, %s, %s)""", [patient_id, crf_name, verified, deleted])

irr_cols = 3 if form_df.columns[1] == 'redcap_repeat_instrument' else 1 # number of irrelevant fields ie. record_id, redcap_repeat_instrument, redcap_repeat_instance
form_df = form_df[form_df.columns[irr_cols:]]
form_df = form_df.melt(var_name='redcap_variable')
form_df.loc[form_df['redcap_variable'].str.contains('___'), 'redcap_variable'] = form_df['redcap_variable']+')'
form_df.loc[form_df['redcap_variable'].str.contains('___'), 'redcap_variable'] = form_df['redcap_variable'].str.replace('___', '(')

def under_to_neg(row):
    redcap_variable = row['redcap_variable']
    if '(' in redcap_variable and ')' in redcap_variable:
        option = redcap_variable.split('(')[1].split(')')[0]
        if option 
            
            

form_df[form_df['redcap_variable'].str.contains('2018')]

Unnamed: 0,redcap_variable,value
66,q2018a,1
117,q2018(0),0
118,q2018(1),0
119,q2018(2),0
120,q2018(3),0
121,q2018(_10),1


In [46]:
# 1. record initialization
patient_name = '11003'
crf_name = 'mr'
form_df = pd.DataFrame(project.export_records(records=[patient_name], forms=[crf_name]))
form_df

Unnamed: 0,record_id,redcap_repeat_instrument,redcap_repeat_instance,q2001,q2002___1,q2002___2,q2002___3,q2002___4,q2003___1,q2003___2,...,q2119,q2121,q2122,q2124,q2129,q2131,mr_status,mri_comments,mr_init,mr_complete
0,11003,,,3,1,0,0,0,1,1,...,,,,,MGS,Only DWI sequence\r\n,4,"""Prior infarct seen on image?"" unanswered; JJJ...",JJJ,2


In [59]:
# 2. insert from logs


s = AMBRA_Backups.redcap_funcs.grab_logs(db, project, only_record_logs=False, start_date=datetime(2000, 1, 1))
s = pd.DataFrame(s)
sub = s[s['record'] == '11003']
display_md(sub)

|      | timestamp        | username   | action              | details                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     |   record |
|-----:|:-----------------|:-----------|:--------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------

In [44]:
comp_schema_cap_db(db_name, project_name)

  soup = BeautifulSoup(row['field_label'], 'html.parser')
  soup = BeautifulSoup(row['field_label'], 'html.parser')
  soup = BeautifulSoup(row['field_label'], 'html.parser')



-------------------mr-------------------

The following CRF_Data_RedCap.redcap_variable's are not in CRF_schema_RedCap.redcap_variable's:
['q2018(-10)', 'q2029_mr(-10)', 'q1036_mr(3)']


The following api-metadata question_text's are not in CRF_Schema_RedCap.question_text's:
{redcap_variable : question_text}

{'q2018(-10)': 'White matter hyperintensity (WMH) Periventricular?', 'q2029_mr(-10)': 'White matter hyperintensity (WMH) deep white matter (DWM)?'}




Exception: Please handle the above discrepancies