In [None]:
import pandas as pd
import numpy as np
import re
from completing_missing_clinician_questionnaire_2021 import map_additional_column_name_to_2021_clinician_column_name, should_remove_from_stu_columns, map_additional_column_name_to_2021_student_column_name , fill_id, IsClinician, upload_to_imputation_data_to_old_data_map, upload_to_old_data_column_names_map
import psycopg2

import os

# PostgreSQL connection string used by every upload below.
# Supply it through the DB_CONN_STR environment variable so that credentials
# stay out of the notebook; the literal is kept only as a backward-compatible
# fallback for local development and should not be relied on elsewhere.
conn_str = os.environ.get("DB_CONN_STR") or "dbname=postgres user=postgres password=asdf123"

In [None]:
def prepare_datasets():
    """Read the three source CSV files and align a few of their column names.

    Returns:
        tuple: ``(old_data, redcap_data, imputation_data)`` as DataFrames.
        ``old_data`` and ``imputation_data`` have their ``cssrs_followup_*``
        columns renamed to ``cssrs_fw_maya_*``; ``old_data`` additionally has
        ``chameleon_complete_stu`` renamed to ``chameleon_complete``.
        ``imputation_data`` excludes ``record_id == 363`` and has been passed
        through ``fill_id``. ``redcap_data`` is returned as read.
    """
    # Renames shared by the old and the imputation datasets.
    cssrs_followup_renames = {
        'cssrs_followup_timestamp':'cssrs_fw_maya_timestamp',
        'cssrs_followup_complete':'cssrs_fw_maya_complete',
    }
    old_data_renames = dict(cssrs_followup_renames)
    old_data_renames['chameleon_complete_stu'] = 'chameleon_complete'

    # A single blank (' ') is treated as a missing value in every file.
    # (Original note: the warning pandas emits here is rooted in the NaN values.)
    old_raw = pd.read_csv(r"../../Data/OriginalDataset/Schneider Depression Clinic Database.csv", na_values=' ')
    redcap_data = pd.read_csv(r"../../Data/OriginalDataset/ImmiRiskIPT2022_DATA_2023-09-03_1503.csv", na_values=' ')
    imputation_raw = pd.read_csv(r"../../Data/helper_docs/Student_Clinician_data_2021.csv", na_values=' ')

    # record_id 363 is a known erroneous record and is dropped.
    is_valid_record = imputation_raw['record_id'] != 363
    imputation_valid = imputation_raw[is_valid_record]

    # Fill the ids the same way as in the redcap dataset, then rename.
    imputation_data = fill_id(imputation_valid).rename(columns=cssrs_followup_renames)

    old_data = old_raw.rename(columns=old_data_renames)

    return old_data, redcap_data, imputation_data


In [None]:
def get_columns_range(old_data, redcap_data, imputation_data, columns_range_mapping, is_student_data=False, is_clinician_data=False):
    """Select, for each dataset, the contiguous run of columns between two named columns.

    Args:
        old_data, redcap_data, imputation_data: objects exposing ``.columns``
            (DataFrames in this notebook).
        columns_range_mapping (dict): must contain the keys
            ``{old,redcap,imputation}_data_{start,end}_column``. When
            ``is_clinician_data`` is true it must also contain
            ``redcap_data_start_column_2`` and ``redcap_data_end_column_2``.
        is_student_data (bool): append ``'who'``, ``'who_other'`` and ``'name'``
            to the imputation columns, then drop every column for which
            ``should_remove_from_stu_columns`` returns a truthy value.
        is_clinician_data (bool): append a second redcap column range to the
            redcap columns.

    Returns:
        tuple[list, list, list]: ``(old_data_columns, redcap_data_columns,
        imputation_data_columns)``; both boundary columns are included.

    Raises:
        KeyError: a required key is missing from ``columns_range_mapping``.
        ValueError: a boundary column does not exist in the matching dataset.
    """
    def _columns_between(column_names, start_column, end_column):
        # Inclusive slice from start_column to end_column, in dataset order.
        start = column_names.index(start_column)
        end = column_names.index(end_column) + 1
        return column_names[start:end]

    old_data_column_names = list(old_data.columns)
    redcap_data_column_names = list(redcap_data.columns)
    imputation_data_column_names = list(imputation_data.columns)

    # Ranges
    old_data_columns = _columns_between(
        old_data_column_names,
        columns_range_mapping['old_data_start_column'],
        columns_range_mapping['old_data_end_column'],
    )
    redcap_data_columns = _columns_between(
        redcap_data_column_names,
        columns_range_mapping['redcap_data_start_column'],
        columns_range_mapping['redcap_data_end_column'],
    )
    imputation_data_columns = _columns_between(
        imputation_data_column_names,
        columns_range_mapping['imputation_data_start_column'],
        columns_range_mapping['imputation_data_end_column'],
    )

    # Alterations
    if is_clinician_data:
        # The second range is a redcap range, so its positions must be looked
        # up in the redcap column list (the list that is actually sliced),
        # not in the old-data column list.
        redcap_data_columns = redcap_data_columns + _columns_between(
            redcap_data_column_names,
            columns_range_mapping['redcap_data_start_column_2'],
            columns_range_mapping['redcap_data_end_column_2'],
        )

    if is_student_data:
        imputation_data_columns = imputation_data_columns + ['who', 'who_other', 'name']
        imputation_data_columns = [
            column for column in imputation_data_columns
            if not should_remove_from_stu_columns(column)
        ]

    return old_data_columns, redcap_data_columns, imputation_data_columns

In [None]:
def create_columns_names_mapping(map_from, map_to, transformation_function, is_clinician_imputation_data=False):
    """Build a ``{source column name: target column name}`` dictionary.

    Args:
        map_from (iterable): column names to translate.
        map_to: candidate target names, passed through unchanged to
            ``transformation_function``.
        transformation_function (callable): called as
            ``transformation_function(column_name, map_to)`` and expected to
            return ``(mapped_name, success)``.
        is_clinician_imputation_data (bool): seed the mapping with the two
            fixed ``trqsfmaris_*`` -> ``trqsfmarisclin_*`` entries before
            translating ``map_from``.

    Returns:
        dict: the mapping. Columns for which ``success`` is falsy are left out
        and reported on stdout.
    """
    columns_names_mapping = {}

    if is_clinician_imputation_data:
        # Plain strings: a trailing comma here would store a 1-tuple instead.
        columns_names_mapping['trqsfmaris_timestamp'] = 'trqsfmarisclin_timestamp'
        columns_names_mapping['trqsfmaris_complete'] = 'trqsfmarisclin_complete'

    for column_name in map_from:
        mapped_name, success = transformation_function(column_name, map_to)
        if success:
            columns_names_mapping[column_name] = mapped_name
        else:
            # Report the column that could not be mapped.
            print(column_name)

    return columns_names_mapping

In [None]:
def upload_to_db(questionnaires_cluster, conn_str, upload_function, columns_names_mapping, table_name):
    """Upload, per questionnaire, the part of a column mapping that belongs to it.

    Reads ``auxiliary_questionnaires_data.<table_name>`` once, then for every
    questionnaire keeps only the mapping entries whose key is listed in that
    questionnaire's ``column_names`` value and hands them to ``upload_function``.

    Args:
        questionnaires_cluster (iterable): questionnaire names to process.
        conn_str (str): connection string for ``psycopg2.connect``; also
            forwarded to ``upload_function``.
        upload_function (callable): called as
            ``upload_function(conn_str, questionnaire, filtered_mapping)``.
        columns_names_mapping (dict): full mapping to filter.
        table_name (str): table inside the ``auxiliary_questionnaires_data``
            schema. The table must expose ``questionnaire_name`` and
            ``column_names`` columns; ``column_names`` is presumably a list of
            column names per row -- TODO confirm against the table definition.

    Raises:
        IndexError: a questionnaire has no row in the table.
    """
    # NOTE: table_name is interpolated into the SQL text (identifiers cannot be
    # bound as query parameters), so it must only ever come from trusted code.
    sql = f"SELECT * FROM auxiliary_questionnaires_data.{table_name};"

    conn = psycopg2.connect(conn_str)
    try:
        column_names_table = pd.read_sql_query(sql, conn)
    finally:
        # Release the connection even when the query fails.
        conn.close()

    for questionnaire in questionnaires_cluster:

        questionnaire_columns = column_names_table.loc[column_names_table['questionnaire_name'] == questionnaire, 'column_names'].values[0]
        filtered_mapping = {key: columns_names_mapping[key] for key in questionnaire_columns if key in columns_names_mapping}

        upload_function(conn_str, questionnaire, filtered_mapping)


In [None]:
def add_name_mapping_to_DB(conn_str, questionnaires_cluster, columns_range_mapping, transformation_function, is_student_data=False, is_clinician_data=False):
    """Build column-name mappings for a group of questionnaires and upload them.

    Loads the datasets with ``prepare_datasets``, selects the relevant column
    ranges with ``get_columns_range``, builds name mappings with
    ``create_columns_names_mapping`` and then calls the upload helpers.

    Args:
        conn_str: connection string forwarded to the upload helpers.
        questionnaires_cluster: questionnaire names forwarded to ``upload_to_db``.
        columns_range_mapping: boundary-column dictionary forwarded to
            ``get_columns_range``.
        transformation_function: per-column mapping callable forwarded to
            ``create_columns_names_mapping``.
        is_student_data: forwarded to ``get_columns_range``.
        is_clinician_data: forwarded to ``get_columns_range`` and, as
            ``is_clinician_imputation_data``, to the first mapping call.

    NOTE(review): the two upload calls at the end use ``questionnaire``,
    ``questionnaire_old_2_redcap_map``, ``upload_function``,
    ``columns_names_mapping`` and ``table_name``. None of these is a parameter
    or a local of this function, so unless they exist as notebook globals the
    calls raise ``NameError``. The intended arguments need to be confirmed.
    """
    old_data, redcap_data, imputation_data = prepare_datasets()
    # NOTE(review): redcap_data_columns is computed here but never used below.
    old_data_columns, redcap_data_columns, imputation_data_columns = get_columns_range(old_data, redcap_data, imputation_data, columns_range_mapping, is_student_data=is_student_data, is_clinician_data=is_clinician_data)
    

    
    old_data_2_imputation_data_mapping = create_columns_names_mapping(old_data_columns, imputation_data_columns, transformation_function, is_clinician_imputation_data=is_clinician_data)
    # NOTE(review): same map_from/map_to as the line above (old -> imputation);
    # the variable name suggests redcap_data_columns was meant -- confirm.
    redcap_data_2_old_data_mapping = create_columns_names_mapping(old_data_columns, imputation_data_columns, transformation_function)
    
    # NOTE(review): neither mapping built above is passed to the calls below.
    upload_to_old_data_column_names_map(conn_str, questionnaire, questionnaire_old_2_redcap_map)
    
    upload_to_db(questionnaires_cluster, conn_str, upload_function, columns_names_mapping, table_name)

In [None]:
def main(conn_str):
    """Upload the column-name mappings for every questionnaire group.

    Calls ``add_name_mapping_to_DB`` three times, once each for the chameleon,
    student and clinician questionnaires, with the boundary columns that
    delimit each group in the old, redcap and imputation datasets.

    Args:
        conn_str: connection string forwarded to ``add_name_mapping_to_DB``.
    """

    # Chameleon

    columns_range_mapping = {
        'old_data_start_column': 'chameleon_timestamp',
        'old_data_end_column': 'chameleon_complete',
        'redcap_data_start_column': 'chameleon_timestamp',
        'redcap_data_end_column': 'chameleon_complete',
        'imputation_data_start_column': 'chameleon_timestamp',
        'imputation_data_end_column': 'chameleon_complete'
    }

    transformation_function = map_additional_column_name_to_2021_student_column_name
    add_name_mapping_to_DB(conn_str, ['chameleon'], columns_range_mapping, transformation_function)


    # Student

    student_questionnaires = ["opening_students", "trq_sf_maris_stu", "remote_stu", "cps_stu", "scs_stu", "mini_kid_sum_stu", "c_ssrs_stu"]
    # NOTE(review): the old/redcap start and end columns are identical here, so
    # those two ranges contain a single column -- confirm this is intended.
    columns_range_mapping = {
        'old_data_start_column': 'opening_students_timestamp',
        'old_data_end_column': 'opening_students_timestamp',
        'redcap_data_start_column': 'opening_students_timestamp',
        'redcap_data_end_column': 'opening_students_timestamp',
        'imputation_data_start_column': 'opening_clinicians_complete',
        'imputation_data_end_column': 'cssrs_t_complete'
    }

    transformation_function = map_additional_column_name_to_2021_student_column_name
    add_name_mapping_to_DB(conn_str, student_questionnaires, columns_range_mapping, transformation_function, is_student_data=True)


    # Clinician

    clinician_questionnaires = ['c_ssrs_clin', 'mini_kid_sum_clin', 'screening_form', 'suicide_form_clin', 'ffq', 'cdrsr_clin', 'scs_clin', 'er_questionnaire_clin', 'cps_clin', 'remote_clin', 'opening_therapist_battery', 'cgi_s_clin', 'trq_sf_maris_clin', 'maris_y_scars_clin', 'wai_immirisk_clin']
    columns_range_mapping = {
        'old_data_start_column': 'wai_immirisk_clin_timestamp',
        'old_data_end_column': 'cssrs_t_clin_complete',
        'redcap_data_start_column': 'wai_immirisk_clin_timestamp',
        'redcap_data_end_column': 'cssrs_t_clin_complete',
        'imputation_data_start_column': 'wai_immirisk_clin_timestamp',
        'imputation_data_end_column': 'cssrs_t_complete',
        'redcap_data_start_column_2': 'cssrs_fw_maya_timestamp',
        'redcap_data_end_column_2': 'cssrs_fw_maya_complete'
    }

    transformation_function = map_additional_column_name_to_2021_clinician_column_name
    add_name_mapping_to_DB(conn_str, clinician_questionnaires, columns_range_mapping, transformation_function, is_clinician_data=True)

## Chameleon

In [None]:
# Boundary columns delimiting the chameleon questionnaire in each dataset.
# The dict must start on the assignment line: a bare `name =` followed by a
# newline is a SyntaxError.
columns_range_mapping = {
    'old_data_start_column': 'chameleon_timestamp',
    'old_data_end_column': 'chameleon_complete',
    'redcap_data_start_column': 'chameleon_timestamp',
    'redcap_data_end_column': 'chameleon_complete',
    'imputation_data_start_column': 'chameleon_timestamp',
    'imputation_data_end_column': 'chameleon_complete'
}

transformation_function = map_additional_column_name_to_2021_student_column_name

add_name_mapping_to_DB(conn_str, ['chameleon'], columns_range_mapping, transformation_function)

# Student

In [None]:
student_questionnaires = ["opening_students", "trq_sf_maris_stu", "remote_stu", "cps_stu", "scs_stu", "mini_kid_sum_stu", "c_ssrs_stu"]

# Boundary columns delimiting the student questionnaires in each dataset.
# NOTE(review): the old/redcap start and end columns are identical, so those
# two ranges contain a single column -- confirm this is intended.
columns_range_mapping = {
    'old_data_start_column': 'opening_students_timestamp',
    'old_data_end_column': 'opening_students_timestamp',
    'redcap_data_start_column': 'opening_students_timestamp',
    'redcap_data_end_column': 'opening_students_timestamp',
    'imputation_data_start_column': 'opening_clinicians_complete',
    'imputation_data_end_column': 'cssrs_t_complete'
}

transformation_function = map_additional_column_name_to_2021_student_column_name

add_name_mapping_to_DB(conn_str, student_questionnaires, columns_range_mapping, transformation_function, is_student_data=True)

# Clinician

In [None]:
clinician_questionnaires = ['c_ssrs_clin', 'mini_kid_sum_clin', 'screening_form', 'suicide_form_clin', 'ffq', 'cdrsr_clin', 'scs_clin', 'er_questionnaire_clin', 'cps_clin', 'remote_clin', 'opening_therapist_battery', 'cgi_s_clin', 'trq_sf_maris_clin', 'maris_y_scars_clin', 'wai_immirisk_clin']

# Boundary columns delimiting the clinician questionnaires in each dataset.
# The *_2 keys describe a second redcap range that is appended to the first
# one when is_clinician_data=True.
columns_range_mapping = {
    'old_data_start_column': 'wai_immirisk_clin_timestamp',
    'old_data_end_column': 'cssrs_t_clin_complete',
    'redcap_data_start_column': 'wai_immirisk_clin_timestamp',
    'redcap_data_end_column': 'cssrs_t_clin_complete',
    'imputation_data_start_column': 'wai_immirisk_clin_timestamp',
    'imputation_data_end_column': 'cssrs_t_complete',
    'redcap_data_start_column_2': 'cssrs_fw_maya_timestamp',
    'redcap_data_end_column_2': 'cssrs_fw_maya_complete'
}

transformation_function = map_additional_column_name_to_2021_clinician_column_name

add_name_mapping_to_DB(conn_str, clinician_questionnaires, columns_range_mapping, transformation_function, is_clinician_data=True)