In [129]:
import numpy as np
import pandas as pd
import json
from pandas.io.json import json_normalize 
from pandas import read_csv
import logging
from functools import reduce
import csv
from collections import defaultdict
import os
import uuid

In [130]:
etl = 'cuartero2newaqm'

input_schema_file = ''
input_data_file = ''
mapping_file = ''

datelog_dir = 'couchbase-curis-2019-06-21'

if etl == 'cuartero2newaqm':
    
    schema_meta_file = '_meta.csv'
    schema_meta_dir = 'data/processed/' + datelog_dir + '/'
    schema_meta_path = schema_meta_dir + schema_meta_file
    
    mapping_file = '2.1.TestAQMHealthInfoQuestions.V1.map.csv'
    mapping_dir = 'schema/map/Philippines/' 
    mapping_path = mapping_dir + mapping_file
    
    tmp_dir = 'data/processed/' + datelog_dir + '/tmp/'
    processed_dir = 'data/processed/' + datelog_dir + '/'
    merged_dir = 'data/merged/' + datelog_dir + '/'

## TODO: CREATE FOLDER DIRECTORY MKDIR
## TODO: PARSED FIELD TYPE: LIST -- merged into single object

## TODO: FIX ID MERGING

## TODO: FIX _id into profileId

# READ MAPPING FILE

In [131]:
#def _get_mapping_fields():
_mapping_df = pd.DataFrame()
_mapping_df = read_csv(mapping_path).sort_values(['source_key']).replace(np.nan,'',regex=True)
_mapping_df.head(3)

Unnamed: 0,source_key,source_type,destination_key,destination_type,data_type,data_source,data_format,default_value,lookup_value
9,gender,string,gender,string,existing,original,,,
0,health_informations.allergies,string,answers.allergies,string,existing,original,,,
2,health_informations.blood_pressure.first_readi...,string,answers.bp1Diastole,string,existing,original,,,


# GET THE FIELDS IN THE MAPPING FILE

In [132]:
mapping_fields_list = []
mapping_fields_list = list(filter(None, (_mapping_df['source_key'].unique())))
mapping_fields_list

['gender',
 'health_informations.allergies',
 'health_informations.blood_pressure.first_reading.diastole',
 'health_informations.blood_pressure.first_reading.systole',
 'health_informations.blood_sign',
 'health_informations.blood_sugar',
 'health_informations.exercise_in_a_week',
 'health_informations.family_history',
 'health_informations.smoking_habit',
 'profiles.civil_status',
 'registered_at',
 'user-cam.id']

# GET THE FIELDS IN THE SCHEMA META (i.e. _meta) FILE

In [133]:
meta_headers_df = pd.DataFrame()
meta_headers_df = pd.read_csv(schema_meta_path)
meta_headers_df.head(5)

Unnamed: 0,file_name,field_name,field_type
0,resident,middle_name,primitive
1,resident,birthdate,primitive
2,resident,id,primitive
3,resident,type,primitive
4,resident,first_name,primitive


## MATCH THE FIELDS IN MAPPING FILE AND SCHEMA META FILE

In [134]:
match_headers_df = pd.DataFrame()
match_headers_df = meta_headers_df[meta_headers_df['field_name'].isin(mapping_fields_list)]
match_headers_df = match_headers_df.sort_values(['file_name','field_name']).reset_index(drop=True)
match_headers_df

Unnamed: 0,file_name,field_name,field_type
0,health_informations,health_informations.allergies,primitive
1,health_informations,health_informations.blood_pressure.first_readi...,primitive
2,health_informations,health_informations.blood_pressure.first_readi...,primitive
3,health_informations,health_informations.blood_sign,primitive
4,health_informations,health_informations.exercise_in_a_week,primitive
5,health_informations,health_informations.smoking_habit,primitive
6,health_informations.family_history,health_informations.family_history,list
7,profiles,profiles.civil_status,primitive
8,resident,gender,primitive
9,resident,registered_at,primitive


# CREATE DEFAULT DICT FOR FILENAME AS KEY and FIELD NAMES AS VALUE

In [135]:
filename_per_field_dd = defaultdict(list)

for index,row in match_headers_df.iterrows():
    filename = row['file_name']
    fields = row['field_name']
    filename_per_field_dd[filename].append(fields)  
    
filename_per_field_dd

defaultdict(list,
            {'health_informations': ['health_informations.allergies',
              'health_informations.blood_pressure.first_reading.diastole',
              'health_informations.blood_pressure.first_reading.systole',
              'health_informations.blood_sign',
              'health_informations.exercise_in_a_week',
              'health_informations.smoking_habit'],
             'health_informations.family_history': ['health_informations.family_history'],
             'profiles': ['profiles.civil_status'],
             'resident': ['gender', 'registered_at', 'user-cam.id']})

## CREATE OUTPUT FILE WITH DYNAMIC NAME DERIVED FROM MAPPING FILE

In [136]:
_output_filename = mapping_file.split(sep='.')[2]
_output_filename

'TestAQMHealthInfoQuestions'

## REMOVE OUTPUT FILE IF EXISTING

In [137]:
if os.path.exists(merged_dir + _output_filename + '.csv'):
    os.remove(merged_dir + _output_filename + '.csv' )

## WRITE EMPTY CSV FOR MERGE.csv

### TODO: FILENAME MUST BE DYNAMIC

## HARDCODE TEST DATA

In [138]:
test_filename_per_field_dd = {
    'health_informations': ['health_informations.allergies',
              'health_informations.blood_pressure.first_reading.diastole',
              'health_informations.blood_pressure.first_reading.systole',
              'health_informations.blood_pressure.second_reading.diastole',
              'health_informations.blood_pressure.second_reading.systole',
              'health_informations.blood_sign',
              'health_informations.blood_type',
              'health_informations.exercise_in_a_week',
              'health_informations.smoking_habit'],
    'health_informations.family_history': ['health_informations.family_history']}

test_filename_per_field_dd = {'profiles': ['profiles.civil_status','profiles.employment.is_employed','profiles.education','profiles.employment.nature','profiles.religion']}
test_filename_per_field_dd = {
        'health_informations': [
                  'health_informations.allergies',
                  'health_informations.blood_pressure.first_reading.diastole',
                  'health_informations.blood_pressure.first_reading.systole',
                  'health_informations.blood_sign',
                  'health_informations.exercise_in_a_week',
                  'health_informations.smoking_habit'],
        'health_informations.family_history': [
                  'health_informations.family_history'],
        'profiles': ['profiles.civil_status'],
        'resident': ['gender','registered_at','user-cam.id']}

test_filename_per_field_dd = filename_per_field_dd

## CREATE OUTPUT FILE WITH HEADERS BASED ON _META AND MAPPING FILE

In [139]:
fields_list = list(test_filename_per_field_dd.values())
flat_fields_list = [item for sublist in fields_list for item in sublist]
all_fields_list  = ['_id','_index_map'] + flat_fields_list

empty_data_df = pd.DataFrame(columns=all_fields_list)
empty_data_df.to_csv(merged_dir + _output_filename + '.csv', encoding='utf-8', mode='w', header=True,index=False)
empty_data_df

Unnamed: 0,_id,_index_map,health_informations.allergies,health_informations.blood_pressure.first_reading.diastole,health_informations.blood_pressure.first_reading.systole,health_informations.blood_sign,health_informations.exercise_in_a_week,health_informations.smoking_habit,health_informations.family_history,profiles.civil_status,gender,registered_at,user-cam.id


## TODO: DYNAMICALLY MERGE DATA FROM DIFFERENT 

In [140]:
fields_list = list(test_filename_per_field_dd.values())
flat_fields_list = [item for sublist in fields_list for item in sublist]
flat_fields_list

['health_informations.allergies',
 'health_informations.blood_pressure.first_reading.diastole',
 'health_informations.blood_pressure.first_reading.systole',
 'health_informations.blood_sign',
 'health_informations.exercise_in_a_week',
 'health_informations.smoking_habit',
 'health_informations.family_history',
 'profiles.civil_status',
 'gender',
 'registered_at',
 'user-cam.id']

In [141]:
_filename = []
_filename = list(test_filename_per_field_dd.keys())[0]
_filename

'health_informations'

In [142]:
_fields = []
_fields = test_filename_per_field_dd[_filename]
_mandatory_fields = ['_id','_index_map']
_fields = _fields + _mandatory_fields
_fields

['health_informations.allergies',
 'health_informations.blood_pressure.first_reading.diastole',
 'health_informations.blood_pressure.first_reading.systole',
 'health_informations.blood_sign',
 'health_informations.exercise_in_a_week',
 'health_informations.smoking_habit',
 '_id',
 '_index_map']

In [143]:
_test_df = pd.read_csv(processed_dir + _filename + '.csv', dtype={"_index_map": str}).sort_values(['_id','_index_map']) 
_test_df = _test_df[_fields].replace(np.nan,'',regex=True)
_test_df.head(3)

Unnamed: 0,health_informations.allergies,health_informations.blood_pressure.first_reading.diastole,health_informations.blood_pressure.first_reading.systole,health_informations.blood_sign,health_informations.exercise_in_a_week,health_informations.smoking_habit,_id,_index_map
0,,80.0,130.0,-,0,Pssive,2f920d39-319e-4bf7-90c8-6133c3337af7,0
2,,,,,,Never,2f920d39-319e-4bf7-90c8-6133c3337af7,1
1,,90.0,120.0,+,3x,Active,64c2e7de-0cc8-4df5-a54f-02398652c080,0


In [144]:
_tmp_df = pd.DataFrame(columns=['_id','_index_map'])
_tmp_df.to_csv(merged_dir + 'tmp_merge' + '.csv',index=False)
_tmp_df

Unnamed: 0,_id,_index_map


In [145]:
_tmp_df = _tmp_df.merge(_test_df,on=["_id","_index_map"], how="outer",  suffixes=('_x', '_y') )
_tmp_df 

Unnamed: 0,health_informations.allergies,health_informations.blood_pressure.first_reading.diastole,health_informations.blood_pressure.first_reading.systole,health_informations.blood_sign,health_informations.exercise_in_a_week,health_informations.smoking_habit,_id,_index_map
0,,80.0,130.0,-,0,Pssive,2f920d39-319e-4bf7-90c8-6133c3337af7,0
1,,,,,,Never,2f920d39-319e-4bf7-90c8-6133c3337af7,1
2,,90.0,120.0,+,3x,Active,64c2e7de-0cc8-4df5-a54f-02398652c080,0


In [146]:
list(test_filename_per_field_dd.items())

[('health_informations',
  ['health_informations.allergies',
   'health_informations.blood_pressure.first_reading.diastole',
   'health_informations.blood_pressure.first_reading.systole',
   'health_informations.blood_sign',
   'health_informations.exercise_in_a_week',
   'health_informations.smoking_habit']),
 ('health_informations.family_history',
  ['health_informations.family_history']),
 ('profiles', ['profiles.civil_status']),
 ('resident', ['gender', 'registered_at', 'user-cam.id'])]

## TODO: optimize here. tmp_df will run out of memory

## TODO: identify here if primitive or list. if list .agg to_list

## ==============MANUAL DATA / DUMMY GET DATA===========

## RESIDENT DATA

In [169]:
_resident_df = pd.read_csv(processed_dir + 'resident' + '.csv', dtype={"_index_map": str}).sort_values(['_id','_index_map']) 
_resident_df = _resident_df[['gender','registered_at','user-cam.id','_id','_index_map']].replace(np.nan,'',regex=True)
_resident_df.head(3)

Unnamed: 0,gender,registered_at,user-cam.id,_id,_index_map
0,Male,07/12/2018 at 10:54:14 PM GMT+08:00,csoriano@gmail.com,2f920d39-319e-4bf7-90c8-6133c3337af7,
1,Female,08/16/2018 at 10:25:07 PM GMT+08:00,aguzman@gmail.com,64c2e7de-0cc8-4df5-a54f-02398652c080,


## PROFILE DATA

In [291]:
_profile_df = pd.read_csv(processed_dir + 'profiles' + '.csv', dtype={"_index_map": str}).sort_values(['_id','_index_map']) 
_profile_df = _profile_df[['profiles.civil_status', '_id', '_index_map']].replace(np.nan,'',regex=True)
_profile_df = _profile_df.replace(np.nan,'',regex=True).reset_index(drop=True)


_profile_df.head(3)

Unnamed: 0,profiles.civil_status,_id,_index_map
0,Single,2f920d39-319e-4bf7-90c8-6133c3337af7,0
1,Single,64c2e7de-0cc8-4df5-a54f-02398652c080,0


In [292]:
#_profile_df['_index_map'] = _profile_df.index.to_series().map(lambda x: '')
_profile_df

_profile_df['_index_map'] = _profile_df['_index_map'].apply(_flatten_index_map)
_profile_df

Unnamed: 0,profiles.civil_status,_id,_index_map
0,Single,2f920d39-319e-4bf7-90c8-6133c3337af7,
1,Single,64c2e7de-0cc8-4df5-a54f-02398652c080,


## HEALTH INFORATION DATA

In [285]:
_hi_df = pd.read_csv(processed_dir + 'health_informations' + '.csv', dtype={"_index_map": str}).sort_values(['_id','_index_map']) 
_hi_df  = _hi_df [['health_informations.allergies',
               'health_informations.blood_pressure.first_reading.diastole',
               'health_informations.blood_pressure.first_reading.systole',
               'health_informations.blood_sign',
               'health_informations.exercise_in_a_week',
               'health_informations.smoking_habit','_id','_index_map']].replace(np.nan,'',regex=True).reset_index(drop=True)
_hi_df.head(3)

Unnamed: 0,health_informations.allergies,health_informations.blood_pressure.first_reading.diastole,health_informations.blood_pressure.first_reading.systole,health_informations.blood_sign,health_informations.exercise_in_a_week,health_informations.smoking_habit,_id,_index_map
0,,80.0,130.0,-,0,Pssive,2f920d39-319e-4bf7-90c8-6133c3337af7,0
1,,,,,,Never,2f920d39-319e-4bf7-90c8-6133c3337af7,1
2,,90.0,120.0,+,3x,Active,64c2e7de-0cc8-4df5-a54f-02398652c080,0


In [288]:
#_hi_df['_index_map'] = _hi_df.index.to_series().map(lambda x: '')

_hi_df['_index_map'] = _hi_df['_index_map'].apply(_flatten_index_map)
_hi_df

Unnamed: 0,health_informations.allergies,health_informations.blood_pressure.first_reading.diastole,health_informations.blood_pressure.first_reading.systole,health_informations.blood_sign,health_informations.exercise_in_a_week,health_informations.smoking_habit,_id,_index_map
0,,80.0,130.0,-,0,Pssive,2f920d39-319e-4bf7-90c8-6133c3337af7,
1,,,,,,Never,2f920d39-319e-4bf7-90c8-6133c3337af7,1.0
2,,90.0,120.0,+,3x,Active,64c2e7de-0cc8-4df5-a54f-02398652c080,


In [289]:
def _flatten_index_map(index):
    
    _new_index = ''
    
    if index == '0':
        _new_index = '' 
    elif isinstance(index,list):
        _new_index = ''
    else:
        _new_index = index
        
    return _new_index

## TODO: Combine LIST tolist()

## HEALTH INFORMATION FAMILY DATA

## IF type == list make index as 0

In [296]:
_hif_df = pd.DataFrame()
_hif_df = pd.read_csv(processed_dir + 'health_informations.family_history' + '.csv', dtype={"_index_map": str}).sort_values(['_id','_index_map']) 
_hif_df  = _hif_df [['health_informations.family_history', '_id', '_index_map']].replace(np.nan,'',regex=True).reset_index(drop=True)
_hif_df.head(3)

Unnamed: 0,health_informations.family_history,_id,_index_map
0,PNEU,2f920d39-319e-4bf7-90c8-6133c3337af7,0.0
1,HPN,2f920d39-319e-4bf7-90c8-6133c3337af7,0.1
2,CA,64c2e7de-0cc8-4df5-a54f-02398652c080,0.0


In [297]:
#_hi_df = _hi_df[_hi_df['_index_map'] == '0']
#_hif_df = _hif_df.drop('_index_map',axis=1)
_hif_df = _hif_df.groupby('_id').agg(lambda x: x.tolist())
#_hif_df['_index_map'] = _hif_df.index.to_series().map(lambda x: '0')

#_hif_df['_index_map'] = _hif_df.index.to_series().map(lambda x: '')
#_hif_df = _hif_df['_index_map'] = ''
_hif_df = _hif_df.reset_index()
_hif_df

Unnamed: 0,_id,health_informations.family_history,_index_map
0,2f920d39-319e-4bf7-90c8-6133c3337af7,"[PNEU, HPN]","[0.0, 0.1]"
1,64c2e7de-0cc8-4df5-a54f-02398652c080,"[CA, HPN]","[0.0, 0.1]"


In [298]:
#_hif_df['_index_map'] = _hif_df.index.to_series().map(lambda x: '')
#_hif_df

_hif_df['_index_map'] = _hif_df['_index_map'].apply(_flatten_index_map)
_hif_df

Unnamed: 0,_id,health_informations.family_history,_index_map
0,2f920d39-319e-4bf7-90c8-6133c3337af7,"[PNEU, HPN]",
1,64c2e7de-0cc8-4df5-a54f-02398652c080,"[CA, HPN]",


## MERGE SELECTED FILES --sample script for merging dataframe

In [299]:
_merge_df = pd.DataFrame()
_merge_df = _hi_df.merge(_hif_df,on=["_id","_index_map"], how="outer",  suffixes=('_x', '_y') )
_merge_df = _merge_df.merge(_profile_df,on=["_id","_index_map"], how="outer",  suffixes=('_x', '_y') )
_merge_df = _merge_df.merge(_resident_df,on=["_id","_index_map"], how="outer",  suffixes=('_x', '_y') )
#_merge_df.to_csv(merged_dir + 'manual_merge' + '.csv',index=False)
_merge_df.to_csv(merged_dir + 'dynamic_merge' + '.csv',index=False)
_merge_df.T

Unnamed: 0,0,1,2
health_informations.allergies,,,
health_informations.blood_pressure.first_reading.diastole,80,,90
health_informations.blood_pressure.first_reading.systole,130,,120
health_informations.blood_sign,-,,+
health_informations.exercise_in_a_week,0,,3x
health_informations.smoking_habit,Pssive,Never,Active
_id,2f920d39-319e-4bf7-90c8-6133c3337af7,2f920d39-319e-4bf7-90c8-6133c3337af7,64c2e7de-0cc8-4df5-a54f-02398652c080
_index_map,,1,
health_informations.family_history,"[PNEU, HPN]",,"[CA, HPN]"
profiles.civil_status,Single,,Single


## WRITE MERGE DATA TO CSV

In [157]:
test_group_df = pd.read_csv(merged_dir + 'dynamic_merge' + '.csv',dtype={'_index_map': str}).replace(np.nan,'',regex=True)

In [158]:
test_group_df.T

Unnamed: 0,0,1,2
health_informations.allergies,,,
health_informations.blood_pressure.first_reading.diastole,80,,90
health_informations.blood_pressure.first_reading.systole,130,,120
health_informations.blood_sign,-,,+
health_informations.exercise_in_a_week,0,,3x
health_informations.smoking_habit,Pssive,Never,Active
_id,2f920d39-319e-4bf7-90c8-6133c3337af7,2f920d39-319e-4bf7-90c8-6133c3337af7,64c2e7de-0cc8-4df5-a54f-02398652c080
_index_map,,,
health_informations.family_history,"['PNEU', 'HPN']","['PNEU', 'HPN']","['CA', 'HPN']"
profiles.civil_status,Single,Single,Single


## RENAME THE HEADERS USING MAPPED HEADERS

In [159]:
source_destination_keys_df = pd.DataFrame()
source_destination_keys_df = _mapping_df[['source_key','destination_key']]

In [160]:
new_column_name_dict = dict(zip(source_destination_keys_df['source_key'], source_destination_keys_df['destination_key']))
new_column_name_dict 

{'gender': 'gender',
 'health_informations.allergies': 'answers.allergies',
 'health_informations.blood_pressure.first_reading.diastole': 'answers.bp1Diastole',
 'health_informations.blood_pressure.first_reading.systole': 'answers.bp1Systole',
 'health_informations.blood_sign': 'answers.rhesusType',
 'health_informations.blood_sugar': 'answers.bloodSugar',
 'health_informations.exercise_in_a_week': 'answers.weeklyExercise',
 'health_informations.family_history': 'answers.familyDiagnosis',
 'health_informations.smoking_habit': 'answers.smokingHabit',
 'profiles.civil_status': 'answers.civilStatus',
 'registered_at': 'dateCreated',
 'user-cam.id': 'createdBy',
 '': 'type'}

## GET THE DATA from merged dataframes

In [161]:
_required_data_df = pd.DataFrame()
#_required_data_df = _profile_df
_required_data_df = test_group_df

## APPLICABLE only if not Resident data

## DO NECESSARY TRANSFORMATION

In [162]:
_required_data_df.rename(columns = new_column_name_dict,inplace=True )

## APPLICABLE only if not Resident data
_required_data_df.rename(columns={'_id': 'profileId'}, inplace=True)
_required_data_df.head(3)

Unnamed: 0,answers.allergies,answers.bp1Diastole,answers.bp1Systole,answers.rhesusType,answers.weeklyExercise,answers.smokingHabit,profileId,_index_map,answers.familyDiagnosis,answers.civilStatus,gender,dateCreated,createdBy
0,,80.0,130.0,-,0,Pssive,2f920d39-319e-4bf7-90c8-6133c3337af7,,"['PNEU', 'HPN']",Single,Male,07/12/2018 at 10:54:14 PM GMT+08:00,csoriano@gmail.com
1,,,,,,Never,2f920d39-319e-4bf7-90c8-6133c3337af7,,"['PNEU', 'HPN']",Single,Male,07/12/2018 at 10:54:14 PM GMT+08:00,csoriano@gmail.com
2,,90.0,120.0,+,3x,Active,64c2e7de-0cc8-4df5-a54f-02398652c080,,"['CA', 'HPN']",Single,Female,08/16/2018 at 10:25:07 PM GMT+08:00,aguzman@gmail.com


## GENERATE _id column

In [163]:
_required_data_df['_id'] = _required_data_df.index.to_series().map(lambda x: uuid.uuid4())
_required_data_df

Unnamed: 0,answers.allergies,answers.bp1Diastole,answers.bp1Systole,answers.rhesusType,answers.weeklyExercise,answers.smokingHabit,profileId,_index_map,answers.familyDiagnosis,answers.civilStatus,gender,dateCreated,createdBy,_id
0,,80.0,130.0,-,0,Pssive,2f920d39-319e-4bf7-90c8-6133c3337af7,,"['PNEU', 'HPN']",Single,Male,07/12/2018 at 10:54:14 PM GMT+08:00,csoriano@gmail.com,d3750c83-4e2f-4181-a267-d0641d3a1ac7
1,,,,,,Never,2f920d39-319e-4bf7-90c8-6133c3337af7,,"['PNEU', 'HPN']",Single,Male,07/12/2018 at 10:54:14 PM GMT+08:00,csoriano@gmail.com,295d1ced-6fca-41a2-a113-14a1d997f7b6
2,,90.0,120.0,+,3x,Active,64c2e7de-0cc8-4df5-a54f-02398652c080,,"['CA', 'HPN']",Single,Female,08/16/2018 at 10:25:07 PM GMT+08:00,aguzman@gmail.com,f6208d07-8492-46ad-834d-b97fa62252c6


## CLEAR _INDEX_MAP since each _id has generaed ID

In [164]:
_required_data_df['_index_map'] = ''
_required_data_df

Unnamed: 0,answers.allergies,answers.bp1Diastole,answers.bp1Systole,answers.rhesusType,answers.weeklyExercise,answers.smokingHabit,profileId,_index_map,answers.familyDiagnosis,answers.civilStatus,gender,dateCreated,createdBy,_id
0,,80.0,130.0,-,0,Pssive,2f920d39-319e-4bf7-90c8-6133c3337af7,,"['PNEU', 'HPN']",Single,Male,07/12/2018 at 10:54:14 PM GMT+08:00,csoriano@gmail.com,d3750c83-4e2f-4181-a267-d0641d3a1ac7
1,,,,,,Never,2f920d39-319e-4bf7-90c8-6133c3337af7,,"['PNEU', 'HPN']",Single,Male,07/12/2018 at 10:54:14 PM GMT+08:00,csoriano@gmail.com,295d1ced-6fca-41a2-a113-14a1d997f7b6
2,,90.0,120.0,+,3x,Active,64c2e7de-0cc8-4df5-a54f-02398652c080,,"['CA', 'HPN']",Single,Female,08/16/2018 at 10:25:07 PM GMT+08:00,aguzman@gmail.com,f6208d07-8492-46ad-834d-b97fa62252c6


## GET THE NEW FIELDS WITH THE DEFAULT VALUES

In [165]:
new_fields_df = _mapping_df[_mapping_df['data_type'] == 'new'][['destination_key', 'default_value']]
new_fields_df

Unnamed: 0,destination_key,default_value
12,answers.consentBloodTest,N
13,answers.dailyFiberIntake,
14,dateUpdated,2019-06-27T21:00:51.934+08:00
15,formId,aF5AuuQSBToFGyuFz9HGi9
16,formName,2.2 - AQM HealthInfo Questions V1
17,type,profile-related-form


## APPEND THE NEW FIELDS AS COLUMN

In [166]:
for index,row in new_fields_df.iterrows():
    _header = row['destination_key']
    _value = row['default_value']
    _required_data_df[_header] = _value

_required_data_df = _required_data_df.reset_index(drop=True)
_required_data_df.head(5)

Unnamed: 0,answers.allergies,answers.bp1Diastole,answers.bp1Systole,answers.rhesusType,answers.weeklyExercise,answers.smokingHabit,profileId,_index_map,answers.familyDiagnosis,answers.civilStatus,gender,dateCreated,createdBy,_id,answers.consentBloodTest,answers.dailyFiberIntake,dateUpdated,formId,formName,type
0,,80.0,130.0,-,0,Pssive,2f920d39-319e-4bf7-90c8-6133c3337af7,,"['PNEU', 'HPN']",Single,Male,07/12/2018 at 10:54:14 PM GMT+08:00,csoriano@gmail.com,d3750c83-4e2f-4181-a267-d0641d3a1ac7,N,,2019-06-27T21:00:51.934+08:00,aF5AuuQSBToFGyuFz9HGi9,2.2 - AQM HealthInfo Questions V1,profile-related-form
1,,,,,,Never,2f920d39-319e-4bf7-90c8-6133c3337af7,,"['PNEU', 'HPN']",Single,Male,07/12/2018 at 10:54:14 PM GMT+08:00,csoriano@gmail.com,295d1ced-6fca-41a2-a113-14a1d997f7b6,N,,2019-06-27T21:00:51.934+08:00,aF5AuuQSBToFGyuFz9HGi9,2.2 - AQM HealthInfo Questions V1,profile-related-form
2,,90.0,120.0,+,3x,Active,64c2e7de-0cc8-4df5-a54f-02398652c080,,"['CA', 'HPN']",Single,Female,08/16/2018 at 10:25:07 PM GMT+08:00,aguzman@gmail.com,f6208d07-8492-46ad-834d-b97fa62252c6,N,,2019-06-27T21:00:51.934+08:00,aF5AuuQSBToFGyuFz9HGi9,2.2 - AQM HealthInfo Questions V1,profile-related-form


# WRITE TO CSV FINALIZED


In [167]:
_required_data_df.to_csv(merged_dir + _output_filename + '.csv', encoding='utf-8', mode='w', header=True,index=False)

In [None]:
dd