# HiRID conversion to OMOP CDM v6

In [2]:
import pandas as pd
import numpy as np
import psycopg2
import sqlalchemy 
import re
import time
import datetime
import psycopg2

In [23]:
class UploadDB_Optimized:
    """
    Functionality: In this class you can translate the whole hiRID into Postgresql, feel free to modify it according to your needs\n
    Variables:\n
    \t*hirid_directory: Folder where your HiRID files uncompressed are located. Do not modify the file tree in order to work properly\n
    \t*postgres_add: Connection string for the postgres database\n
    \t*mapping_file: Directory path and file name for the Usagi's mapping folder, for more information about it check:\nhttps://www.ohdsi.org/web/wiki/doku.php?id=documentation:software:usagi\n 
    \t*max_group: Default value 250. Given that HiRID is divided in 250 groups the max group is the number of groups you want to upload\n
    \t*sampling_mode: Allows the user to insert only a percentage of the data\n
    Example:\t
    \tuploadDb=UploadDB_Optimized('D:\\YourFolderGoesHere\\',"dbname='dbname' user='user' host='localhost' password='password' connect_timeout=1",'D:\\DirectoryPath\\Vocabulary_translation.csv',1)\n
    \tuploadDb.translate_upload()
    """
    def __init__(self, hirid_directory, postgres_add, mapping_file,  max_group=250, sampling_mode=False):
        #Simple variables
        self.hirid_directory = hirid_directory
        self.postgres_add = postgres_add
        if(max_group>250):
            max_group=250
        elif(max_group<1):
            max_group=1
        self.max_group = max_group
        self.person_upload_time=0
        self.sampling_mode=sampling_mode
        #Arrays &  Dictionaries
        self.groups=np.arange(0,self.max_group)
        self.pharma_lengths=np.array([])
        self.measure_lengths=np.array([])
        self.obs_period_lengths=np.array([])
        self.visit_lengths=np.array([])
        self.obs_lengths=np.array([])
        #Dataframes
        self.source_2_map=pd.read_csv(mapping_file, sep=',')
        self.variableid_dict=self.source_2_map.set_index('source_code')['target_concept_id'].to_dict()
        self.person_df=pd.DataFrame()
        self.observations_df=pd.DataFrame()
        self.pharmaceutic_df=pd.DataFrame()
        self.person_visit_dict={}
        #Statistics
        self.local_initial_rows_obs=0
        self.local_final_rows_obs=0
        self.local_inital_rows_phr=0
        self.local_final_rows_phr=0
        self.global_initial_rows_obs=0
        self.global_final_rows_obs=0
        self.global_inital_rows_phr=0
        self.global_final_rows_phr=0
        pass
    
    
    def translate_upload(self):
        print('Beginning the translate and upload process...')
        begin_time=datetime.datetime.now()
        print('Process began at: '+str(begin_time))
        global_start = time.process_time()
        self.upload_person()
        if(self.sampling_mode):
            print('Translating in sample mode, expect less translated items')
            pass
        for i in self.groups:
            group_start = time.process_time()
            print('\tTranslating item '+str(i+1) + ' of 250')
            self.upload_observation_period(i)
            self.upload_visit_occurrence(i)
            self.upload_measurement(i)
            self.global_initial_rows_obs+=self.local_initial_rows_obs
            self.global_final_rows_obs+=self.local_final_rows_obs
            
            self.upload_pharmaceuticals(i)
            self.global_inital_rows_phr+=self.local_inital_rows_phr
            self.global_final_rows_phr+=self.local_final_rows_phr
            print('\tGroup #{:d} completed!'.format(i))
            pass
        end_time=datetime.datetime.now()
        final_time=(end_time-begin_time).total_seconds()
        print('Full Translation completed!')
        print('{:d} of {:d} ({:.2%}) total observations translated'.format(self.global_final_rows_obs,
                                                                               self.global_initial_rows_obs,
                                                                              float(self.global_final_rows_obs)/float(self.global_initial_rows_obs)))
        print('{:d} of {:d} ({:.2%}) drugs records translated'.format(self.global_final_rows_phr,
                                                                          self.global_inital_rows_phr,
                                                                         float(self.global_final_rows_phr)/float(self.global_inital_rows_phr)))
        total_initial=self.global_initial_rows_obs+self.global_inital_rows_phr
        total_final=self.global_final_rows_obs+self.global_final_rows_phr
        print('{:d} of {:d} ({:.2%}) all the records were translated'.format(total_final,
                                                                                 total_initial,
                                                                                 float(total_final)/float(total_initial)))
        print('Process ended at: '+str(datetime.datetime.now()))
        pass
    
    def convert_df2sql(self,dataframe,table_name):
        dataframe.to_csv(self.hirid_directory+table_name+'.csv',index=False)
        conn = psycopg2.connect(self.postgres_add)
        cur = conn.cursor()
        with open('D:\\MIMICIII\\HiRID\\'+table_name+'.csv', 'r') as f:
            # Notice that we don't need the `csv` module.
            next(f) # Skip the header row.
            cur.copy_from(f, table_name, sep=',',null='')

        conn.commit()
        conn.close()
        pass
    
    def transform_float(self,x):
        try:
            if (isinstance(x, str)):
                return float(re.findall(r'\d+', x)[0])
            else:
                return float(x)
        except Exception as e:
            print(e)
            print(x)
            return(np.nan)
        pass
    
    def transform_float_min(self,x):
        try:
            if (isinstance(x, str)):
                return float(re.findall(r'\d+', x)[0])
            else:
                return float(x)
        except Exception as e:
            print(e)
            print(x)
            return(np.nan)
        pass
    
    def upload_measurement(self,group_number):
        print('\t\tUploading measurements for group #{:d}'.format(group_number))
        obs2measurement=self.observations_df.copy()
        self.local_initial_rows_obs=obs2measurement.shape[0]
        obs2measurement['measurement_concept_id']=obs2measurement['variableid'].map(self.variableid_dict)
        obs2measurement[obs2measurement[['measurement_concept_id']].isnull().any(axis=1)][['variableid']].count().values
        obs2measurement.rename(columns={'patientid': 'person_id'})
        obs2measurement[obs2measurement[['measurement_concept_id']].isnull().any(axis=1)][['variableid']].drop_duplicates()
        obs2measurement=obs2measurement[obs2measurement.measurement_concept_id.notna()]
        obs2measurement['measurement_concept_id']=obs2measurement.measurement_concept_id.astype(int)
        obs2measurement['measurement_datetime']=pd.to_datetime(obs2measurement['datetime'], dayfirst=True)
        obs2measurement['measurement_date']=pd.to_datetime(obs2measurement['datetime']).dt.date
        obs2measurement['measurement_time']=pd.to_datetime(obs2measurement['datetime']).dt.time
        obs2measurement['measurement_type_concept_id']=42530833 #LOINC: Laboratory
        obs2measurement['value_as_number']=obs2measurement.value
        obs2measurement['unit_source_value']=obs2measurement.status
        obs2measurement['measurement_source_concept_id']=obs2measurement.type
        obs2measurement['value_source_value']=obs2measurement.value
        obs2measurement = obs2measurement[obs2measurement.stringvalue != '!folgt']
        obs2measurement = obs2measurement[obs2measurement.stringvalue != '!fehlt']
        obs2measurement = obs2measurement[obs2measurement.stringvalue != '!folgt mmol/L']
        obs2measurement = obs2measurement[obs2measurement.stringvalue != 'mmol/L']
        obs2measurement = obs2measurement[obs2measurement.stringvalue != '---']
        obs2measurement = obs2measurement[obs2measurement.stringvalue != 'N']
        obs2measurement['range_low'] = [self.transform_float_min(x.replace('<','')) if str(x).find('<') != -1 else np.nan for x in obs2measurement['stringvalue']]
        obs2measurement['range_high'] = [self.transform_float(x) if (str(x).find('<') != 0 and x is not np.nan) else np.nan for x in obs2measurement['stringvalue']]
        obs2measurement.value_as_concept_id=0
        obs2measurement.loc[obs2measurement['status']==1, 'value_as_concept_id'] = 45878591 #LOINC: out of range
        obs2measurement.loc[obs2measurement['status']==64, 'value_as_concept_id'] = 45880425 #LOINC: Greatly
        obs2measurement.loc[obs2measurement['status']==32, 'value_as_concept_id'] = 36309396 #LOINC: Not Measured 
        obs2measurement.person_id=obs2measurement.patientid
        obs2measurement['visit_occurrence_id']=obs2measurement['patientid'].map(self.person_visit_dict)
        #passing the data
        obs2measurement['person_id']=obs2measurement.patientid
        obs2measurement['measurement_id'] = np.arange(len(obs2measurement))+(self.measure_lengths.sum()+1) #np.arange(len(obs2measurement))+1
        self.measure_lengths=np.append(self.measure_lengths,self.observations_df.shape[0])
        obs2measurement['operator_concept_id'] = np.nan
        obs2measurement['unit_concept_id'] = np.nan
        obs2measurement['provider_id'] = np.nan
        obs2measurement['visit_detail_id'] = np.nan
        obs2measurement['measurement_source_value'] = obs2measurement.value_as_number
        obs2measurement['measurement_source_concept_id'] = obs2measurement.variableid
        obs2measurement['measurement_time'] = obs2measurement.measurement_time.astype(str)
        obs2measurement['measurement_time'] = obs2measurement['measurement_time'].str.slice(0,8)
        obs2measurement=obs2measurement[['measurement_id','person_id','measurement_concept_id','measurement_date',
                            'measurement_datetime','measurement_time','measurement_type_concept_id',
                           'operator_concept_id','value_as_number','value_as_concept_id','unit_concept_id',
                            'range_low','range_high','provider_id','visit_occurrence_id','visit_detail_id',
                            'measurement_source_value','measurement_source_concept_id','unit_source_value',
                            'value_source_value'
                           ]]
        obs2measurement['measurement_id']=obs2measurement['measurement_id'].astype(int)
        obs2measurement['person_id']=obs2measurement['person_id'].astype(int)
        obs2measurement['visit_occurrence_id']=obs2measurement['visit_occurrence_id'].astype(int)
        if(self.sampling_mode):
            obs2measurement=obs2measurement.sample(frac=0.1, replace=False)
            pass
            
        self.convert_df2sql(obs2measurement, 'measurement')
        self.local_final_rows_obs=obs2measurement.shape[0]
        
        print('\t\tMeasurement #{:d} done!'.format(group_number))
        print('\t\t{:d} of {:d} ({:.2%}) observations translated'.format(self.local_final_rows_obs,
                                                                         self.local_initial_rows_obs,
                                                               float(self.local_final_rows_obs)/float(self.local_initial_rows_obs)))
        pass
    
    
    def upload_pharmaceuticals(self, group_number):
        print('\t\tUploading visit pharmaceuticals for group #{:d}'.format(group_number))
        self.pharmaceutic_df=pd.read_csv(self.hirid_directory+'raw_stage\\pharma_records\\csv\\part-'+str(group_number)+'.csv')
        pharma_2_drug=self.pharmaceutic_df.copy()
        self.local_inital_rows_phr=pharma_2_drug.shape[0]
        pharma_2_drug['drug_exposure_id'] = np.arange(len(pharma_2_drug))+(self.pharma_lengths.sum()+1)
        self.pharma_lengths=np.append(self.pharma_lengths,self.pharmaceutic_df.shape[0])
        pharma_2_drug['person_id']=pharma_2_drug['patientid']
        pharma_2_drug['drug_exposure_start_datetime']=pd.to_datetime(pharma_2_drug['givenat'], dayfirst=True)
        pharma_2_drug['drug_exposure_start_date']=pd.to_datetime(pharma_2_drug['drug_exposure_start_datetime']).dt.date
        pharma_2_drug['drug_exposure_end_datetime']=pd.to_datetime(pharma_2_drug['enteredentryat'], dayfirst=True)
        pharma_2_drug['drug_exposure_end_datetime']=np.where(pharma_2_drug['drug_exposure_end_datetime']>pharma_2_drug['drug_exposure_start_datetime'],pharma_2_drug['drug_exposure_end_datetime'],pharma_2_drug['drug_exposure_start_datetime'])
        pharma_2_drug['drug_exposure_end_date']=pd.to_datetime(pharma_2_drug['drug_exposure_end_datetime']).dt.date
        pharma_2_drug['verbatim_end_date']=pd.to_datetime(pharma_2_drug['drug_exposure_end_datetime']).dt.date
        pharma_2_drug['drug_type_concept_id']=38000180
        pharma_2_drug['stop_reason']=0
        pharma_2_drug['visit_occurrence_id']=pharma_2_drug['patientid'].map(self.person_visit_dict)
        pharma_2_drug['quantity']=pharma_2_drug['givendose']
        pharma_2_drug['days_supply']=0
        pharma_2_drug['sig']=0
        pharma_2_drug['lot_number']=np.nan
        pharma_2_drug['provider_id']=np.nan
        pharma_2_drug['visit_detail_id']=np.nan
        pharma_2_drug['drug_source_value']=pharma_2_drug['pharmaid']
        pharma_2_drug['drug_source_concept_id']=pharma_2_drug['typeid']
        pharma_2_drug['route_source_value']=pharma_2_drug['route']
        pharma_2_drug['dose_unit_source_value']=pharma_2_drug['doseunit']
        pharma_2_drug['drug_concept_id']=pharma_2_drug['pharmaid'].map(self.variableid_dict)
        pharma_2_drug['route_concept_id']=0
        pharma_2_drug.loc[pharma_2_drug['route']=='iv-inf', 'route_concept_id'] = 45884925 #LOINC: Infusion
        pharma_2_drug.loc[pharma_2_drug['route']=='iv-inj', 'route_concept_id'] = 45877627 #LOINC: Injection Intravenous
        pharma_2_drug.loc[pharma_2_drug['route']=='SC-inj', 'route_concept_id'] = 45882083 #LOINC: Injection, subcutaneous
        pharma_2_drug.loc[pharma_2_drug['route']=='SC-inj', 'route_concept_id'] = 45882083 #LOINC: Injection, subcutaneous
        pharma_2_drug.loc[pharma_2_drug['route']=='inhal', 'route_concept_id'] = 45880877 #LOINC: Inhalation
        pharma_2_drug.loc[pharma_2_drug['route']=='ep-inj', 'route_concept_id'] = 45877638 #LOINC: Injection, epidural
        pharma_2_drug.loc[pharma_2_drug['route']=='transcutan', 'route_concept_id'] = 45877646 #LOINC: Transdermal
        pharma_2_drug.loc[pharma_2_drug['route']=='paravert', 'route_concept_id'] = 4170267 #LOINC: Paravertebral route
        pharma_2_drug['refills'] = pharma_2_drug.groupby(['route_concept_id', 'infusionid']).cumcount()
        pharma_2_drug = pharma_2_drug[pharma_2_drug['drug_concept_id'].notna()]
        pharma_2_drug=pharma_2_drug[['drug_exposure_id','person_id','drug_concept_id','drug_exposure_start_date',
                            'drug_exposure_start_datetime','drug_exposure_end_date','drug_exposure_end_datetime',
                           'verbatim_end_date','drug_type_concept_id','stop_reason','refills',
                            'quantity','days_supply','sig','route_concept_id','lot_number',
                            'provider_id','visit_occurrence_id','visit_detail_id',
                            'drug_source_value','drug_source_concept_id','route_source_value','dose_unit_source_value'
                           ]]
        pharma_2_drug['drug_exposure_id']=pharma_2_drug['drug_exposure_id'].astype(int)
        pharma_2_drug['person_id']=pharma_2_drug['person_id'].astype(int)
        pharma_2_drug['visit_occurrence_id']=pharma_2_drug['visit_occurrence_id'].astype(int)
        pharma_2_drug['drug_concept_id']=pharma_2_drug['drug_concept_id'].astype(float).astype(int)
        if(self.sampling_mode):
            pharma_2_drug=pharma_2_drug.sample(frac=0.1, replace=False)
            pass
        self.convert_df2sql(pharma_2_drug, 'drug_exposure')
        self.local_final_rows_phr=pharma_2_drug.shape[0]
        print('\t\tPharmaceuticals #{:d} done!'.format(group_number))
        print('\t\t{:d} of {:d} ({:.2%}) drugs records translated'.format(self.local_final_rows_phr,
                                                                          self.local_inital_rows_phr,
                                                                         float(self.local_final_rows_phr)/float(self.local_inital_rows_phr)))
        pass
    
    
    def upload_observation_period(self,group_number):
        print('\t\tUploading observational period for group #{:d}'.format(group_number))
        self.observations_df=pd.read_csv(self.hirid_directory+'raw_stage\\observation_tables\\csv\\part-'+str(group_number)+'.csv')
        obs_period=self.observations_df.copy()
        obs_period['visit_start_datetime']=obs_period.groupby(['patientid'])['datetime'].transform('min')
        obs_period['visit_end_datetime']=obs_period.groupby(['patientid'])['entertime'].transform('max')
        obs_period['observation_period_start_date']= pd.to_datetime(obs_period['visit_start_datetime']).dt.date
        obs_period['observation_period_end_date']= pd.to_datetime(obs_period['visit_end_datetime']).dt.date
        obs_period['period_type_concept_id']=44814724 # Obs Period Type: Period covering healthcare encounters
        obs_period = obs_period.rename(columns={'patientid': 'person_id'})
        obs_period['observation_period_id']=0
        obs_period=obs_period[['observation_period_id','person_id','observation_period_start_date',
                               'observation_period_end_date','period_type_concept_id']]
        obs_period=obs_period.drop_duplicates()
        obs_period['observation_period_id']= np.arange(len(obs_period))+(self.obs_period_lengths.sum()+1)
        obs_period['observation_period_id'] = obs_period['observation_period_id'].astype(int)
        self.obs_period_lengths=np.append(self.obs_period_lengths,obs_period.shape[0])
        self.convert_df2sql(obs_period, 'observation_period')
        print('\t\tObservational period #{:d} done!'.format(group_number))
        pass
    
    def upload_visit_occurrence(self, group_number):
        print('\t\tUploading visit occurrence for group #{:d}'.format(group_number))
        self.observasations_df=pd.read_csv(self.hirid_directory+'raw_stage\\observation_tables\\csv\\part-'+str(group_number)+'.csv')
        
        visit_obs=self.observations_df.copy()
        visit_obs['visit_start_datetime']=visit_obs.groupby(['patientid'])['datetime'].transform('min')
        visit_obs['visit_end_datetime']=visit_obs.groupby(['patientid'])['entertime'].transform('max')
        visit_obs['visit_start_date']= visit_obs['visit_start_datetime']
        visit_obs['visit_end_date']= visit_obs['visit_end_datetime']
        visit_obs['visit_type_concept_id']=32037 # Vocabulary Visit: Intensive Care
        visit_obs['provider_id']=np.nan
        visit_obs['care_site_id']=np.nan
        visit_obs['visit_source_value']=0
        visit_obs['visit_source_concept_id']=0
        visit_obs['admitted_from_concept_id']=32199 # Vocabulary Visit: Information not available
        visit_obs['discharge_to_concept_id']=0
        visit_obs['discharge_to_source_value']=0
        visit_obs=visit_obs.drop_duplicates()
        visit_obs = visit_obs.rename(columns={'patientid': 'person_id'})
        visit_obs['admitted_from_source_value']=0
        visit_obs['preceding_visit_occurrence_id']=np.nan
        visit_obs['visit_concept_id']=32037 # Vocabulary Visit: Intensive Care
        visit_obs['visit_occurrence_id'] = 0
        visit_obs=visit_obs[['visit_occurrence_id','person_id','visit_concept_id','visit_start_date',
                             'visit_start_datetime','visit_end_date','visit_end_datetime','visit_type_concept_id',
                            'provider_id','care_site_id','visit_source_value','visit_source_concept_id',
                            'admitted_from_concept_id','admitted_from_source_value','discharge_to_source_value',
                            'discharge_to_concept_id','preceding_visit_occurrence_id']]
        visit_obs=visit_obs.drop_duplicates()
        visit_obs['visit_occurrence_id'] = np.arange(len(visit_obs))+(self.visit_lengths.sum()+1)
        visit_obs['visit_occurrence_id'] = visit_obs['visit_occurrence_id'].astype(int)
        self.visit_lengths=np.append(self.visit_lengths,visit_obs.shape[0])
        self.convert_df2sql(visit_obs, 'visit_occurrence')
        self.person_visit_dict=visit_obs.set_index('person_id')['visit_occurrence_id'].to_dict()
        print('\t\tVisit occurrence #{:d} done!'.format(group_number))
        pass
    
    def upload_person(self,full_court=True):
        print('\tTranslating and uploading Person table')
        start = time.process_time()
        patient=pd.read_csv(self.hirid_directory+'general_table.csv')
        patient['person_id']=patient.patientid.astype(int)
        patient['gender_concept_id']=np.where(patient['sex']=='F', 8532 , 8507)
        patient['admission_date']=pd.to_datetime(patient['admissiontime'], dayfirst=True)
        patient['birth_datetime'] = patient['admission_date'] -  pd.to_timedelta(patient['age']*365, unit = 'D')
        patient['year_of_birth'] = pd.DatetimeIndex(patient['birth_datetime']).year
        patient['month_of_birth'] = pd.DatetimeIndex(patient['birth_datetime']).month
        patient['day_of_birth'] = pd.DatetimeIndex(patient['birth_datetime']).day
        patient['death_datetime'] = np.nan
        patient['race_concept_id']=0
        patient['ethnicity_concept_id']=0
        patient['location_id']=np.nan
        patient['provider_id']=np.nan
        patient['care_site_id']=np.nan
        patient['person_source_value']=patient.patientid.astype(int)
        patient['gender_source_value']=patient['sex']
        patient['gender_source_concept_id']=0
        patient['race_source_value']=np.nan
        patient['race_source_concept_id']=0
        patient['ethnicity_source_value']=np.nan
        patient['ethnicity_source_concept_id']=0
        patient=patient[['person_id','gender_concept_id','year_of_birth','month_of_birth','day_of_birth',
                         'birth_datetime','death_datetime','race_concept_id','ethnicity_concept_id',
                        'location_id','provider_id','care_site_id','person_source_value','gender_source_value',
                        'gender_source_concept_id','race_source_value','race_source_concept_id',
                        'ethnicity_source_value','ethnicity_source_concept_id']]
        self.convert_df2sql(patient, 'person')
        self.person_upload_time=time.process_time() - start
        self.person_df=patient.copy()
        print('\tPerson upload successful!')
        pass
    

In [22]:
uploadDb=UploadDB_Optimized('D:\\MIMICIII\\HiRID\\','postgresql://postgres:jp123456$@localhost:5432/omop_cdm3','D:\\MIMICIII\\HiRID\\Translation_13Aug.csv',250)
uploadDb.translate_upload()

Beginning the translate and upload process...
Process began at: 2020-08-13 18:09:46.449215
		Translating and uploading Person table
		Person upload successful!
	Translating item 1 of 250
		Uploading observational period for group #0
		Observational period #0 done!
		Uploading visit occurrence for group #0
		Visit occurrence #0 done!
		Uploading measurements for group #0




		Measurement #0 done!
		3305338 of 3307075 (99.95%) observations translated
		Uploading visit pharmaceuticals for group #0
		Pharmaceuticals #0 done!
		63530 of 72314 (87.85%) drugs records translated
	Group #0 completed!
	Translating item 2 of 250
		Uploading observational period for group #1
		Observational period #1 done!
		Uploading visit occurrence for group #1
		Visit occurrence #1 done!
		Uploading measurements for group #1
		Measurement #1 done!
		3681303 of 3682933 (99.96%) observations translated
		Uploading visit pharmaceuticals for group #1
		Pharmaceuticals #1 done!
		66025 of 78102 (84.54%) drugs records translated
	Group #1 completed!
	Translating item 3 of 250
		Uploading observational period for group #2
		Observational period #2 done!
		Uploading visit occurrence for group #2
		Visit occurrence #2 done!
		Uploading measurements for group #2
		Measurement #2 done!
		2632057 of 2633385 (99.95%) observations translated
		Uploading visit pharmaceuticals for group #2
		Ph

		Observational period #19 done!
		Uploading visit occurrence for group #19
		Visit occurrence #19 done!
		Uploading measurements for group #19
		Measurement #19 done!
		2978796 of 2980473 (99.94%) observations translated
		Uploading visit pharmaceuticals for group #19
		Pharmaceuticals #19 done!
		47921 of 55199 (86.81%) drugs records translated
	Group #19 completed!
	Translating item 21 of 250
		Uploading observational period for group #20
		Observational period #20 done!
		Uploading visit occurrence for group #20
		Visit occurrence #20 done!
		Uploading measurements for group #20
		Measurement #20 done!
		1836030 of 1837052 (99.94%) observations translated
		Uploading visit pharmaceuticals for group #20
		Pharmaceuticals #20 done!
		29551 of 33544 (88.10%) drugs records translated
	Group #20 completed!
	Translating item 22 of 250
		Uploading observational period for group #21
		Observational period #21 done!
		Uploading visit occurrence for group #21
		Visit occurrence #21 done!
		U

		Measurement #37 done!
		2570593 of 2572114 (99.94%) observations translated
		Uploading visit pharmaceuticals for group #37
		Pharmaceuticals #37 done!
		46425 of 52591 (88.28%) drugs records translated
	Group #37 completed!
	Translating item 39 of 250
		Uploading observational period for group #38
		Observational period #38 done!
		Uploading visit occurrence for group #38
		Visit occurrence #38 done!
		Uploading measurements for group #38
		Measurement #38 done!
		4447237 of 4449468 (99.95%) observations translated
		Uploading visit pharmaceuticals for group #38
		Pharmaceuticals #38 done!
		90298 of 105595 (85.51%) drugs records translated
	Group #38 completed!
	Translating item 40 of 250
		Uploading observational period for group #39
		Observational period #39 done!
		Uploading visit occurrence for group #39
		Visit occurrence #39 done!
		Uploading measurements for group #39
		Measurement #39 done!
		3101104 of 3102654 (99.95%) observations translated
		Uploading visit pharmaceuti

		Observational period #56 done!
		Uploading visit occurrence for group #56
		Visit occurrence #56 done!
		Uploading measurements for group #56
		Measurement #56 done!
		3066630 of 3068278 (99.95%) observations translated
		Uploading visit pharmaceuticals for group #56
		Pharmaceuticals #56 done!
		48054 of 56001 (85.81%) drugs records translated
	Group #56 completed!
	Translating item 58 of 250
		Uploading observational period for group #57
		Observational period #57 done!
		Uploading visit occurrence for group #57
		Visit occurrence #57 done!
		Uploading measurements for group #57
		Measurement #57 done!
		4028408 of 4030316 (99.95%) observations translated
		Uploading visit pharmaceuticals for group #57
		Pharmaceuticals #57 done!
		71434 of 82407 (86.68%) drugs records translated
	Group #57 completed!
	Translating item 59 of 250
		Uploading observational period for group #58
		Observational period #58 done!
		Uploading visit occurrence for group #58
		Visit occurrence #58 done!
		U

		Pharmaceuticals #74 done!
		52683 of 61839 (85.19%) drugs records translated
	Group #74 completed!
	Translating item 76 of 250
		Uploading observational period for group #75
		Observational period #75 done!
		Uploading visit occurrence for group #75
		Visit occurrence #75 done!
		Uploading measurements for group #75
		Measurement #75 done!
		2740289 of 2741672 (99.95%) observations translated
		Uploading visit pharmaceuticals for group #75
		Pharmaceuticals #75 done!
		54538 of 63492 (85.90%) drugs records translated
	Group #75 completed!
	Translating item 77 of 250
		Uploading observational period for group #76
		Observational period #76 done!
		Uploading visit occurrence for group #76
		Visit occurrence #76 done!
		Uploading measurements for group #76
		Measurement #76 done!
		2640924 of 2642405 (99.94%) observations translated
		Uploading visit pharmaceuticals for group #76
		Pharmaceuticals #76 done!
		44715 of 51975 (86.03%) drugs records translated
	Group #76 completed!
	Transl

		Observational period #93 done!
		Uploading visit occurrence for group #93
		Visit occurrence #93 done!
		Uploading measurements for group #93
		Measurement #93 done!
		3279961 of 3281771 (99.94%) observations translated
		Uploading visit pharmaceuticals for group #93
		Pharmaceuticals #93 done!
		56829 of 64257 (88.44%) drugs records translated
	Group #93 completed!
	Translating item 95 of 250
		Uploading observational period for group #94
		Observational period #94 done!
		Uploading visit occurrence for group #94
		Visit occurrence #94 done!
		Uploading measurements for group #94
		Measurement #94 done!
		2647074 of 2648428 (99.95%) observations translated
		Uploading visit pharmaceuticals for group #94
		Pharmaceuticals #94 done!
		44042 of 49967 (88.14%) drugs records translated
	Group #94 completed!
	Translating item 96 of 250
		Uploading observational period for group #95
		Observational period #95 done!
		Uploading visit occurrence for group #95
		Visit occurrence #95 done!
		U

		Visit occurrence #111 done!
		Uploading measurements for group #111
		Measurement #111 done!
		2955046 of 2956624 (99.95%) observations translated
		Uploading visit pharmaceuticals for group #111
		Pharmaceuticals #111 done!
		49089 of 55673 (88.17%) drugs records translated
	Group #111 completed!
	Translating item 113 of 250
		Uploading observational period for group #112
		Observational period #112 done!
		Uploading visit occurrence for group #112
		Visit occurrence #112 done!
		Uploading measurements for group #112
		Measurement #112 done!
		3697454 of 3699293 (99.95%) observations translated
		Uploading visit pharmaceuticals for group #112
		Pharmaceuticals #112 done!
		69618 of 81152 (85.79%) drugs records translated
	Group #112 completed!
	Translating item 114 of 250
		Uploading observational period for group #113
		Observational period #113 done!
		Uploading visit occurrence for group #113
		Visit occurrence #113 done!
		Uploading measurements for group #113
		Measurement #113

  exec(code_obj, self.user_global_ns, self.user_ns)


		Observational period #127 done!
		Uploading visit occurrence for group #127
		Visit occurrence #127 done!
		Uploading measurements for group #127
		Measurement #127 done!
		2358202 of 2359433 (99.95%) observations translated
		Uploading visit pharmaceuticals for group #127
		Pharmaceuticals #127 done!
		40366 of 47172 (85.57%) drugs records translated
	Group #127 completed!
	Translating item 129 of 250
		Uploading observational period for group #128
		Observational period #128 done!
		Uploading visit occurrence for group #128
		Visit occurrence #128 done!
		Uploading measurements for group #128
		Measurement #128 done!
		3825363 of 3827289 (99.95%) observations translated
		Uploading visit pharmaceuticals for group #128
		Pharmaceuticals #128 done!
		68687 of 79829 (86.04%) drugs records translated
	Group #128 completed!
	Translating item 130 of 250
		Uploading observational period for group #129
		Observational period #129 done!
		Uploading visit occurrence for group #129
		Visit oc

		Observational period #145 done!
		Uploading visit occurrence for group #145
		Visit occurrence #145 done!
		Uploading measurements for group #145
		Measurement #145 done!
		2793979 of 2795570 (99.94%) observations translated
		Uploading visit pharmaceuticals for group #145
		Pharmaceuticals #145 done!
		48998 of 54482 (89.93%) drugs records translated
	Group #145 completed!
	Translating item 147 of 250
		Uploading observational period for group #146
		Observational period #146 done!
		Uploading visit occurrence for group #146
		Visit occurrence #146 done!
		Uploading measurements for group #146
		Measurement #146 done!
		2853210 of 2854658 (99.95%) observations translated
		Uploading visit pharmaceuticals for group #146
		Pharmaceuticals #146 done!
		57769 of 64175 (90.02%) drugs records translated
	Group #146 completed!
	Translating item 148 of 250
		Uploading observational period for group #147
		Observational period #147 done!
		Uploading visit occurrence for group #147
		Visit oc

		Observational period #163 done!
		Uploading visit occurrence for group #163
		Visit occurrence #163 done!
		Uploading measurements for group #163
		Measurement #163 done!
		2542570 of 2544006 (99.94%) observations translated
		Uploading visit pharmaceuticals for group #163
		Pharmaceuticals #163 done!
		44367 of 49108 (90.35%) drugs records translated
	Group #163 completed!
	Translating item 165 of 250
		Uploading observational period for group #164
		Observational period #164 done!
		Uploading visit occurrence for group #164
		Visit occurrence #164 done!
		Uploading measurements for group #164
		Measurement #164 done!
		2618495 of 2619850 (99.95%) observations translated
		Uploading visit pharmaceuticals for group #164
		Pharmaceuticals #164 done!
		49546 of 58535 (84.64%) drugs records translated
	Group #164 completed!
	Translating item 166 of 250
		Uploading observational period for group #165
		Observational period #165 done!
		Uploading visit occurrence for group #165
		Visit oc

		Observational period #181 done!
		Uploading visit occurrence for group #181
		Visit occurrence #181 done!
		Uploading measurements for group #181
		Measurement #181 done!
		3599960 of 3601819 (99.95%) observations translated
		Uploading visit pharmaceuticals for group #181
		Pharmaceuticals #181 done!
		66026 of 78434 (84.18%) drugs records translated
	Group #181 completed!
	Translating item 183 of 250
		Uploading observational period for group #182
		Observational period #182 done!
		Uploading visit occurrence for group #182
		Visit occurrence #182 done!
		Uploading measurements for group #182
		Measurement #182 done!
		3320275 of 3321945 (99.95%) observations translated
		Uploading visit pharmaceuticals for group #182
		Pharmaceuticals #182 done!
		66518 of 74956 (88.74%) drugs records translated
	Group #182 completed!
	Translating item 184 of 250
		Uploading observational period for group #183
		Observational period #183 done!
		Uploading visit occurrence for group #183
		Visit oc

		Observational period #199 done!
		Uploading visit occurrence for group #199
		Visit occurrence #199 done!
		Uploading measurements for group #199
		Measurement #199 done!
		3492491 of 3494237 (99.95%) observations translated
		Uploading visit pharmaceuticals for group #199
		Pharmaceuticals #199 done!
		63034 of 71966 (87.59%) drugs records translated
	Group #199 completed!
	Translating item 201 of 250
		Uploading observational period for group #200
		Observational period #200 done!
		Uploading visit occurrence for group #200
		Visit occurrence #200 done!
		Uploading measurements for group #200
		Measurement #200 done!
		3806895 of 3808765 (99.95%) observations translated
		Uploading visit pharmaceuticals for group #200
		Pharmaceuticals #200 done!
		71929 of 83143 (86.51%) drugs records translated
	Group #200 completed!
	Translating item 202 of 250
		Uploading observational period for group #201
		Observational period #201 done!
		Uploading visit occurrence for group #201
		Visit oc

		Observational period #217 done!
		Uploading visit occurrence for group #217
		Visit occurrence #217 done!
		Uploading measurements for group #217
		Measurement #217 done!
		3368114 of 3369753 (99.95%) observations translated
		Uploading visit pharmaceuticals for group #217
		Pharmaceuticals #217 done!
		62074 of 70098 (88.55%) drugs records translated
	Group #217 completed!
	Translating item 219 of 250
		Uploading observational period for group #218
		Observational period #218 done!
		Uploading visit occurrence for group #218
		Visit occurrence #218 done!
		Uploading measurements for group #218
		Measurement #218 done!
		3039252 of 3040946 (99.94%) observations translated
		Uploading visit pharmaceuticals for group #218
		Pharmaceuticals #218 done!
		62698 of 69310 (90.46%) drugs records translated
	Group #218 completed!
	Translating item 220 of 250
		Uploading observational period for group #219
		Observational period #219 done!
		Uploading visit occurrence for group #219
		Visit oc

		Observational period #235 done!
		Uploading visit occurrence for group #235
		Visit occurrence #235 done!
		Uploading measurements for group #235
		Measurement #235 done!
		2869823 of 2871369 (99.95%) observations translated
		Uploading visit pharmaceuticals for group #235
		Pharmaceuticals #235 done!
		52546 of 57625 (91.19%) drugs records translated
	Group #235 completed!
	Translating item 237 of 250
		Uploading observational period for group #236
		Observational period #236 done!
		Uploading visit occurrence for group #236
		Visit occurrence #236 done!
		Uploading measurements for group #236
		Measurement #236 done!
		2935334 of 2937029 (99.94%) observations translated
		Uploading visit pharmaceuticals for group #236
		Pharmaceuticals #236 done!
		57397 of 64543 (88.93%) drugs records translated
	Group #236 completed!
	Translating item 238 of 250
		Uploading observational period for group #237
		Observational period #237 done!
		Uploading visit occurrence for group #237
		Visit oc

In [10]:
print(UploadDB_Optimized.__doc__)


    Functionality: In this class you can translate the whole hiRID into Postgresql, feel free to modify it according to your needs

    Variables:

    	*hirid_directory: Folder where your HiRID files uncompressed are located. Do not modify the file tree in order to work properly

    	*postgres_add: Connection string for the postgres database

    	*mapping_file: Directory path and file name for the Usagi's mapping folder, for more information about it check:
https://www.ohdsi.org/web/wiki/doku.php?id=documentation:software:usagi
 
    	*max_group: Default value 250. Given that HiRID is divided in 250 groups the max group is the number of groups you want to upload

    	*sampling_mode: Allows the user to insert only a percentage of the data

    Example:	
    	uploadDb=UploadDB_Optimized('D:\YourFolderGoesHere\',"dbname='dbname' user='user' host='localhost' password='password' connect_timeout=1",'D:\DirectoryPath\Vocabulary_translation.csv',1)

    	uploadDb.translate_upload()
    
