In [None]:
import pyodbc
import pandas as pd
# ODBC connection string shared by every query cell below; assembled from one
# raw-string fragment per key so each setting sits on its own line.
cnxn_string = (
    r'Driver={SQL Server};'
    r'Server=omop.uci.edu;'
    r'Database=cords;'
    r'Trusted_Connection=yes;'
)

In [None]:
# Build one row per patient who has a measurement with one of the listed LOINC
# codes and value_as_concept_id = 9191 (presumably the OMOP "Positive" concept
# -- confirm), keeping the earliest such measurement_date as
# positive_covid_test_date. The chained CTEs then left-join person, location
# and concept (three times) to replace the gender / race / ethnicity concept
# ids with their concept_name and the location id with location_source_value.
cnxn = pyodbc.connect(cnxn_string)
try:
    # read_sql_query already returns a DataFrame, so no extra wrapper is needed.
    patient_info = pd.read_sql_query("""
with covid_patients as (
	select m.person_id, min(m.measurement_date) as positive_covid_test_date from cords.dbo.measurement m, cords.dbo.concept c
    where m.measurement_concept_id = c.concept_id
    and vocabulary_id='LOINC'
    and concept_code in ('94500-6', '94309-2', '94531-1', '94500-6', '94310-0', '94533-7', '94306-8')
    and m.value_as_concept_id=9191
    group by person_id
),
with_info as (
	select a.person_id,[gender_concept_id],[year_of_birth],[race_concept_id] ,[ethnicity_concept_id], location_id,
    positive_covid_test_date
    from covid_patients a 
	left join (
	SELECT distinct [person_id],[gender_concept_id] ,[year_of_birth] ,[race_concept_id] ,[ethnicity_concept_id], location_id
  FROM [CORDS].[dbo].[person]
  ) as b
  on a.person_id = b.person_id
 ),
 with_location as (
  select person_id, year_of_birth, b.location_source_value as location,gender_concept_id,race_concept_id, 
  ethnicity_concept_id, positive_covid_test_date
  from with_info a
 left join (select * from cords.dbo.location ) as b
    on b.location_id=a.location_id
 ),
 with_gender as (
 select person_id, year_of_birth, b.concept_name as gender, location, race_concept_id, ethnicity_concept_id, positive_covid_test_date
 from with_location a
 left join (select * from cords.dbo.concept ) as b
	on b.concept_id=a.gender_concept_id
 ),
 with_race as (
 select person_id, year_of_birth, gender, b.concept_name as race, location, ethnicity_concept_id, positive_covid_test_date 
 from with_gender a
 left join (select * from cords.dbo.concept ) as b
	on b.concept_id=a.race_concept_id
 ),
 with_ethnicity as (
	select person_id, year_of_birth, location, gender, race, b.concept_name as ethnicity, positive_covid_test_date from with_race a
	left join (select * from cords.dbo.concept ) as b
	on b.concept_id=a.ethnicity_concept_id
 )

select * from with_ethnicity
""", cnxn)
finally:
    # Release the connection even when the query raises, so a failed run does
    # not leave a session open on the server.
    cnxn.close()

In [None]:
# Show the demographics frame returned by the cohort query (one column each for
# person_id, year_of_birth, location, gender, race, ethnicity and
# positive_covid_test_date).
patient_info

In [None]:
# Collect the person_ids that have a visit with visit_concept_id = 9201
# (presumably the OMOP "Inpatient Visit" concept -- confirm) linked to a
# measurement with one of the listed LOINC codes and value_as_concept_id = 9191.
# A measurement is linked to a visit when its date falls between 30 days before
# and 1 day after the visit start, or between the visit start and end dates.
cnxn = pyodbc.connect(cnxn_string)
try:
    # read_sql_query already returns a DataFrame, so no extra wrapper is needed.
    inpatient_indicator = pd.read_sql_query("""
with visits as (
select distinct vo.*--, m.measurement_date, c.concept_code
from measurement m
join concept c
on m.measurement_concept_id = c.concept_id
and c.vocabulary_id='LOINC' and c.concept_code IN ('94309-2', '94531-1', '94500-6' , '94310-0' , '94306-8', '94533-7')
join visit_occurrence vo
on m.person_id=vo.person_id
and (m.measurement_date between dateadd(d, -30, vo.visit_start_date) and dateadd(d,1, vo.visit_start_date)
or m.measurement_date between vo.visit_start_date and vo.visit_end_date
)
and vo.visit_concept_id=9201
and m.value_as_concept_id=9191

)

select distinct person_id from visits
""", cnxn)
finally:
    # Release the connection even when the query raises.
    cnxn.close()

In [None]:
# Show the single-column (person_id) frame returned by the inpatient query.
inpatient_indicator

In [None]:
# Preview the cohort rows whose person_id also appears in the inpatient result.
patient_info.loc[
    patient_info['person_id'].isin(inpatient_indicator['person_id'].unique())
]

In [None]:
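# Not needed: the inpatient query already ends with `select distinct person_id`,
# so the frame has one row per person. Kept for reference only.
#inpatient_indicator_unique_patients= inpatient_indicator.drop_duplicates('person_id', keep='first', ignore_index=True)[['person_id']]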

In [None]:
# Flag every person returned by the inpatient query with inpatient = 1.
# `.assign` builds a new frame; a plain `=` would only alias
# `inpatient_indicator`, so adding the column would silently change the frame
# displayed earlier and make re-running the cells order-dependent.
inpatient_indicator_unique_patients = inpatient_indicator.assign(inpatient=1)

In [None]:
# Number of distinct person_ids flagged as inpatient (missing values, if any,
# count as one distinct value, exactly as len(unique()) would).
inpatient_indicator_unique_patients['person_id'].nunique(dropna=False)

In [None]:
# Add the inpatient flag to the demographics table.
# Both join keys are cast to str so the merge compares like with like; the flag
# itself is also stored as a string ('1' / '0').
patient_info['person_id'] = patient_info['person_id'].astype(str)
inpatient_indicator_unique_patients[['person_id', 'inpatient']] = (
    inpatient_indicator_unique_patients[['person_id', 'inpatient']].astype(str)
)
merged_patient_info = patient_info.merge(
    inpatient_indicator_unique_patients,
    on='person_id',
    how='left',
    # The right side must hold at most one row per person; otherwise the left
    # join would duplicate patients, so fail loudly instead.
    validate='many_to_one',
)
# Patients without a match were never flagged, so they get '0'.
# Assign the result back: calling fillna(inplace=True) on the attribute
# `merged_patient_info.inpatient` is a chained in-place update, which warns in
# pandas 2.x and has no effect once copy-on-write is enabled.
merged_patient_info['inpatient'] = merged_patient_info['inpatient'].fillna('0')

In [None]:
# Tally of inpatient ('1') versus non-inpatient ('0') patients after the merge.
merged_patient_info['inpatient'].value_counts()

In [None]:
# For every patient in the positive-test cohort, fetch the measurements with
# measurement_concept_id = '3038553' (presumably the OMOP BMI concept --
# confirm) and average the values recorded on that patient's most recent
# measurement date.
cnxn = pyodbc.connect(cnxn_string)
try:
    # read_sql_query already returns a DataFrame, so no extra wrapper is needed.
    # The self-join in `recent_bmi` matches on person_id as well as on the
    # date: joining on the date alone pulled in a patient's measurements from
    # any day that happened to be another patient's latest date, which skewed
    # the per-patient average.
    bmi = pd.read_sql_query("""
with covid_positive_patients as (
	select m.person_id from cords.dbo.measurement m, cords.dbo.concept c
    where m.measurement_concept_id = c.concept_id
    and vocabulary_id='LOINC'
    and concept_code in ('94500-6', '94309-2', '94531-1', '94500-6', '94310-0', '94533-7', '94306-8') and m.value_as_concept_id=9191
    group by person_id
),
bmi as (
	SELECT * FROM [CORDS].[dbo].[measurement]
   where person_id in (select person_id from covid_positive_patients) and measurement_concept_id = '3038553'
),
recent_bmi as ( 
	select a.person_id, avg(a.value_as_number) as bmi
	from bmi a
	inner join (select person_id, max(measurement_date) as most_recent_measurement_date from bmi group by person_id) b
	on a.person_id = b.person_id
	and a.measurement_date = b.most_recent_measurement_date
	group by a.person_id
)

select * from recent_bmi

""", cnxn)
finally:
    # Release the connection even when the query raises.
    cnxn.close()
# Use string keys so the later merge on person_id lines up with the other frames.
bmi['person_id'] = bmi['person_id'].astype(str)
# Keep only BMI values up to 80. Missing values compare False and are dropped
# as well. `.copy()` makes the result an independent frame, so later column
# assignments do not write into a slice of the unfiltered data.
bmi = bmi[bmi['bmi'].astype(float) <= 80].copy()

In [None]:
# Attach the BMI column to the merged demographics / inpatient table.
# The key is cast on a copy inside the call rather than by re-assigning columns
# on `bmi` and `patient_info` (the latter is not part of this merge at all), so
# re-running the cell has no side effects on the input frames.
patient_info_with_bmi = merged_patient_info.merge(
    bmi.astype({'person_id': str}),
    on='person_id',
    how='left',
    # `bmi` must hold at most one row per person, otherwise patients would be
    # duplicated by the left join; fail loudly if that ever stops being true.
    validate='many_to_one',
)
patient_info_with_bmi

In [None]:
# Persist the final table. Note the file is tab-delimited even though its name
# ends in .csv, and the DataFrame index is not written.
patient_info_with_bmi.to_csv(
    path_or_buf="patient_info.csv",
    sep='\t',
    index=False,
)

In [None]:
# Summary statistics for the merged BMI column (patients without a BMI row are
# NaN after the left merge and are excluded from the statistics by describe()).
patient_info_with_bmi['bmi'].describe()

In [None]:
# Inpatient flag tally on the final table; compare with the tally taken before
# the BMI merge to confirm the merge did not add or drop rows.
patient_info_with_bmi['inpatient'].value_counts()

In [None]:
# (rows, columns) of the final table.
patient_info_with_bmi.shape

In [None]:
# Sanity check: none of the merges may have produced fully identical rows.
assert not patient_info_with_bmi.duplicated().any(), (
    "patient_info_with_bmi contains duplicated rows"
)