In [None]:
import pyodbc
import pandas as pd
# ODBC connection string handed to pyodbc.connect(); Trusted_Connection=yes is
# set, so no user name or password is embedded in the notebook.
cnxn_string = (
    r'Driver={SQL Server};Server=omop.uci.edu;Database=cords;Trusted_Connection=yes;'
)

In [None]:
# Show the pandas version in use, for reproducibility of the results below.
pd.__version__

In [None]:
# Load, for every COVID-positive patient, the drug ingredients they were exposed
# to in the 30 days starting on their first positive test date.
#
# Stages of the CTE chain (the SQL text itself is unchanged):
#   covid_positive_patients   - earliest measurement_date per person among the
#                               listed LOINC codes with value_as_concept_id=9191
#                               (presumably the "positive" concept - confirm).
#                               NOTE(review): '94500-6' appears twice in the IN
#                               list; harmless, but possibly a typo for another code.
#   drug_exposures            - distinct drug_exposure rows for those patients.
#   with_drug_type / _name    - concept names for the drug type and the drug.
#   drugs_after_covid_positive- exposures starting on the test date up to
#                               test date + 30 days (both bounds inclusive).
#   drug_ancestor             - concept_ancestor rows for each drug, excluding
#                               max_levels_of_separation = 0. The WHERE clause on
#                               the joined table makes this LEFT JOIN act as an
#                               inner join.
#   ancestor_name             - keeps ancestors with concept_class_id='Ingredient'
#                               (again effectively an inner join).
#   with_inpatient_indicator  - 1 when drug_type is 'Inpatient administration'.
#   group_by_drug_name /
#   group_by_ingredient       - collapse to one row per person and ingredient with
#                               min start date, max end date (falling back to the
#                               max start date when no end date exists), the
#                               aggregated drug names, and the max inpatient flag.
#   n_recent_drugs            - numbers each person's rows by start date then end
#                               date; RowNumber is returned as a zero-padded
#                               string ('001', '002', ...). The row cap is
#                               commented out in the SQL, so all rows are returned.
medications_query = """
with covid_positive_patients as (
    select m.person_id, min(m.measurement_date) as positive_covid_test_date from cords.dbo.measurement m, cords.dbo.concept c
    where m.measurement_concept_id = c.concept_id
    and vocabulary_id='LOINC'
    and concept_code in ('94500-6', '94309-2', '94531-1', '94500-6', '94310-0', '94533-7', '94306-8') and m.value_as_concept_id=9191
    group by person_id
),
drug_exposures as (
	select a.person_id, positive_covid_test_date, b.[drug_concept_id],b.[drug_exposure_start_date],b.[drug_exposure_end_date],b.[drug_type_concept_id]
	from covid_positive_patients a
	inner join (
		SELECT distinct [person_id]
		  ,[drug_concept_id]
		  ,[drug_exposure_start_date]
		  ,[drug_exposure_end_date]
		  ,[drug_type_concept_id]
	  FROM [CORDS].[dbo].[drug_exposure]) b
	  on a.person_id = b.person_id 
),
with_drug_type as (
	select de.person_id, positive_covid_test_date, de.drug_concept_id, c.concept_name as drug_type, de.drug_exposure_start_date, de.drug_exposure_end_date from drug_exposures de
		left join (select * from cords.dbo.concept ) as c
		on c.concept_id=de.drug_type_concept_id
),
with_drug_name as (
select de.person_id,positive_covid_test_date, c.concept_name as drug_name,drug_concept_id, de.drug_type, drug_exposure_start_date, drug_exposure_end_date
from with_drug_type de
		left join (select * from cords.dbo.concept ) as c
		on c.concept_id=de.drug_concept_id
),
drugs_after_covid_positive as (
select * from with_drug_name
where drug_exposure_start_date >= positive_covid_test_date 
and drug_exposure_start_date <=  dateadd(day, 30, positive_covid_test_date)
),
drug_ancestor as (
	select person_id, positive_covid_test_date, drug_exposure_start_date, drug_exposure_end_date, drug_name, drug_concept_id, drug_type, ancestor_concept_id from drugs_after_covid_positive a
	left join (
	SELECT * FROM [CORDS].[dbo].[concept_ancestor]
	) as b
	on a.drug_concept_id=b.descendant_concept_id
	where max_levels_of_separation!=0
),
ancestor_name as (
	select distinct person_id, positive_covid_test_date, drug_exposure_start_date, drug_exposure_end_date, drug_name, drug_type, c.concept_name as ingredient from drug_ancestor a
	left join (select * from cords.dbo.concept ) as c
	on 	a.ancestor_concept_id=c.concept_id
	where concept_class_id='Ingredient'
),
with_inpatient_indicator as (
	select person_id, positive_covid_test_date, drug_exposure_start_date, drug_exposure_end_date, drug_name, ingredient, 
	CASE WHEN drug_type='Inpatient administration' THEN 1 Else 0 End as inpatient_administered
	from ancestor_name
),
group_by_drug_name as (
select b.person_id, positive_covid_test_date, drug_name, ingredient, min([drug_exposure_start_date]) as drug_exposure_start_date, 
CASE WHEN max([drug_exposure_end_date]) IS NULL THEN max([drug_exposure_start_date]) ELSE max([drug_exposure_end_date]) END AS drug_exposure_end_date, max(inpatient_administered) as inpatient_administered
from with_inpatient_indicator b
group by person_id,positive_covid_test_date, drug_name, ingredient
),
group_by_ingredient as (
select b.person_id, positive_covid_test_date, ingredient, string_agg(drug_name, ', ' ) within group (order by drug_exposure_end_date desc) as drug_names, min([drug_exposure_start_date]) as ingredient_exposure_start_date, 
CASE WHEN max([drug_exposure_end_date]) IS NULL THEN max([drug_exposure_start_date]) ELSE max([drug_exposure_end_date]) END AS ingredient_exposure_end_date, max(inpatient_administered) as inpatient_administered
from group_by_drug_name b
group by person_id, positive_covid_test_date, ingredient
),
n_recent_drugs as (
select FORMAT(RowNumber, '000') as RowNumber, 
person_id,positive_covid_test_date,  ingredient, drug_names, ingredient_exposure_start_date, ingredient_exposure_end_date, inpatient_administered from (
		SELECT ROW_NUMBER() OVER (PARTITION BY person_id ORDER BY ingredient_exposure_start_date asc, ingredient_exposure_end_date asc) as RowNumber, * 
		FROM group_by_ingredient 
	) as a
	--where a.RowNumber <= 30
)
select * from n_recent_drugs
"""

cnxn = pyodbc.connect(cnxn_string)
try:
    # read_sql_query already returns a DataFrame, so no pd.DataFrame(...) wrap is needed.
    medications = pd.read_sql_query(medications_query, cnxn)
finally:
    # Release the connection even when the query fails; a pyodbc connection used
    # as a context manager only commits, it does not close.
    cnxn.close()

In [None]:
# Display the long-format query result (one row per person and ingredient).
medications

In [None]:
# Sanity check on the SQL date window: the largest number of days between the
# first positive test and the start of an ingredient exposure.
exposure_start = pd.to_datetime(medications['ingredient_exposure_start_date'])
positive_test = pd.to_datetime(medications['positive_covid_test_date'])
(exposure_start - positive_test).dt.days.max()

In [None]:
# Work on a copy carrying the calendar month of each exposure start, then show
# the rows whose exposure began in November (month 11).
med_copy = medications.assign(
    ingredient_month=pd.DatetimeIndex(medications['ingredient_exposure_start_date']).month
)
med_copy.loc[med_copy['ingredient_month'] == 11]
# Other spot checks that were tried here:
# med_copy['ingredient_exposure_start_date'].max()
# med_copy['positive_covid_test_date'].max()

In [None]:
# Reshape medications from long format (one row per person and ingredient) to
# wide format (one row per person, one group of columns per RowNumber slot).
variables = ['ingredient', 'drug_names', 'ingredient_exposure_start_date', 'ingredient_exposure_end_date', 'inpatient_administered']

# RowNumber arrives from SQL as a zero-padded string ('001', '002', ...).
# Take the maximum numerically: a string max is lexicographic, so '999' would
# beat '1000' and the columns for slots >= 1000 would be silently dropped.
row_numbers = medications['RowNumber'].astype(int)
max_row_num = int(row_numbers.max())
print(max_row_num)

# Build the slot suffix with the same %03d format used for the column names, so
# the pivot keys match whether or not RowNumber was already padded.
row_suffix = row_numbers.map('{:03d}'.format)
ordered_column_names = ['%s_%03d' % (v, i) for i in range(1, max_row_num + 1) for v in variables]

# Cast on a copy: `medications` itself is left untouched so this cell can be
# re-run and earlier displays of the frame stay accurate.
medications_long = medications.copy()
medications_long[['person_id', 'inpatient_administered']] = medications_long[['person_id', 'inpatient_administered']].astype(str)

# One pivot per variable; each yields columns named '<variable>_<slot>'.
tmp = []
for var in variables:
    keyed = medications_long.assign(tmp_idx=var + '_' + row_suffix)
    tmp.append(keyed.pivot(index='person_id', columns='tmp_idx', values=var))

reshape = pd.concat(tmp, axis=1)
# Order columns slot by slot, then turn person_id back into a regular column.
# reset_index() returns a new frame instead of mutating a column slice in place.
medications_wide = reshape[ordered_column_names].reset_index()
medications_wide.head()

In [None]:
#medications_wide = medications_wide.astype(str)

In [None]:
# Export the wide table. The file is tab-separated even though it carries a
# .csv extension, and the row index is not written.
medications_wide.to_csv(
    "medications.csv",
    sep='\t',
    index=False,
)

In [None]:
# Show the patients who have a 29th medication slot filled.
# Empty slots produced by the pivot are real NaN values, and NaN != 'nan' is
# always True, so comparing against the string alone returned every row.
# notna() is the correct test; the string comparison is kept as well so the
# filter still works if the frame has been cast with astype(str).
# NOTE(review): raises KeyError when no patient has 29 or more ingredients.
slot_029 = medications_wide['inpatient_administered_029']
medications_wide[slot_029.notna() & (slot_029 != 'nan')]

In [None]:
# Number of distinct patients with at least one remdesivir ingredient row.
is_remdesivir = medications['ingredient'] == 'remdesivir'
medications.loc[is_remdesivir, 'person_id'].nunique(dropna=False)

In [None]:
# Display the wide-format table (one row per person).
medications_wide