# Running queries on the OMOP data

## Install required packages

Before executing the cells below, in a terminal session install the following required R packages:

`conda create -n omop-source r-glue r-tidyverse r-data.table r-dbi  r-rpostgres r-irkernel -y`

### Connect to the SQL database

In [None]:
library(tidyverse)
library(data.table)
library(glue)
library(DBI)
library(RPostgres)

# Connection settings. Replace every NA placeholder below with the value for
# your environment before running this cell. The placeholders are explicit NA
# values (rather than an empty right-hand side) so that an unfilled setting
# cannot silently pick up the value of the next assignment.
DBNAME <- NA_character_    # TODO: database name
HOST <- NA_character_      # TODO: database host
PORT <- NA_integer_        # TODO: database port
PASSWORD <- NA_character_  # TODO: password (avoid committing it to the notebook)
USER <- "jupyter_notebook"

# Fail early with a clear message instead of attempting a connection with
# missing settings.
if (anyNA(c(DBNAME, HOST, PORT, PASSWORD))) {
    stop(
        "Set DBNAME, HOST, PORT and PASSWORD before connecting.",
        call. = FALSE
    )
}

# Open the PostgreSQL connection used by every query cell below.
connection <- DBI::dbConnect(
    RPostgres::Postgres(),
    dbname = DBNAME,
    host = HOST,
    port = PORT,
    password = PASSWORD,
    user = USER
)

# Query 1 - Database count
This is a query that provides the total number of participants

In [None]:
# Count the distinct participants recorded in the OMOP person table.
# `{schema}` is filled in by glue() below, so the schema name is set in
# exactly one place. The aggregate returns a single row, so no LIMIT is needed.
omop_query <- "
    select count(distinct o.person_id) as count
    from {schema}.person o
"

all_dataset_demo <- dbGetQuery(
    connection,
    glue(omop_query, schema = "omop_data_100kv13_covidv4")
)

all_dataset_demo

# Query 2 - Total count stratified by gender

In [None]:
# Count distinct participants per gender concept, largest group first.
# `{schema}` is filled in by glue() below, so the schema name is set in
# exactly one place.
omop_query <- "
    select count(distinct p.person_id) as count, c.concept_name, c.concept_id
    from {schema}.person p
    left join {schema}.concept c on c.concept_id = p.gender_concept_id
    group by c.concept_name, c.concept_id
    order by count desc
    limit 10
"

all_dataset_demo <- dbGetQuery(
    connection,
    glue(omop_query, schema = "omop_data_100kv13_covidv4")
)

all_dataset_demo

# Query 3 - Number of participants in the database with Acute Renal Failure Syndrome

In [None]:
# Count the distinct participants that have at least one condition occurrence
# with OMOP concept_id 197320.
# `{schema}` is filled in by glue() below, so the schema name is set in
# exactly one place. The concept id is compared as an integer literal.
omop_query <- "
    select count(distinct o.person_id) as count, o.condition_concept_id
    from {schema}.condition_occurrence o
    where o.condition_concept_id = 197320
    group by o.condition_concept_id
    order by count desc
"

all_dataset_demo <- dbGetQuery(
    connection,
    glue(omop_query, schema = "omop_data_100kv13_covidv4")
)

all_dataset_demo

# Query 4 - Comorbidities
This is a simple query that lists the most common comorbidities of participants diagnosed with "Acute renal failure syndrome" (OMOP concept_id = 197320), counted within the 30-day window before each participant's first diagnosis with 197320.

In [None]:
# Comorbidities recorded in the 30 days before each participant's first
# occurrence of concept 197320.
#   CTE_index         - earliest condition_start_date per person for 197320.
#   CTE_comorbidities - conditions starting between index_date - 30 and
#                       index_date - 1, counted by distinct person.
# `{schema}` is filled in by glue() below so the schema queried and the schema
# passed to glue() cannot drift apart.
# The ORDER BY sits on the outer SELECT, next to the LIMIT: an ORDER BY inside
# a CTE does not guarantee the order of the outer query's rows, so the
# "top 10" would otherwise not be guaranteed.
source_query <- "
With CTE_index as (
select min(co.condition_start_date) as index_date,co.person_id
from {schema}.condition_occurrence co
left join {schema}.concept c on co.condition_concept_id = c.concept_id
where c.concept_id = 197320
group by co.person_id
),
CTE_comorbidities as (
select count(distinct co.person_id) as comorbities_num, c.concept_name, c.concept_id, co.condition_source_value
from CTE_index as fe
left join {schema}.condition_occurrence co on co.person_id = fe.person_id and co.condition_start_date between fe.index_date - 30 and fe.index_date -1
left join {schema}.concept c on c.concept_id = co.condition_concept_id
where c.concept_id != 0
group by c.concept_name, c.concept_id, co.condition_source_value
)
select * from CTE_comorbidities
order by comorbities_num desc
limit 10
"

query <- dbGetQuery(
    connection,
    glue(source_query, schema = "omop_data_100kv13_covidv4")
)

query


# Query 5 - Adverse Events
This is a query that provides the adverse events of participants initiating 'Carboplatin' with OMOP concept_id = 1344905 (30 days window after the first exposure to 1344905)

In [None]:
# Conditions recorded in the 30 days after each participant's first exposure
# to drug concept 1344905.
#   CTE_index          - earliest drug_exposure_start_date per person for
#                        1344905.
#   CTE_adverse_events - conditions starting between index_date + 1 and
#                        index_date + 30, counted by distinct person.
# `{schema}` is filled in by glue() below so the schema queried and the schema
# passed to glue() cannot drift apart.
# The ORDER BY sits on the outer SELECT, next to the LIMIT: an ORDER BY inside
# a CTE does not guarantee the order of the outer query's rows, so the
# "top 10" would otherwise not be guaranteed.
source_query <- "
With CTE_index as (
select min(co.drug_exposure_start_date) as index_date,co.person_id
from {schema}.drug_exposure co
left join {schema}.concept c on co.drug_concept_id = c.concept_id
where c.concept_id = 1344905
group by co.person_id
),
CTE_adverse_events as (
select count(distinct co.person_id) as comorbities_num, c.concept_name, c.concept_id, co.condition_source_value
from CTE_index as fe
left join {schema}.condition_occurrence co on co.person_id = fe.person_id and co.condition_start_date between fe.index_date +1 and fe.index_date + 30
left join {schema}.concept c on c.concept_id = co.condition_concept_id
where c.concept_id != 0
group by c.concept_name, c.concept_id, co.condition_source_value
)
select * from CTE_adverse_events
order by comorbities_num desc
limit 10
"

query <- dbGetQuery(
    connection,
    glue(source_query, schema = "omop_data_100kv13_covidv4")
)

query


# Query 6 - Write All OMOP tables to files

If you would like to inspect the OMOP tables further, you can use the commands below to export an OMOP table and write it to a file.

In [None]:
# Show the first few objects that the database reports under the OMOP schema.

SCHEMA <- "omop_data_100kv13_covidv4"

# dbListObjects() is asked for everything under the schema prefix; the `name`
# slot of each entry in its `table` column is extracted and the results are
# stacked into one table, of which only the head is displayed.
connection %>%
    DBI::dbListObjects(prefix = DBI::Id(schema = SCHEMA)) %>%
    dplyr::pull(table) %>%
    purrr::map(function(id) slot(id, "name")) %>%
    dplyr::bind_rows() %>%
    head()

## Write the desired OMOP table to inspect in a flat file


In [None]:
# Export one OMOP table to a CSV file in the working directory.

OMOP_TABLE_NAME <- "drug_exposure"
OMOP_SCHEMA <- "omop_data_100kv13_covidv4"

# Build the statement from a single glue template; both names are filled in
# from the constants above.
source_query <- glue(
    "SELECT * FROM {schema}.{table};",
    schema = OMOP_SCHEMA,
    table = OMOP_TABLE_NAME
)
query <- dbGetQuery(connection, source_query)

head(query)

# write.csv() quotes character fields, so values that contain commas or line
# breaks do not corrupt the column layout of the resulting file.
write.csv(query, file = paste0(OMOP_TABLE_NAME, ".csv"), row.names = FALSE)

## Inspect the generated table

You can see the generated table in the results section in the file browser

In [None]:
OMOP_TABLE_NAME <- "drug_exposure"

# List the CSV files in the working directory. `pattern` is a regular
# expression, so the dot is escaped and the match is anchored to the end of
# the file name; an unescaped ".csv" would also match names such as
# "mycsv.txt".
list.files(pattern = "\\.csv$")