# Running queries on the OMOP data

## Install required packages

Before executing the cells below, open a terminal session and create a conda environment that contains the required R packages:

`conda create -n omop-source r-glue r-tidyverse r-data.table r-dbi  r-rpostgres r-irkernel -y`

### Connect to the SQL database

In [None]:
library(tidyverse)
library(data.table)
library(glue)
library(DBI)
library(RPostgres)

# Connection settings.
#
# The values are read from the standard libpq environment variables so that
# credentials never have to be typed into (or saved with) the notebook. Set
# PGDATABASE, PGHOST, PGPORT and PGPASSWORD before starting the kernel, or
# replace the Sys.getenv() calls below with literal values.
#
# Note: a bare `NAME <- #` placeholder must not be left in place. `#` starts a
# comment, so R keeps reading the next line and chains the assignments, giving
# every setting the value of the last one.
DBNAME <- Sys.getenv("PGDATABASE")
HOST <- Sys.getenv("PGHOST")
PORT <- Sys.getenv("PGPORT", unset = "5432")
PASSWORD <- Sys.getenv("PGPASSWORD")
USER <- Sys.getenv("PGUSER", unset = "jupyter_notebook")

# Fail early with a readable message instead of an opaque driver error.
required_settings <- c(PGDATABASE = DBNAME, PGHOST = HOST)
unset_settings <- names(required_settings)[!nzchar(required_settings)]
if (length(unset_settings) > 0) {
    stop(
        "Missing connection settings: ",
        paste(unset_settings, collapse = ", "),
        call. = FALSE
    )
}

# Open the PostgreSQL connection that every query cell below reuses.
connection <- DBI::dbConnect(
    RPostgres::Postgres(),
    dbname = DBNAME,
    host = HOST,
    port = PORT,
    password = PASSWORD,
    user = USER
)

In [None]:
# SQL query - Demographics
This is a simple query that provides the total number of participants

In [None]:
# Count the distinct person_id values in the person table.
# `{schema}` is filled in by glue(), so the schema name is set in one place
# (the `schema` argument below) instead of being hard-coded in the SQL.
omop_query <- "
    select count(distinct p.person_id) as count
    from {schema}.person p
"

all_dataset_demo <- dbGetQuery(
    connection,
    glue(omop_query, schema = "omop_data_sawcer_v1")
)

all_dataset_demo

This is a simple query that provides the number of participants stratified by race

In [None]:
# Count distinct participants per (race_source_value, race_concept_id) pair.
# `{schema}` is filled in by glue(), so the schema name is set in one place
# (the `schema` argument below) instead of being hard-coded in the SQL.
omop_query <- "
    select count(distinct p.person_id) as count, p.race_source_value, p.race_concept_id
    from {schema}.person p
    group by p.race_source_value, p.race_concept_id
"

all_dataset_demo <- dbGetQuery(
    connection,
    glue(omop_query, schema = "omop_data_sawcer_v1")
)

all_dataset_demo

In [None]:
This is a simple query that provides the number of participants stratified by gender

In [None]:
# Count distinct participants per gender, joining the concept table to show
# the concept name that belongs to each gender_concept_id.
# `{schema}` is filled in by glue(), so the schema name is set in one place
# (the `schema` argument below) instead of being hard-coded in the SQL.
omop_query <- "
    select count(distinct p.person_id) as count, p.gender_source_value, p.gender_concept_id, c.concept_name
    from {schema}.person p
    left join {schema}.concept c on c.concept_id = p.gender_concept_id
    group by p.gender_source_value, p.gender_concept_id, c.concept_name
"

all_dataset_demo <- dbGetQuery(
    connection,
    glue(omop_query, schema = "omop_data_sawcer_v1")
)

all_dataset_demo

This is a simple query that provides the number of participants stratified by condition

In [None]:
# Count distinct participants per condition, with the concept name for each
# condition_concept_id, ordered from the largest count to the smallest.
# `{schema}` is filled in by glue(), so the schema name is set in one place
# (the `schema` argument below) instead of being hard-coded in the SQL.
omop_query <- "
    select count(distinct co.person_id) as count, co.condition_source_value, co.condition_concept_id, c.concept_name
    from {schema}.condition_occurrence co
    left join {schema}.concept c on c.concept_id = co.condition_concept_id
    group by co.condition_source_value, co.condition_concept_id, c.concept_name
    order by count desc
"

all_dataset_demo <- dbGetQuery(
    connection,
    glue(omop_query, schema = "omop_data_sawcer_v1")
)

all_dataset_demo

This is a query that provides all the participants with their respective condition

In [None]:
# List every condition_occurrence row together with the participant's
# person_source_value and the concept name of the condition.
# `{schema}` is filled in by glue(), so the schema name is set in one place
# (the `schema` argument below) instead of being hard-coded in the SQL.
omop_query <- "
    select p.person_source_value, co.condition_source_value, co.condition_concept_id, c.concept_name
    from {schema}.condition_occurrence co
    left join {schema}.person p on p.person_id = co.person_id
    left join {schema}.concept c on c.concept_id = co.condition_concept_id
"

all_dataset_demo <- dbGetQuery(
    connection,
    glue(omop_query, schema = "omop_data_sawcer_v1")
)

all_dataset_demo

This is a query that provides the participants who were diagnosed with primary progressive multiple sclerosis (condition_concept_id = 4178929)

In [None]:
# List the condition_occurrence rows whose condition_concept_id is 4178929,
# together with the participant's person_source_value and the concept name.
# `{schema}` is filled in by glue(), so the schema name is set in one place
# (the `schema` argument below) instead of being hard-coded in the SQL.
omop_query <- "
    select p.person_source_value, co.condition_source_value, co.condition_concept_id, c.concept_name
    from {schema}.condition_occurrence co
    left join {schema}.person p on p.person_id = co.person_id
    left join {schema}.concept c on c.concept_id = co.condition_concept_id
    where co.condition_concept_id = 4178929
"

all_dataset_demo <- dbGetQuery(
    connection,
    glue(omop_query, schema = "omop_data_sawcer_v1")
)

all_dataset_demo

This is a query that provides all the participants with their respective specimen type

In [None]:
# List every specimen row together with the participant's
# person_source_value and the concept name of the specimen type.
# `{schema}` is filled in by glue(), so the schema name is set in one place
# (the `schema` argument below) instead of being hard-coded in the SQL.
# The specimen table is aliased `s` (rather than `co`, which the other cells
# use for condition_occurrence) so the alias matches the table it names.
omop_query <- "
    select p.person_source_value, s.specimen_source_value, s.specimen_concept_id, c.concept_name
    from {schema}.specimen s
    left join {schema}.person p on p.person_id = s.person_id
    left join {schema}.concept c on c.concept_id = s.specimen_concept_id
"

all_dataset_demo <- dbGetQuery(
    connection,
    glue(omop_query, schema = "omop_data_sawcer_v1")
)

all_dataset_demo

In [None]:
This is a query that provides all the participants with their respective MSSS (as provided in the source data)

In [None]:
# List the observation rows whose observation_source_value is 'MSSS',
# together with the participant's person_source_value and the numeric value.
# `{schema}` is filled in by glue(), so the schema name is set in one place
# (the `schema` argument below) instead of being hard-coded in the SQL.
# NOTE(review): the concept join is kept from the original query, but no
# column of `c` is selected - confirm whether it is still needed.
omop_query <- "
    select p.person_source_value, o.observation_source_value, o.value_as_number
    from {schema}.observation o
    left join {schema}.person p on p.person_id = o.person_id
    left join {schema}.concept c on c.concept_id = o.observation_concept_id
    where o.observation_source_value = 'MSSS'
"

all_dataset_demo <- dbGetQuery(
    connection,
    glue(omop_query, schema = "omop_data_sawcer_v1")
)

all_dataset_demo

This is a query that provides all the participants with their respective MSSS where value > 7

In [None]:
# List the observation rows whose observation_source_value is 'MSSS' and whose
# value_as_number is greater than 7, together with the participant's
# person_source_value.
# `{schema}` is filled in by glue(), so the schema name is set in one place
# (the `schema` argument below) instead of being hard-coded in the SQL.
# NOTE(review): the concept join is kept from the original query, but no
# column of `c` is selected - confirm whether it is still needed.
omop_query <- "
    select p.person_source_value, o.observation_source_value, o.value_as_number
    from {schema}.observation o
    left join {schema}.person p on p.person_id = o.person_id
    left join {schema}.concept c on c.concept_id = o.observation_concept_id
    where o.observation_source_value = 'MSSS' and o.value_as_number > 7
"

all_dataset_demo <- dbGetQuery(
    connection,
    glue(omop_query, schema = "omop_data_sawcer_v1")
)

all_dataset_demo