# Running queries on the OMOP data

## Install required packages

Before executing the cells below, open a terminal session and create a conda environment containing the required R packages:

`conda create -n omop-source r-glue r-tidyverse r-data.table r-dbi  r-rpostgres r-irkernel -y`


### Connect to the SQL database

In [None]:
library(tidyverse)
library(data.table)
library(glue)
library(DBI)
library(RPostgres)

# Connection settings.
#
# Either export the standard libpq environment variables (PGDATABASE, PGHOST,
# PGPORT, PGPASSWORD) before starting the kernel, or replace the Sys.getenv()
# calls below with literal values. Reading them from the environment keeps the
# password out of the notebook.
#
# NOTE: do not leave a setting as a bare `NAME <- #`. R discards the comment
# and continues the assignment on the next line, so a run of such lines
# collapses into one chained assignment and every name ends up holding the
# value of the last one (the user name).
DBNAME <- Sys.getenv("PGDATABASE")
HOST <- Sys.getenv("PGHOST")
PORT <- Sys.getenv("PGPORT")
PASSWORD <- Sys.getenv("PGPASSWORD")
USER <- "jupyter_notebook"

# Fail early with a readable message instead of a confusing connection error.
connection_settings <- c(
    DBNAME = DBNAME,
    HOST = HOST,
    PORT = PORT,
    PASSWORD = PASSWORD
)
unset_settings <- names(connection_settings)[!nzchar(connection_settings)]
if (length(unset_settings) > 0) {
    stop(
        "Set these connection settings before connecting: ",
        paste(unset_settings, collapse = ", "),
        call. = FALSE
    )
}

# Open the PostgreSQL connection used by every query cell below.
connection <- DBI::dbConnect(
    RPostgres::Postgres(),
    dbname = DBNAME,
    host = HOST,
    port = PORT,
    password = PASSWORD,
    user = USER
)

In [None]:
# Fetch the name of every schema visible to this connection.
all_schemas <- DBI::dbGetQuery(
    conn = connection,
    statement = "SELECT SCHEMA_NAME FROM INFORMATION_SCHEMA.SCHEMATA"
)

# Leave the data frame as the last expression so the notebook renders it.
all_schemas

### List all tables in the source_data_100kv13_covidv4 schema

In [None]:
# One row per table (schema name + table name) in the
# source_data_100kv13_covidv4 schema.
sql <- "
SELECT table_schema, table_name 
FROM information_schema.tables 
WHERE table_schema = 'source_data_100kv13_covidv4'
"

# Run the statement on the open connection; the result is displayed as the
# cell output.
DBI::dbGetQuery(conn = connection, statement = sql)

### Query 1: Normalised Rare Disease Terms Cohort 

In [None]:
# Distinct (participant, disease) pairs whose normalised specific disease is
# one of the four terms listed in the IN clause.
sql <- "
SELECT DISTINCT participant_id, normalised_specific_disease
FROM source_data_100kv13_covidv4.rare_diseases_participant_disease
WHERE normalised_specific_disease IN (
    'Familial pulmonary fibrosis','Familial primary spontaneous pneumothorax',
    'Familial and multiple pulmonary arteriovenous malformations','Hereditary haemorrhagic telangiectasia'
)
"

# Keep the statement under both names, then execute it and show the result.
sql_to_run <- sql
DBI::dbGetQuery(conn = connection, statement = sql_to_run)

### Query 2: HPO terms cohort

In [None]:
# Distinct participants recorded with any of the three listed HPO ids where
# hpo_present is 'Yes'.
sql <- "
SELECT DISTINCT participant_id
FROM source_data_100kv13_covidv4.rare_diseases_participant_phenotype
WHERE normalised_hpo_id IN ('HP:0002206','HP:0006530','HP:0002094')
AND hpo_present IN ('Yes')
"

# Keep the statement under both names, then execute it and show the result.
sql_to_run <- sql
DBI::dbGetQuery(conn = connection, statement = sql_to_run)

### Query 3: ICD10 terms cohort

In [None]:
# Distinct participants in hes_apc whose diag01, diag02 or diag03 field
# matches 'J841'.
# NOTE(review): the LIKE patterns contain no wildcard, so they only match the
# exact string 'J841'. Confirm that longer codes starting with J841, and any
# further diagnosis columns, are intentionally excluded.
sql <- "
SELECT DISTINCT participant_id
FROM source_data_100kv13_covidv4.hes_apc
WHERE diag01 LIKE 'J841'
OR diag02 LIKE 'J841'
OR diag03 LIKE 'J841'
"

# Keep the statement under both names, then execute it and show the result.
sql_to_run <- sql
DBI::dbGetQuery(conn = connection, statement = sql_to_run)

### Query 4: Cancer type and status - Part 1

In [None]:
# Distinct participants in the sact table whose analysis_group is NIVOLUMAB
# or PEMBROLIZUMAB.
sql <- "
SELECT DISTINCT participant_id
FROM source_data_100kv13_covidv4.sact
WHERE analysis_group IN ('NIVOLUMAB','PEMBROLIZUMAB')
"

# Keep the statement under both names, then execute it and show the result.
sql_to_run <- sql
DBI::dbGetQuery(conn = connection, statement = sql_to_run)