# Running queries on the source data

## Install required packages

Before running the cells below, create a conda environment containing the required R packages by running the following command in a terminal session:

`conda create -n omop-source r-glue r-tidyverse r-data.table r-dbi  r-rpostgres r-irkernel -y`

In [None]:
library(tidyverse)
library(data.table)
library(glue)
library(DBI)
library(RPostgres)

# Connection settings are read from the standard libpq environment variables
# (PGDATABASE, PGHOST, PGPORT, PGPASSWORD) so that credentials are never saved
# inside the notebook. Export them before starting Jupyter, or set them for the
# current session with Sys.setenv(PGDATABASE = "...", ...).
#
# NOTE: never leave a bare `#` after `<-` as a placeholder. R treats the
# assignment as unfinished and continues parsing on the next line, so the
# variables collapse into one chained assignment and every one of them
# silently ends up holding the value of USER.
DBNAME <- Sys.getenv("PGDATABASE")
HOST <- Sys.getenv("PGHOST")
PORT <- Sys.getenv("PGPORT")
PASSWORD <- Sys.getenv("PGPASSWORD")
USER <- "jupyter_notebook"

# Fail early with a readable message rather than an opaque connection error.
# Wrapped in local() so the temporary vector (which contains the password)
# does not linger in the global environment.
local({
    required <- c(
        PGDATABASE = DBNAME,
        PGHOST = HOST,
        PGPORT = PORT,
        PGPASSWORD = PASSWORD
    )
    unset_vars <- names(required)[!nzchar(required)]
    if (length(unset_vars) > 0) {
        stop(
            "Missing connection settings; set environment variable(s): ",
            paste(unset_vars, collapse = ", "),
            call. = FALSE
        )
    }
})

# Open the PostgreSQL connection used by every query cell below.
connection <- DBI::dbConnect(
    RPostgres::Postgres(),
    dbname = DBNAME,
    host = HOST,
    port = PORT,
    password = PASSWORD,
    user = USER
)

<br>  

## Show tables in the schema whose names contain `death`


In [None]:
# List the distinct (schema, table) pairs recorded in information_schema.columns
# for the source_data_ukb_v1 schema, keeping only tables whose name matches
# '%death%' (ilike, so the match is case-insensitive).
tables_sql <- "
    SELECT DISTINCT table_schema, table_name
    FROM information_schema.columns
    WHERE table_schema = 'source_data_ukb_v1' and table_name ilike '%death%';
"
DBI::dbGetQuery(connection, tables_sql)

<br>  

## Select patients from `baseline_characteristics` table

In [None]:
# Preview five rows of the baseline characteristics table. There is no
# ORDER BY, so which five rows come back is up to the database.
#
# The table is schema-qualified so the query does not depend on the session's
# search_path, in line with the cross-schema join query in this notebook.
# NOTE(review): assumes this table lives in source_data_ukb_v1 -- confirm.
dbGetQuery(connection, "
    SELECT *
    FROM source_data_ukb_v1.baseline_characteristics_100094
    LIMIT 5;
")

## Count patients with primary hypertension

In [None]:
# Count the distinct f_eid values that have a non-null f_131286_0_0 entry.
# NOTE(review): presumably f_131286_0_0 is the primary-hypertension field that
# the section heading refers to -- confirm against the data dictionary.
#
# The table is schema-qualified (the same schema the join query in this
# notebook uses for it) so the result does not depend on the search_path.
dbGetQuery(connection, "
    SELECT count(DISTINCT f_eid)
    FROM source_data_ukb_v1.circulatory_system_disorders_2409
    WHERE f_131286_0_0 IS NOT NULL;
")

## Cross querying with OMOP tables

OMOP tables and source tables can be joined using the relationship `source_data_ukb_v1.<table_name>.f_eid` = `omop_data_ukb_v1.person.person_source_value`. The query below casts `f_eid` to `varchar` for the comparison:

In [None]:
# Attach OMOP person attributes to source rows. Every row of the source table
# that passes the f_131286_0_0 filter is kept (LEFT JOIN); gender_concept_id
# and race_concept_id are NULL where no person row has a matching
# person_source_value. f_eid is cast to varchar for the comparison.
person_join_sql <- "
    SELECT f_eid, gender_concept_id, race_concept_id
    FROM  source_data_ukb_v1.circulatory_system_disorders_2409 c 
    LEFT JOIN omop_data_ukb_v1.person p on c.f_eid::varchar = p.person_source_value
    WHERE f_131286_0_0 is not null
"
DBI::dbGetQuery(connection, person_join_sql)