### Sample SQL Queries

A variety of different queries issued from Python: some using mostly SQL, some mixing SQL and Python, and some using pandas.

Salt to taste.

In [5]:
import json
from contextlib import closing
import psycopg2
import psycopg2.extras
import pandas as pd

# libpq-style connection string handed to psycopg2.connect() by every query
# cell in this notebook. It names the host, database and user; no password
# is included here.
conn_string = "host=postgres dbname=wisdom  user=postgres"

In [6]:
# Summarise the number of patients per sign, loading the result set
# straight into a pandas DataFrame (no explicit cursor needed).
sign_count_sql = "select patients.sign, count(id) from patients group by patients.sign"

with closing(psycopg2.connect(conn_string)) as connection:
    dataframe = pd.read_sql(sql=sign_count_sql, con=connection)

# Bare last expression so the notebook renders the frame.
dataframe

Unnamed: 0,sign,count
0,Capricorn,48
1,Virgo,24
2,Taurus,19
3,Scorpio,12
4,Gemini,12
5,Sagittarius,17
6,Leo,13
7,Libra,23
8,Cancer,25
9,Aries,21


In [13]:
# Libra patients aged 21 to 40 whose report lists both a BRCA1 and a PTEN
# variant. All filtering happens inside Postgres: the jsonb containment
# operator (@>) is applied once per gene against the report document.
libra_brca1_pten_sql = """
        select patients.*, report->'variants' as variants
        from patients
        left join reports on (patients.id = reports.patient_id)
        where patients.sign='Libra' and patients.age >= 21 and patients.age <= 40
        and report @> '{ "variants": [{ "gene": "BRCA1" }] }' and report @> '{ "variants": [{ "gene": "PTEN" }] }'
        """

with closing(psycopg2.connect(conn_string)) as connection:
    # DictCursor lets each row be indexed by column name.
    with closing(connection.cursor(cursor_factory=psycopg2.extras.DictCursor)) as cursor:
        cursor.execute(libra_brca1_pten_sql)

        print("Found", cursor.rowcount, "Libras between 21 and 40 with BRCA1 and PTEN")

        # One output line per matching patient: id followed by every gene in the report.
        for patient in cursor:
            gene_names = [variant["gene"] for variant in patient["variants"]]
            print(patient["id"], gene_names)

Found 2 Libras between 21 and 40 with BRCA1 and PTEN
8a80681d-60f5-4baf-96ce-7d04aa2f505f ['PTEN', 'PALB2', 'BRIP1', 'ATM', 'MSH2', 'ATM', 'BRCA1', 'ATM']
64174e50-987d-4a29-9122-bc8b0c1931b9 ['ATM', 'PTEN', 'PALB2', 'PALB2', 'MLH1', 'BRCA1', 'MSH6', 'ATM']


In [4]:
# All Libras between 21 and 40 with BRCA1 and PTEN using multiple simpler queries and python
with closing(psycopg2.connect(conn_string)) as connection, \
    closing(connection.cursor(cursor_factory=psycopg2.extras.DictCursor)) as cursor:

    # First get all the patients that are Libra and between 21 and 40
    cursor.execute("select * from patients where sign='Libra' and age >= 21 and age <= 40")
    patients = cursor.fetchall()
    print("Found", len(patients), "Libra's between 21 and 40")

    # And then get their variant report and print only if it includes BRCA1 and PTEN
    print("Those with both BRCA1 and PTEN:")
    for patient in patients:
        # Bind the id as a query parameter instead of formatting it into the
        # SQL text: the driver handles quoting, so the statement cannot be
        # broken (or injected into) by the value.
        cursor.execute(
            "select report->'variants' from reports where reports.patient_id=%s",
            (patient["id"],),
        )
        report_row = cursor.fetchone()

        # A patient may have no report row, or a report without a 'variants'
        # key (SQL NULL -> None); neither can match, so skip instead of crashing.
        variants = report_row[0] if report_row is not None else None
        if not variants:
            continue

        genes = [v["gene"] for v in variants]
        if 'BRCA1' in genes and 'PTEN' in genes:
            print(patient["id"], genes)

Found 5 Libra's between 21 and 40
Those with both BRCA1 and PTEN:
8a80681d-60f5-4baf-96ce-7d04aa2f505f ['PTEN', 'PALB2', 'BRIP1', 'ATM', 'MSH2', 'ATM', 'BRCA1', 'ATM']
64174e50-987d-4a29-9122-bc8b0c1931b9 ['ATM', 'PTEN', 'PALB2', 'PALB2', 'MLH1', 'BRCA1', 'MSH6', 'ATM']
