In [1]:
import os

# Set before anything Django-related is imported: notebook cells run inside an event
# loop, and Django's async-safety guard otherwise rejects synchronous ORM calls there.
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
# NOTE(review): star import — presumably the source of sql_raw, equal, print_sql and
# orm_to_df used in later cells (and possibly Django bootstrapping); confirm in utils.py.
from utils import *

In [2]:
from django.db.models import (
    Q,
    F,
    Case,
    When,
    Count,
    Func,
    Min,
    Max,
    Sum,
    Avg,
    Value,
    OuterRef,
    Subquery,
    Window,
    CharField,
    FloatField,
)
from django.db.models.lookups import Exact, GreaterThanOrEqual
from django.db.models.functions import Concat, Cast, Round, Length, Lag
from django.db import connection
from customer_db.models import Provinces, Patients, Doctors, Admissions

In [3]:
# Names of the non-key patient columns, in the same order as before.
# (Most of these appear in the SQL below; "allergies" is presumably a column too.)
patient_fields = [
    "first_name", "last_name",
    "gender", "birth_date",
    "city", "province_id",
    "allergies",
    "height", "weight",
]

## Hard 1

Show all of the patients grouped into weight groups.
Show the total amount of patients in each weight group.
Order the list by the weight group descending.

For example, if they weigh 100 to 109 they are placed in the 100 weight group, 110-119 = 110 weight group, etc.

In [4]:
# Reference SQL for Hard 1: bucket patients into weight classes of width 10,
# count the patients in each class, and list the heaviest class first.
# NOTE(review): the "100-109 -> 100" bucketing relies on weight / 10 truncating
# (i.e. an integer weight column); with fractional weights ROUND would round to
# the nearest ten instead — confirm the column type.
qstr = """
SELECT
  ROUND(weight / 10, 0) * 10 AS weight_class,
  COUNT(patient_id) AS patient_count
FROM patients
GROUP BY weight_class
ORDER BY weight_class DESC
"""
# sqlq is the baseline that the ORM query in the next cell is compared against.
sqlq = sql_raw(qstr)

In [5]:
# TODO: placeholder — this selects only the primary key, so equal() reports "Unequal".
# Replace with the ORM query that groups patients into weight classes like the SQL above.
ormq =  Patients.objects.values("id")
equal(sqlq, ormq)
print_sql(ormq)

Unequal ❌
SELECT "patients"."patient_id"
FROM "patients"


In [6]:
# Preview the first three rows of the Hard 1 ORM result.
orm_to_df(ormq[:3])

Unnamed: 0,id
0,1
1,2
2,3


## Hard 2
Show patient_id, weight, height, isObese from the patients table.

Display isObese as a boolean 0 or 1.
Obese is defined as $\text{weight (kg)} / \text{height (m)}^2 \ge 30$.
weight is in units kg.
height is in units cm.

We will use a threshold of 24 instead of 30.

In [7]:
# Reference SQL for Hard 2: flag each patient with isObese = 1 when
# weight(kg) / height(m)^2 reaches the threshold, else 0.
# Height is stored in cm, so the ratio is scaled by 10000 (= 100^2) instead of
# converting height to metres; the threshold used here is 24 rather than 30.
qstr = """
SELECT
  patient_id AS id,
  weight,
  height,
  CASE
    WHEN weight * 10000/ (height * height) >= 24 THEN 1
    ELSE 0
  END AS isObese
FROM patients
"""
# sqlq is the baseline that the ORM query in the next cell is compared against.
sqlq = sql_raw(qstr)

In [8]:
# TODO: placeholder — this selects only the primary key, so equal() reports "Unequal".
# Replace with the ORM query that adds weight, height and the isObese flag.
ormq =  Patients.objects.values("id")
equal(sqlq, ormq)
print_sql(ormq)

Unequal ❌
SELECT "patients"."patient_id"
FROM "patients"


In [9]:
# Preview the first three rows of the Hard 2 ORM result.
orm_to_df(ormq[:3])

Unnamed: 0,id
0,1
1,2
2,3


## Hard 3
Show patient_id, first_name, last_name, and attending doctor's specialty.
Show only the patients who have a diagnosis of 'Epilepsy' and whose doctor's first name is 'Lisa'.

Check patients, admissions, and doctors tables for required information.
We will use the diagnosis 'Anemia' instead of 'Epilepsy'.

In [10]:
# Reference SQL for Hard 3: join admissions to their patient and attending doctor,
# keeping admissions diagnosed as 'Anemia' whose doctor's first name is 'Lisa'.
# There is no DISTINCT, so a patient appears once per matching admission.
qstr = """
SELECT
  p.patient_id,
  p.first_name,
  p.last_name,
  speciality
FROM admissions AS a
  JOIN patients p ON p.patient_id = a.patient_id
  JOIN doctors d ON a.attending_doctor_id = d.doctor_id
WHERE
  diagnosis = 'Anemia'
  and d.first_name = 'Lisa'
"""
# sqlq is the baseline that the ORM query in the next cell is compared against.
sqlq = sql_raw(qstr)

In [11]:
# TODO: placeholder — this selects only the primary key, so equal() reports "Unequal".
# Replace with the ORM query joining admissions, patients and doctors as in the SQL above.
ormq =  Patients.objects.values("id")
equal(sqlq, ormq)
print_sql(ormq)

Unequal ❌
SELECT "patients"."patient_id"
FROM "patients"


In [12]:
# Preview the first three rows of the Hard 3 ORM result.
orm_to_df(ormq[:3])

Unnamed: 0,id
0,1
1,2
2,3


## Hard 4
All patients who have gone through admissions, can see their medical documents on our site. Those patients are given a temporary password after their first admission. Show the patient_id and temp_password.

The password must be the following, in order:
1. patient_id
2. the numerical length of patient's last_name
3. year of patient's birth_date

In [13]:
# Reference SQL for Hard 4: for every patient with at least one admission, build
# temp_password by concatenating patient_id, the length of last_name and the birth year.
# strftime('%Y', ...) extracts the year here; use YEAR(birth_date) in other databases.
qstr = """
SELECT
  p.patient_id,
  p.patient_id || LENGTH(last_name) || strftime('%Y', birth_date) AS temp_password
FROM patients p
WHERE patient_id IN (
    SELECT DISTINCT(patient_id)
    FROM admissions
  )
"""
# sqlq is the baseline that the ORM query in the next cell is compared against.
sqlq = sql_raw(qstr)

In [14]:
# TODO: placeholder — this selects only the primary key, so equal() reports "Unequal".
# Replace with the ORM query that builds temp_password for admitted patients.
ormq =  Patients.objects.values("id")
equal(sqlq, ormq)
print_sql(ormq)

Unequal ❌
SELECT "patients"."patient_id"
FROM "patients"


In [15]:
# Preview the first three rows of the Hard 4 ORM result.
orm_to_df(ormq[:3])

Unnamed: 0,id
0,1
1,2
2,3


## Hard 5
Each admission costs \\$50 for patients without insurance, and \\$10 for patients with insurance. All patients with an even patient_id have insurance.

Give each patient a 'Yes' if they have insurance, and a 'No' if they don't have insurance. Add up the admission_total cost for each has_insurance group.

In [16]:
# Reference SQL for Hard 5: label every admission by whether the patient is insured
# (even patient_id => 'Yes', cost 10; odd => 'No', cost 50), then sum the cost per label.
# The labels are single-quoted string literals: double quotes denote identifiers in SQL
# and are only accepted as strings by SQLite through a legacy fallback.
qstr = """
SELECT has_insurance, SUM(insurance_cost) AS total_insurance_cost
FROM(
    SELECT
      CASE
        WHEN patient_id % 2 = 0 THEN 'Yes'
        ELSE 'No'
      END AS has_insurance,
      CASE
        WHEN patient_id % 2 = 0 THEN 10
        ELSE 50
      END AS insurance_cost
    FROM admissions
  )
GROUP BY has_insurance;
"""
# sqlq is the baseline that the ORM query in the next cell is compared against.
sqlq = sql_raw(qstr)

In [17]:
# TODO: placeholder — this selects only the primary key, so equal() reports "Unequal".
# Replace with the ORM query that sums admission cost per has_insurance group.
ormq =  Patients.objects.values("id")
equal(sqlq, ormq)
print_sql(ormq)

Unequal ❌
SELECT "patients"."patient_id"
FROM "patients"


In [18]:
# Preview the first three rows of the Hard 5 ORM result.
orm_to_df(ormq[:3])

Unnamed: 0,id
0,1
1,2
2,3


## Hard 6
Show the provinces that have more patients identified as 'M' than 'F'. Must only show full province_name

In [19]:
# Reference SQL for Hard 6: list province names where male patients outnumber female ones.
# In SQLite a comparison such as gender = 'M' evaluates to 1 or 0, so SUM(...) counts
# the matching rows within each province group.
qstr = """
SELECT pr.province_name
FROM patients AS pa
  JOIN province_names AS pr ON pa.province_id = pr.province_id
GROUP BY pr.province_name
HAVING SUM(gender = 'M') > SUM(gender = 'F')
"""
# sqlq is the baseline that the ORM query in the next cell is compared against.
sqlq = sql_raw(qstr)

In [20]:
# TODO: placeholder — this selects only the primary key, so equal() reports "Unequal".
# Replace with the ORM query comparing male and female patient counts per province.
ormq =  Patients.objects.values("id")
equal(sqlq, ormq)
print_sql(ormq)

Unequal ❌
SELECT "patients"."patient_id"
FROM "patients"


In [21]:
# Preview the first three rows of the Hard 6 ORM result.
orm_to_df(ormq[:3])

Unnamed: 0,id
0,1
1,2
2,3


## Hard 7

We are looking for a specific patient. Pull all columns for the patient who matches the following criteria:
- First_name contains an 'r' after the first two letters.
- Identifies their gender as 'F' (We will do 'M')
- Born in February, May, or December
- Their weight would be between 60kg and 80kg
- Their patient_id is an odd number  (We will do even)
- They are from the city 'Kingston'  (We will do 'Vancouver')

In [22]:
# Reference SQL for Hard 7: find the patient matching every criterion of the exercise
# (using the substituted values: gender 'M', even patient_id, city 'Vancouver').
# '__r%' matches names whose third character is 'r' (each '_' is exactly one character).
# The pattern is a single-quoted string literal: double quotes denote identifiers in SQL
# and are only accepted as strings by SQLite through a legacy fallback.
qstr = """
SELECT *
FROM patients
WHERE
  first_name LIKE '__r%'
  AND gender = 'M'
  AND (
    CAST(strftime('%m', birth_date) AS INT) IN (2, 5, 12)
  )
  AND weight between 60 AND 80
  AND patient_id % 2 = 0
  AND city = 'Vancouver'
"""
# sqlq is the baseline that the ORM query in the next cell is compared against.
sqlq = sql_raw(qstr)

In [23]:
# TODO: placeholder — this selects only the primary key, so equal() reports "Unequal".
# Replace with the ORM query applying the same filters as the SQL above.
ormq =  Patients.objects.values("id")
equal(sqlq, ormq)
print_sql(ormq)

Unequal ❌
SELECT "patients"."patient_id"
FROM "patients"


In [24]:
# Preview the first three rows of the Hard 7 ORM result.
orm_to_df(ormq[:3])

Unnamed: 0,id
0,1
1,2
2,3


## Hard 8
Show the percent of patients that have 'M' as their gender. Round the answer to the nearest hundredth number and in percent form.

In [25]:
# Reference SQL for Hard 8: AVG(gender = 'M') is the fraction of patients that are male
# (the comparison is 1 or 0 per row); it is scaled to a percentage, rounded to two
# decimals, and '%' is appended, so pct is returned as text.
qstr = """
SELECT
  ROUND(
    CAST(AVG(gender = 'M') as FLOAT)  * 100,
    2
  ) || '%' AS pct
FROM patients
"""
# sqlq is the baseline that the ORM query in the next cell is compared against.
sqlq = sql_raw(qstr)

In [26]:
# Avg variant that declares contains_aggregate = False — presumably so the ORM treats
# it as a plain expression instead of applying its aggregate/GROUP BY handling (confirm
# against Django's Aggregate). Per the original note, subclassing Max the same way
# also works.
class NonAggrAvg(Avg):
    contains_aggregate = False

# TODO: placeholder — this selects only the primary key, so equal() reports "Unequal",
# and NonAggrAvg is not used yet. Replace with the ORM query computing the male percentage.
ormq =  Patients.objects.values("id")
equal(sqlq, ormq)
print_sql(ormq)

Unequal ❌
SELECT "patients"."patient_id"
FROM "patients"


In [27]:
# Preview the first three rows of the Hard 8 ORM result.
orm_to_df(ormq[:3])

Unnamed: 0,id
0,1
1,2
2,3


## Hard 9
For each day display the total amount of admissions on that day. Display the amount changed from the previous date.

In [28]:
# Reference SQL for Hard 9: the inner query counts admissions per date and uses LAG to
# fetch the previous date's count; the outer query subtracts the two.
# The earliest date has no previous row, so its admission_diff is NULL.
qstr = """
SELECT
  admission_date,
  daily_admissions,
  daily_admissions - prev_admissions AS admission_diff
FROM (
    SELECT
      admission_date,
      COUNT(*) daily_admissions,
      LAG(count(*), 1) OVER (
        ORDER BY
          admission_date
      ) AS prev_admissions
    FROM admissions
    GROUP BY admission_date
  )
"""
# sqlq is the baseline that the ORM query in the next cell is compared against.
sqlq = sql_raw(qstr)

In [29]:
# TODO: placeholder — this selects only the primary key, so equal() reports "Unequal".
# Replace with the ORM query producing daily admission counts and their day-over-day change.
ormq =  Patients.objects.values("id")
equal(sqlq, ormq)
print_sql(ormq)

Unequal ❌
SELECT "patients"."patient_id"
FROM "patients"


In [30]:
# Preview the first three rows of the Hard 9 ORM result.
orm_to_df(ormq[:3])

Unnamed: 0,id
0,1
1,2
2,3


## Hard 10
Sort the province names in ascending order in such a way that the province 'Ontario' is always on top.

In [31]:
# Reference SQL for Hard 10: list province names with 'Ontario' first.
# The comparison province_name = 'Ontario' is 1 for Ontario and 0 otherwise, so sorting
# it descending puts Ontario on top; the remaining names follow alphabetically.
qstr = """
select province_name
from province_names
order by
  province_name = 'Ontario' desc,
  province_name
"""
# sqlq is the baseline that the ORM query in the next cell is compared against.
sqlq = sql_raw(qstr)

In [32]:
# TODO: placeholder — this selects only the primary key, so equal() reports "Unequal".
# Replace with the ORM query that orders province names with 'Ontario' first.
ormq =  Patients.objects.values("id")
equal(sqlq, ormq)
print_sql(ormq)

Unequal ❌
SELECT "patients"."patient_id"
FROM "patients"


In [33]:
# Preview the first three rows of the Hard 10 ORM result.
orm_to_df(ormq[:3])

Unnamed: 0,id
0,1
1,2
2,3


## Hard 11
We need a breakdown for the total amount of admissions each doctor has started each year. Show the doctor_id, doctor_full_name, specialty, year, total_admissions for that year.

In [34]:
# Reference SQL for Hard 11: join admissions to their attending doctor and count
# admissions per (year, doctor). The year is taken from admission_date via strftime
# and cast to an integer; the per-group count is exposed as patients_attended.
qstr = """
SELECT
  doctor_id,
  first_name || ' ' || last_name AS full_name,
  speciality,
  CAST(strftime('%Y', admission_date) AS INT) AS current_year,
  Count(strftime('%Y', admission_date)) AS patients_attended
FROM admissions a
  JOIN doctors d ON a.attending_doctor_id = d.doctor_id
GROUP BY current_year, doctor_id
"""
# sqlq is the baseline that the ORM query in the next cell is compared against.
sqlq = sql_raw(qstr)

In [35]:
# TODO: placeholder — this selects only the primary key, so equal() reports "Unequal".
# Replace with the ORM query counting admissions per doctor per year.
ormq =  Patients.objects.values("id")
equal(sqlq, ormq)
print_sql(ormq)

Unequal ❌
SELECT "patients"."patient_id"
FROM "patients"


In [36]:
# Preview the first three rows of the Hard 11 ORM result.
orm_to_df(ormq[:3])

Unnamed: 0,id
0,1
1,2
2,3
