# PRQL

## Generating SQL

You can do this in your browser, go to [https://prql-lang.org/playground/]() now!

## Python

In [34]:
#!pip install prql-python

In [30]:
import prql_python as prql

prql_query = """
    from employees
    join salaries [==emp_id]
    group [dept_id, gender] (
        aggregate [
            avg_salary = average salary
        ]
    )
"""

sql = prql.to_sql(prql_query)
print(sql)

SELECT
  dept_id,
  gender,
  AVG(salary) AS avg_salary
FROM
  employees
  JOIN salaries ON employees.emp_id = salaries.emp_id
GROUP BY
  dept_id,
  gender
ORDER BY
  avg_salary DESC
LIMIT
  5


In [33]:
prql_query = """
    from employees
    join salaries [==emp_id]
    group [dept_id] (
        sort [-salary]
        take 5
    )
"""

sql = prql.to_sql(prql_query)
print(sql)

WITH table_1 AS (
  SELECT
    employees.*,
    salaries.*,
    ROW_NUMBER() OVER (
      PARTITION BY dept_id
      ORDER BY
        salary DESC
    ) AS _expr_0
  FROM
    employees
    JOIN salaries ON employees.emp_id = salaries.emp_id
)
SELECT
  *,
  *
FROM
  table_1
WHERE
  _expr_0 <= 5


## PyPRQL and Jupyter Magics

In [3]:
#!pip install pyprql

In [4]:
%load_ext pyprql.magic

In [35]:
%%prql duckdb:///:memory:
from `data/customers.csv`
take 5

Done.
   customer_id first_name    last_name  \
0            1       Luís    Gonçalves   
1            2     Leonie       Köhler   
2            3   François     Tremblay   
3            4      Bjørn       Hansen   
4            5  František  Wichterlová   

                                            company  \
0  Embraer - Empresa Brasileira de Aeronáutica S.A.   
1                                              None   
2                                              None   
3                                              None   
4                                  JetBrains s.r.o.   

                           address                 city state         country  \
0  Av. Brigadeiro Faria Lima, 2170  São José dos Campos    SP          Brazil   
1          Theodor-Heuss-Straße 34            Stuttgart  None         Germany   
2                1498 rue Bélanger             Montréal    QC          Canada   
3                 Ullevålsveien 14                 Oslo  None          Norway   
4    

## Shell

In [8]:
#!wget https://github.com/prql/prql-query/releases/download/v0.0.14/pq-x86_64-unknown-linux-gnu.tar.gz \
#    && tar xvzf pq-x86_64-unknown-linux-gnu.tar.gz \
#    && rm pq-x86_64-unknown-linux-gnu.tar.gz

In [9]:
!./pq --from data/invoices.csv "take 5"

+------------+-------------+-------------------------------+-------------------------+--------------+---------------+-----------------+---------------------+-------+
| invoice_id | customer_id | invoice_date                  | billing_address         | billing_city | billing_state | billing_country | billing_postal_code | total |
+------------+-------------+-------------------------------+-------------------------+--------------+---------------+-----------------+---------------------+-------+
| 1          | 2           | 2009-01-01T00:00:00.000000000 | Theodor-Heuss-Straße 34 | Stuttgart    |               | Germany         | 70174               | 1.98  |
| 2          | 4           | 2009-01-02T00:00:00.000000000 | Ullevålsveien 14        | Oslo         |               | Norway          | 0171                | 3.96  |
| 3          | 8           | 2009-01-03T00:00:00.000000000 | Grétrystraat 63         | Brussels     |               | Belgium         | 1000                | 5.94  |
| 4 

In [51]:
# Say we now want to get the names of the top 5 customers
!./pq --from i=data/invoices.csv \
    "from i"

+------------+-------------+-------------------------------+------------------------------------------+---------------------+---------------+-----------------+---------------------+-------+
| invoice_id | customer_id | invoice_date                  | billing_address                          | billing_city        | billing_state | billing_country | billing_postal_code | total |
+------------+-------------+-------------------------------+------------------------------------------+---------------------+---------------+-----------------+---------------------+-------+
| 1          | 2           | 2009-01-01T00:00:00.000000000 | Theodor-Heuss-Straße 34                  | Stuttgart           |               | Germany         | 70174               | 1.98  |
| 2          | 4           | 2009-01-02T00:00:00.000000000 | Ullevålsveien 14                         | Oslo                |               | Norway          | 0171                | 3.96  |
| 3          | 8           | 2009-01-03T00:00:00.0

In [10]:
!./pq --from i=data/invoices.csv --from c=data/customers.csv \
    "from i | group [customer_id] (aggregate [customer_total = sum total]) | sort [-customer_total] | take 5 | join c [customer_id] | select [full_name=f'{first_name} {last_name}', customer_total]"

+--------------------+--------------------+
| full_name          | customer_total     |
+--------------------+--------------------+
| Helena Holý        | 49.620000000000005 |
| Ladislav Kovács    | 45.62              |
| Luis Rojas         | 46.62              |
| Richard Cunningham | 47.620000000000005 |
| Hugh O'Reilly      | 45.62              |
+--------------------+--------------------+


## R

In [11]:
#!sudo apt update && sudo apt upgrade -y && sudo apt install -y r-base
#!sudo R -q -e "install.packages('dplyr', dependencies = TRUE)"
#!sudo R -q -e "install.packages('tidyquery', dependencies = TRUE)"

In [12]:
# prqlr needs to be installed into a user directory because cargo isn't available under sudo
#!mkdir -p ~/.local/R_libs
#!R -q -e 'install.packages("prqlr", repos = "https://eitsupi.r-universe.dev", lib="~/.local/R_libs/")'

In [13]:
#!pip install rpy2

In [14]:
%load_ext rpy2.ipython

In [27]:
%%R
library(prqlr, lib.loc="~/.local/R_libs/")
"
from mtcars
filter cyl > 6
sort [-mpg]
select [cyl, mpg]
" |> prql_to_sql() |> cat()

SELECT
  cyl,
  mpg
FROM
  mtcars
WHERE
  cyl > 6
ORDER BY
  mpg DESC

In [26]:
%%R
# Thanks to the tidyquery package you can query dataframes directly
library("tidyquery")
"
from mtcars
filter cyl > 6
sort [-mpg]
select [cyl, mpg]
" |> prql_to_sql() |> query()

                    cyl  mpg
Pontiac Firebird      8 19.2
Hornet Sportabout     8 18.7
Merc 450SL            8 17.3
Merc 450SE            8 16.4
Ford Pantera L        8 15.8
Dodge Challenger      8 15.5
Merc 450SLC           8 15.2
AMC Javelin           8 15.2
Maserati Bora         8 15.0
Chrysler Imperial     8 14.7
Duster 360            8 14.3
Camaro Z28            8 13.3
Cadillac Fleetwood    8 10.4
Lincoln Continental   8 10.4


In [25]:
%%R
# or even convert PRQL queries to dplyr queries
library("tidyquery")
"
from mtcars
filter cyl > 6
select [cyl, mpg]
sort [-mpg]
" |>
  prql_to_sql() |>
  tidyquery::show_dplyr()

mtcars %>%
  filter(cyl > 6) %>%
  select(cyl, mpg) %>%
  arrange(dplyr::desc(mpg))

```
from mtcars
filter cyl > 6
select [cyl, mpg]
sort [-mpg]
```