# PRQL

## Generating SQL

In [42]:
# Install prql-python
!pip install prql-python



In [43]:
import prql_python as prql

# PRQL pipeline: join employees to salaries on emp_id, then compute the
# average salary for each (dept_id, gender) group.
prql_query = """
    from employees
    join salaries [==emp_id]
    group [dept_id, gender] (
      aggregate [
        avg_salary = average salary
      ]
    )
"""

# Compile the PRQL text to SQL and print the generated statement.
sql = prql.to_sql(prql_query)
print(sql)

SELECT
  dept_id,
  gender,
  AVG(salary) AS avg_salary
FROM
  employees
  JOIN salaries ON employees.emp_id = salaries.emp_id
GROUP BY
  dept_id,
  gender


## PyPRQL and Jupyter Magics

In [44]:
!pip install pyprql



In [45]:
# Load pyprql's IPython extension; the %prql / %%prql cells below rely on it.
%load_ext pyprql.magic

The pyprql.magic extension is already loaded. To reload it, use:
  %reload_ext pyprql.magic


In [46]:
# Point the %prql magic at an in-memory DuckDB database (SQLAlchemy-style URL).
# NOTE(review): the saved output shows this call failing with a DuckDB
# "cannot rollback - no transaction is active" error — confirm that the
# connection is actually established on a fresh kernel.
%prql duckdb:///:memory:

TransactionContext Error: cannot rollback - no transaction is active
Connection info needed in SQLAlchemy format, example:
               postgresql://username:password@hostname/dbname
               or an existing connection: dict_keys([])



In [47]:
%%prql results<<
from `data/customers.csv`

Environment variable $DATABASE_URL not set, and no connect string given.
Connection info needed in SQLAlchemy format, example:
               postgresql://username:password@hostname/dbname
               or an existing connection: dict_keys([])


KeyError: 'results<<'

## Shell

In [49]:
!mkdir data
!cd data && wget https://raw.githubusercontent.com/prql/prql-query/main/examples/chinook/csv/customers.csv
!cd data && wget https://raw.githubusercontent.com/prql/prql-query/main/examples/chinook/csv/invoices.csv

--2022-12-10 23:02:09--  https://raw.githubusercontent.com/prql/prql-query/main/examples/chinook/csv/customers.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 6743 (6.6K) [text/plain]
Saving to: ‘customers.csv’


2022-12-10 23:02:10 (7.12 MB/s) - ‘customers.csv’ saved [6743/6743]

--2022-12-10 23:02:11--  https://raw.githubusercontent.com/prql/prql-query/main/examples/chinook/csv/invoices.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 35719 (35K) [text/plain]
Saving to: ‘invoices.csv’


2022-12-10 23:02:11 (29.3 MB/s) - ‘

In [50]:
# Download the prql-query (pq) v0.0.14 Linux x86_64 release archive and, only
# if the download succeeds, unpack it into the working directory. The cells
# below invoke the tool as ./pq.
!wget https://github.com/prql/prql-query/releases/download/v0.0.14/pq-x86_64-unknown-linux-gnu.tar.gz && tar xvzf pq-x86_64-unknown-linux-gnu.tar.gz

--2022-12-10 23:02:21--  https://github.com/prql/prql-query/releases/download/v0.0.14/pq-x86_64-unknown-linux-gnu.tar.gz
Resolving github.com (github.com)... 20.205.243.166
Connecting to github.com (github.com)|20.205.243.166|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/549837867/ac66e7a7-72d1-4cb3-bb30-f87ab7414b11?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20221210%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20221210T230221Z&X-Amz-Expires=300&X-Amz-Signature=e90e94e1a7126a6881d92f6c7b8bb674198eb569df935750c6e8e784314345ba&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=549837867&response-content-disposition=attachment%3B%20filename%3Dpq-x86_64-unknown-linux-gnu.tar.gz&response-content-type=application%2Foctet-stream [following]
--2022-12-10 23:02:21--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/549837867/ac6

In [51]:
# Preview the invoices CSV: run the PRQL transform "take 5" over the file.
!./pq --from data/invoices.csv "take 5"

+------------+-------------+-------------------------------+-------------------------+--------------+---------------+-----------------+---------------------+-------+
| invoice_id | customer_id | invoice_date                  | billing_address         | billing_city | billing_state | billing_country | billing_postal_code | total |
+------------+-------------+-------------------------------+-------------------------+--------------+---------------+-----------------+---------------------+-------+
| 1          | 2           | 2009-01-01T00:00:00.000000000 | Theodor-Heuss-Straße 34 | Stuttgart    |               | Germany         | 70174               | 1.98  |
| 2          | 4           | 2009-01-02T00:00:00.000000000 | Ullevålsveien 14        | Oslo         |               | Norway          | 0171                | 3.96  |
| 3          | 8           | 2009-01-03T00:00:00.000000000 | Grétrystraat 63         | Brussels     |               | Belgium         | 1000                | 5.94  |
| 4 

In [52]:
# Preview the customers CSV: run the PRQL transform "take 5" over the file.
!./pq --from data/customers.csv "take 5"

+-------------+------------+-------------+--------------------------------------------------+---------------------------------+---------------------+-------+----------------+-------------+--------------------+--------------------+--------------------------+----------------+
| customer_id | first_name | last_name   | company                                          | address                         | city                | state | country        | postal_code | phone              | fax                | email                    | support_rep_id |
+-------------+------------+-------------+--------------------------------------------------+---------------------------------+---------------------+-------+----------------+-------------+--------------------+--------------------+--------------------------+----------------+
| 1           | Luís       | Gonçalves   | Embraer - Empresa Brasileira de Aeronáutica S.A. | Av. Brigadeiro Faria Lima, 2170 | São José dos Campos | SP    | Brazil         | 

In [53]:
!./pq --from i=data/invoices.csv --from c=data/customers.csv \
    "from i | group [customer_id] (aggregate [customer_total = sum total]) | sort [-customer_total] | take 5 | join c [customer_id] | select [full_name=f'{first_name} {last_name}', customer_total]"

+--------------------+--------------------+
| full_name          | customer_total     |
+--------------------+--------------------+
| Helena Holý        | 49.620000000000005 |
| Ladislav Kovács    | 45.62              |
| Luis Rojas         | 46.62              |
| Richard Cunningham | 47.620000000000005 |
| Hugh O'Reilly      | 45.62              |
+--------------------+--------------------+


## R

In [4]:
# Prerequisite: an R installation, which the rpy2 cells below depend on.
# On Debian/Ubuntu, uncomment the following line to install it:
#!sudo apt update && sudo apt upgrade -y && sudo apt install -y r-base

In [5]:
!pip install rpy2



In [6]:
# Load rpy2's IPython extension; the %%R cells below rely on it.
%load_ext rpy2.ipython

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


In [7]:
# Create the per-user R library directory that the install.packages() calls
# below write to; -p makes this a no-op when the directory already exists.
!mkdir -p ~/.local/R_libs

In [10]:
%%R
# Install prqlr from the eitsupi r-universe repository into the per-user library.
# NOTE(review): the recorded run reports "installation of package 'prqlr' had
# non-zero exit status" — confirm the build prerequisites for this source package.
install.packages("prqlr", repos = "https://eitsupi.r-universe.dev", lib="~/.local/R_libs/")

R[write to console]: trying URL 'https://eitsupi.r-universe.dev/src/contrib/prqlr_0.0.3.tar.gz'

R[write to console]: Content type 'application/x-gzip'
R[write to console]:  length 95944 bytes (93 KB)

R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to conso

In [11]:
%%R
# Attach prqlr from the per-user library it was installed into.
library(prqlr, lib.loc="~/.local/R_libs/")

R[write to console]: Error in library(prqlr, lib.loc = "~/.local/R_libs/") : 
  there is no package called ‘prqlr’

R[write to console]: In addition: 

R[write to console]: In install.packages("prqlr", repos = "https://eitsupi.r-universe.dev",  :
R[write to console]: 
 
R[write to console]:  installation of package ‘prqlr’ had non-zero exit status




Error in library(prqlr, lib.loc = "~/.local/R_libs/") : 
  there is no package called ‘prqlr’


RInterpreterError: Failed to parse and evaluate line 'library(prqlr, lib.loc="~/.local/R_libs/")\n'.
R error message: 'Error in library(prqlr, lib.loc = "~/.local/R_libs/") : \n  there is no package called ‘prqlr’'

In [None]:
%%R
# Compile a one-line PRQL query to SQL and print the generated statement.
# prqlr is installed into the per-user library, so it has to be attached with
# the same lib.loc used elsewhere in this notebook.
library(prqlr, lib.loc="~/.local/R_libs/")
"from mtcars | filter cyl > 6 | select [cyl, mpg]" |>
  prql_to_sql() |>
  cat()

In [None]:
%%R
# Thanks to the tidyquery package you can even convert PRQL queries to dplyr queries.
# A CRAN mirror is named explicitly so the install does not depend on a
# mirror having been configured (or chosen interactively) in this R session.
install.packages("tidyquery", repos = "https://cloud.r-project.org", lib="~/.local/R_libs/")

In [None]:
%%R
# Attach tidyquery from the per-user library it was installed into.
library("tidyquery", lib.loc="~/.local/R_libs/")

In [None]:
%%R
# Compile the PRQL query to SQL, then have tidyquery display the dplyr code
# corresponding to that SQL. Requires the prqlr cell above to have run
# (prql_to_sql is used without a namespace prefix).
"from mtcars
filter cyl > 6
select [cyl, mpg]" |>
  prql_to_sql() |>
  tidyquery::show_dplyr()

In [None]:
%%R
# Install the nycflights13 data package into the per-user library.
# A CRAN mirror is named explicitly so the install does not depend on a
# mirror having been configured (or chosen interactively) in this R session.
install.packages("nycflights13", repos = "https://cloud.r-project.org", lib="~/.local/R_libs/")

In [None]:
%%R
# Attach nycflights13 from the per-user library; the query below refers to
# `flights` and `planes`, presumably the data frames this package provides.
library("nycflights13", lib.loc="~/.local/R_libs/")

In [None]:
%%R
# Left-join flights to planes on tailnum, keep flights with a distance in
# 200..300 and a non-null air_time, aggregate per (origin, dest) pair, and
# return the two rows that sort first by most seats, then by average delay.
# The PRQL is compiled to SQL and handed to query() — presumably
# tidyquery::query, attached earlier; verify.
"
from flights
join side:left planes [==tailnum]
filter (distance | in 200..300)
filter air_time != null
group [origin, dest] (
  aggregate [
    num_flts = count,
    num_seats = (sum seats | round 0),
    avg_delay = (average arr_delay | round 0)
  ]
)
sort [-num_seats, avg_delay]
take 2
" |>
  prql_to_sql() |>
  query()