In [8]:
# NOTE(review): the wildcard import hides where `connect` and `run_sql` come from;
# presumably both are defined in the local `common` module — confirm.
from common import *

# Create the cursor once; every later cell reuses it.
cursor = connect()

# 创建索引

语法
```
CREATE INDEX [IF NOT EXISTS] index_name
ON table_name(column1, column2, ...);
```

In [2]:
# Query: look up the address row whose phone equals '223664661973'.
sql = """
SELECT 
  address_id, 
  address, 
  district, 
  phone 
FROM 
  address 
WHERE 
  phone = '223664661973';
"""
run_sql(cursor, sql)

   address_id             address   district         phone
0          85  320 Baiyin Parkway  Mahajanga  223664661973


In [3]:
# Use EXPLAIN to show the plan PostgreSQL chooses for the same phone lookup.
sql = """
EXPLAIN SELECT 
  address_id, 
  address, 
  district, 
  phone 
FROM 
  address 
WHERE 
  phone = '223664661973';
"""
run_sql(cursor, sql)

                                          QUERY PLAN
0  Seq Scan on address  (cost=0.00..15.54 rows=1 ...
1     Filter: ((phone)::text = '223664661973'::text)


上述输出表明查询优化器必须对 `address` 表执行顺序扫描。

In [4]:
# Create an index on the phone column of the address table.
sql = """
CREATE INDEX idx_address_phone 
ON address(phone);
"""
cursor.execute(sql)

<psycopg.Cursor [COMMAND_OK] [INTRANS] (host=localhost user=postgres database=dvdrental) at 0x264bf4ad3d0>

当运行 `CREATE INDEX` 语句时，PostgreSQL 会扫描 `address` 表，从 `phone` 列中提取数据，并将其插入到索引 `idx_address_phone` 中。这个过程称之为索引构建。在构建过程中，PostgreSQL 会锁定 `address` 表，以防止其他事务对其进行修改。因此，如果 `address` 表很大，那么索引构建可能需要一些时间。

In [5]:
# List the name and definition of every index on the address table (pg_indexes view).
sql = """
SELECT 
  indexname, 
  indexdef 
FROM 
  pg_indexes 
WHERE 
  tablename = 'address';
"""
run_sql(cursor, sql)

           indexname                                           indexdef
0       address_pkey  CREATE UNIQUE INDEX address_pkey ON public.add...
1     idx_fk_city_id  CREATE INDEX idx_fk_city_id ON public.address ...
2  idx_address_phone  CREATE INDEX idx_address_phone ON public.addre...


In [6]:
# Use EXPLAIN on the phone lookup again to see which plan is chosen
# now that idx_address_phone exists.
sql = """
EXPLAIN SELECT 
  address_id, 
  address, 
  district, 
  phone 
FROM 
  address 
WHERE 
  phone = '223664661973';
"""
run_sql(cursor, sql)

                                          QUERY PLAN
0  Index Scan using idx_address_phone on address ...
1    Index Cond: ((phone)::text = '223664661973':...


# UNIQUE 索引

PostgreSQL 唯一索引用于确保一列或多列中值的唯一性。
 
语法
```
CREATE UNIQUE INDEX index_name
ON table_name (column [, ...]) 
[ NULLS [ NOT ] DISTINCT ];
```

`NULLS NOT DISTINCT` 选项将空值视为相等值，而 `NULLS DISTINCT` 则将空值视为不同值。默认情况下，语句使用 `NULLS DISTINCT`，这意味着索引列可以包含多个空值。只有 `B-tree` 索引支持唯一索引。

如果为列定义了唯一索引，该列就不能存储具有相同值的多条记录。如果为两个或多个列定义了唯一索引，这些列中的组合值就不能在多行中重复。为表定义主键或唯一约束时，PostgreSQL 会自动创建相应的唯一索引。

## 主键和 unique 约束的唯一索引

In [15]:
# Create a demo table: employee_id is the primary key and email carries a UNIQUE constraint.
sql = """
CREATE TABLE employees (
    employee_id SERIAL PRIMARY KEY,
    first_name VARCHAR(255) NOT NULL,
    last_name VARCHAR(255) NOT NULL,
    email VARCHAR(255) UNIQUE
);
"""
cursor.execute(sql)

<psycopg.Cursor [COMMAND_OK] [INTRANS] (host=localhost user=postgres database=dvdrental) at 0x264bf4acf50>

上述语句，`employee_id` 是主键，`email` 列是唯一约束，因此 PostgreSQL 创建了两个唯一索引。

In [10]:
# List the indexes that exist on the employees table (pg_indexes view).
sql = """
SELECT 
    tablename, 
    indexname, 
    indexdef 
FROM 
    pg_indexes 
WHERE 
    tablename = 'employees';
"""
run_sql(cursor, sql)

   tablename            indexname  \
0  employees       employees_pkey   
1  employees  employees_email_key   

                                            indexdef  
0  CREATE UNIQUE INDEX employees_pkey ON public.e...  
1  CREATE UNIQUE INDEX employees_email_key ON pub...  


In [11]:
# Add a mobile_phone column, then create a unique index on it.
# Both statements are sent in a single execute() call.
sql = """
ALTER TABLE employees
ADD mobile_phone VARCHAR(20);

CREATE UNIQUE INDEX idx_employees_mobile_phone
ON employees(mobile_phone);
"""
cursor.execute(sql)

<psycopg.Cursor [COMMAND_OK] [INTRANS] (host=localhost user=postgres database=dvdrental) at 0x264bf4ad3d0>

## 单列创建唯一索引

In [12]:
# Insert the first employee row carrying mobile_phone '(408)-555-1234'.
sql = """
INSERT INTO employees(first_name, last_name, email, mobile_phone)
VALUES ('John','Doe','john.doe@postgresqltutorial.com', '(408)-555-1234');
"""
cursor.execute(sql)

<psycopg.Cursor [COMMAND_OK] [INTRANS] (host=localhost user=postgres database=dvdrental) at 0x264bf4ad3d0>

In [13]:
# Demonstrate the unique index on mobile_phone: this row reuses the phone number
# '(408)-555-1234', so the INSERT is expected to fail with a unique violation.
sql = """
INSERT INTO employees(first_name, last_name, email, mobile_phone)
VALUES ('Jane','Doe','jane.doe@postgresqltutorial.com', '(408)-555-1234');
"""
try:
    # The session is already inside a transaction, so transaction() acts as a
    # savepoint: on failure only this INSERT is rolled back, and the earlier
    # uncommitted work stays usable by the following cells instead of the whole
    # transaction being left in an aborted state.
    with cursor.connection.transaction():
        cursor.execute(sql)
except Exception as exc:
    # SQLSTATE 23505 is unique_violation; anything else is unexpected, so re-raise.
    if getattr(exc, "sqlstate", None) != "23505":
        raise
    print(f"{type(exc).__name__}: {exc}")

UniqueViolation: 重复键违反唯一约束"idx_employees_mobile_phone"
DETAIL:  键值"(mobile_phone)=((408)-555-1234)" 已经存在

## 多列创建唯一索引

In [16]:
# Add work_phone and extension columns, then create one unique index over the pair.
sql = """
ALTER TABLE employees
ADD work_phone VARCHAR(20),
ADD extension VARCHAR(5);

CREATE UNIQUE INDEX idx_employees_workphone
ON employees(work_phone, extension);
"""
cursor.execute(sql)

<psycopg.Cursor [COMMAND_OK] [INTRANS] (host=localhost user=postgres database=dvdrental) at 0x264bf4acf50>

In [17]:
# Insert a row with work_phone '(408)-333-1234' and extension '1212'.
sql = """
INSERT INTO employees(first_name, last_name, work_phone, extension)
VALUES('Lily', 'Bush', '(408)-333-1234','1212');
"""
cursor.execute(sql)

<psycopg.Cursor [COMMAND_OK] [INTRANS] (host=localhost user=postgres database=dvdrental) at 0x264bf4acf50>

In [18]:
# Same work_phone as the previous insert but a different extension ('1211'),
# so the (work_phone, extension) pair is still distinct.
sql = """
INSERT INTO employees(first_name, last_name, work_phone, extension)
VALUES('Joan', 'Doe', '(408)-333-1234','1211');
"""
cursor.execute(sql)

<psycopg.Cursor [COMMAND_OK] [INTRANS] (host=localhost user=postgres database=dvdrental) at 0x264bf4acf50>

上述两条语句都会执行成功，因为 `work_phone` 和 `extension` 列的组合值不同。

In [19]:
# Demonstrate the multicolumn unique index: this row repeats the pair
# ('(408)-333-1234', '1211'), so the INSERT is expected to fail with a unique violation.
sql = """
INSERT INTO employees(first_name, last_name, work_phone, extension)
VALUES('Tommy', 'Stark', '(408)-333-1234','1211');
"""
try:
    # The session is already inside a transaction, so transaction() acts as a
    # savepoint: on failure only this INSERT is rolled back, and the earlier
    # uncommitted work stays usable by the following cells instead of the whole
    # transaction being left in an aborted state.
    with cursor.connection.transaction():
        cursor.execute(sql)
except Exception as exc:
    # SQLSTATE 23505 is unique_violation; anything else is unexpected, so re-raise.
    if getattr(exc, "sqlstate", None) != "23505":
        raise
    print(f"{type(exc).__name__}: {exc}")

UniqueViolation: 重复键违反唯一约束"idx_employees_workphone"
DETAIL:  键值"(work_phone, extension)=((408)-333-1234, 1211)" 已经存在

# 表达式索引（Indexes on Expressions）

语法
```
CREATE INDEX index_name 
ON table_name (expression);
```

定义索引表达式后，当该表达式出现在 SQL 语句的 WHERE 子句或 ORDER BY 子句中时，PostgreSQL 将考虑使用该索引。请注意，维护表达式的索引可能会产生额外的成本。 PostgreSQL 在插入或更新期间计算每行的表达式，并利用结果来构建索引。

In [3]:
# Look up a customer by an exact, case-sensitive last_name match.
sql = """
SELECT 
    customer_id, 
    first_name, 
    last_name 
FROM 
    customer 
WHERE 
    last_name = 'Purdy';
"""
run_sql(cursor, sql)

   customer_id first_name last_name
0          333     Andrew     Purdy


In [4]:
# Inspect the query plan for the exact last_name lookup.
sql = """
EXPLAIN
SELECT 
    customer_id, 
    first_name, 
    last_name 
FROM 
    customer 
WHERE 
    last_name = 'Purdy';
"""
run_sql(cursor, sql)

                                          QUERY PLAN
0  Index Scan using idx_last_name on customer  (c...
1    Index Cond: ((last_name)::text = 'Purdy'::text)


In [5]:
# With the column wrapped in LOWER(), the plan goes back to a sequential scan.
sql = """
EXPLAIN
SELECT 
    customer_id, 
    first_name, 
    last_name 
FROM 
    customer 
WHERE 
    LOWER(last_name) = 'purdy';
"""
run_sql(cursor, sql)

                                          QUERY PLAN
0  Seq Scan on customer  (cost=0.00..17.98 rows=3...
1    Filter: (lower((last_name)::text) = 'purdy':...


In [6]:
# Add an expression index on LOWER(last_name).
sql = """
CREATE INDEX idx_ic_last_name
ON customer(LOWER(last_name));
"""
cursor.execute(sql)

<psycopg.Cursor [COMMAND_OK] [INTRANS] (host=localhost user=postgres database=dvdrental) at 0x1b411c04d10>

In [7]:
# Check the plan for the LOWER(last_name) lookup again.
sql = """
EXPLAIN
SELECT 
    customer_id, 
    first_name, 
    last_name 
FROM 
    customer 
WHERE 
    LOWER(last_name) = 'purdy';
"""
run_sql(cursor, sql)

                                          QUERY PLAN
0  Bitmap Heap Scan on customer  (cost=4.30..11.1...
1    Recheck Cond: (lower((last_name)::text) = 'p...
2    ->  Bitmap Index Scan on idx_ic_last_name  (...
3          Index Cond: (lower((last_name)::text) ...


# 部分索引（Partial Index）

使用 PostgreSQL 部分索引，可以根据指定条件只为表中的一部分行创建索引。也就是说，部分索引是建立在索引列的数据子集上的索引。优点是部分索引可以增强查询性能，同时减小索引大小。它还可以提升表更新速度，因为 PostgreSQL 不需要在所有情况下维护索引。

语法
```
CREATE INDEX [IF NOT EXISTS] index_name
ON table_name(column1, column2, ...)
WHERE predicate;
```

In [8]:
# Partial index: only rows satisfying the predicate active = 0 are indexed.
sql = """
CREATE INDEX customer_active
ON customer(active)
WHERE active = 0;
"""
cursor.execute(sql)

<psycopg.Cursor [COMMAND_OK] [INTRANS] (host=localhost user=postgres database=dvdrental) at 0x1b411c04d10>

In [9]:
# Show the plan for a query whose WHERE clause matches the partial index predicate (active = 0).
sql = """
EXPLAIN SELECT 
  * 
FROM 
  customer 
WHERE 
  active = 0;
"""
run_sql(cursor, sql)

                                          QUERY PLAN
0  Index Scan using customer_active on customer  ...


# 多列索引（Multicolumn Indexes）

多列索引通常称为复合索引、组合索引或串联索引。它们是在多个列上创建的索引。多列索引的目的是提高查询性能，特别是在多列上的查询。它最多可以包含 32 列。只有 `B-tree`, `GIN`, `GIST` 和 `BRIN` 索引支持多列索引。

语法
```
CREATE INDEX [IF NOT EXISTS] index_name
ON table_name(column1, column2, ...);
```

定义多列索引时，应将 `WHERE` 子句中经常使用的列放在列列表的开头，然后是 `WHERE` 子句中使用频率较低的列。以上述语法中的 `(column1, column2, column3)` 索引为例，查询优化器在以下情况下会考虑使用索引：
> WHERE column1 = v1 AND column2 = v2 AND column3 = v3;
>
或
> WHERE column1 = v1 AND column2 = v2;
> 
或
> WHERE column1 = v1;
> 
然而，如果查询条件中没有前导列 `column1`，索引就无法高效地定位数据，查询优化器通常不会使用该多列索引。比如：
> WHERE column3 = v3;
> 
和
> WHERE column2 = v2 and column3 = v3;    


In [10]:
# Create the people table used by the multicolumn index examples.
sql = """
CREATE TABLE people (
    id INT GENERATED BY DEFAULT AS IDENTITY,
    first_name VARCHAR(50) NOT NULL,
    last_name VARCHAR(50) NOT NULL
);
"""
cursor.execute(sql)

<psycopg.Cursor [COMMAND_OK] [INTRANS] (host=localhost user=postgres database=dvdrental) at 0x1b411c04d10>

In [11]:
# Load the SQL stored in pg_18_indexes_data and execute it to insert rows into people.
# The explicit encoding keeps decoding independent of the OS locale
# (assumes the data file is UTF-8/ASCII — TODO confirm).
with open('pg_18_indexes_data', 'r', encoding='utf-8') as f:
    commands = f.read()
    cursor.execute(commands)


In [12]:
# Plan for filtering people by last_name before idx_people_names is created.
sql = """
EXPLAIN SELECT 
  id, 
  first_name, 
  last_name 
FROM 
  people 
WHERE 
  last_name = 'Adams';
"""
run_sql(cursor, sql)

                                          QUERY PLAN
0  Seq Scan on people  (cost=0.00..83.88 rows=9 w...
1        Filter: ((last_name)::text = 'Adams'::text)


In [13]:
# Create a multicolumn (composite) index; last_name is the leading column.
sql = """
CREATE INDEX idx_people_names 
ON people (last_name, first_name);
"""
cursor.execute(sql)

<psycopg.Cursor [COMMAND_OK] [INTRANS] (host=localhost user=postgres database=dvdrental) at 0x1b411c04d10>

In [14]:
# The planner uses idx_people_names here: the filter is on last_name,
# the leading column of the index.
sql = """
EXPLAIN SELECT 
  id, 
  first_name, 
  last_name 
FROM 
  people 
WHERE 
  last_name = 'Adams';
"""
run_sql(cursor, sql)

                                          QUERY PLAN
0  Bitmap Heap Scan on people  (cost=4.67..66.33 ...
1    Recheck Cond: ((last_name)::text = 'Adams'::...
2    ->  Bitmap Index Scan on idx_people_names  (...
3          Index Cond: ((last_name)::text = 'Adam...


In [15]:
# The planner uses idx_people_names here as well: both indexed columns are constrained.
sql = """
EXPLAIN SELECT 
  id, 
  first_name, 
  last_name 
FROM 
  people 
WHERE 
  last_name = 'Adams' 
  AND first_name = 'Lou';
"""
run_sql(cursor, sql)

                                          QUERY PLAN
0  Index Scan using idx_people_names on people  (...
1    Index Cond: (((last_name)::text = 'Adams'::t...


In [16]:
# The planner does not use the index here: the WHERE clause filters only on first_name,
# which is not the leading column of idx_people_names (last_name, first_name).
sql = """
EXPLAIN SELECT 
  id, 
  first_name, 
  last_name 
FROM 
  people 
WHERE 
  first_name = 'Lou';
"""
run_sql(cursor, sql)

                                          QUERY PLAN
0  Seq Scan on people  (cost=0.00..186.00 rows=50...
1         Filter: ((first_name)::text = 'Lou'::text)


# 重建索引（ReIndex）

实际上，由于硬件故障或软件错误，索引可能会损坏，不再包含有效数据。此外，在使用 `CONCURRENTLY` 选项创建索引时，如果索引构建失败，会留下一个无效（invalid）的索引。在这些情况下，可以使用 `REINDEX` 语句重建索引。

```
REINDEX [ ( option, ...) ] 
{ INDEX | TABLE | SCHEMA | DATABASE | SYSTEM }
name;
```

重建单个索引：
> REINDEX INDEX index_name;
> 
重建表的所有索引：
> REINDEX TABLE table_name;
> 
重建特定 `schema` 的所有索引：
> REINDEX SCHEMA schema_name;
> 
重建特定数据库的所有索引：
> REINDEX DATABASE database_name;
> 
重建特定数据库中系统目录上的所有索引：
> REINDEX SYSTEM database_name;
> 

## 重建 vs. 删除再新建的区别

它们的目的是一样的，只是锁定机制不同。

ReIndex 语句会
- 锁定索引所属表的写入但不锁定读取
- 对正在处理的索引加独占锁，从而阻止试图使用索引的读取

删除再建语句会
- 首先，`DROP INDEX` 通过获取对表的独占锁来锁定索引所属表的写入和读取
- 随后的 `CREATE INDEX` 语句会锁定写入，但不会锁定索引父表的读取。不过，在创建索引期间，读取可能会很昂贵。

# 删除索引

```
DROP INDEX [ IF EXISTS ] index_name;
```

# 枚举索引

可以通过访问 `pg_indexes` 视图来查看表的索引。`pg_indexes` 视图包括了以下列
- `schemaname`：存储包含表和索引的模式名称
- `tablename`：索引所属的表名称
- `indexname`：索引名称
- `tablespace`：索引所在的表空间
- `indexdef`：索引定义

In [17]:
# List every index in the public schema, ordered by table name and index name.
sql = """
SELECT
    tablename,
    indexname,
    indexdef
FROM
    pg_indexes
WHERE
    schemaname = 'public'
ORDER BY
    tablename,
    indexname;
"""
run_sql(cursor, sql)

        tablename                                          indexname  \
0        accounts                                      accounts_pkey   
1           actor                                         actor_pkey   
2           actor                                idx_actor_last_name   
3         address                                       address_pkey   
4         address                                     idx_fk_city_id   
5        category                                      category_pkey   
6            city                                          city_pkey   
7            city                                  idx_fk_country_id   
8         country                                       country_pkey   
9        customer                                    customer_active   
10       customer                                      customer_pkey   
11       customer                                  idx_fk_address_id   
12       customer                                    idx_fk_stor

In [18]:
# List the indexes defined on the customer table.
sql = """
SELECT
    indexname,
    indexdef
FROM
    pg_indexes
WHERE
    tablename = 'customer';
"""
run_sql(cursor, sql)

           indexname                                           indexdef
0      customer_pkey  CREATE UNIQUE INDEX customer_pkey ON public.cu...
1  idx_fk_address_id  CREATE INDEX idx_fk_address_id ON public.custo...
2    idx_fk_store_id  CREATE INDEX idx_fk_store_id ON public.custome...
3      idx_last_name  CREATE INDEX idx_last_name ON public.customer ...
4   idx_ic_last_name  CREATE INDEX idx_ic_last_name ON public.custom...
5    customer_active  CREATE INDEX customer_active ON public.custome...


In [19]:
# List the indexes of all tables whose name starts with 'c'.
sql = """
SELECT
    tablename,
    indexname,
    indexdef
FROM
    pg_indexes
WHERE
    tablename LIKE 'c%'
ORDER BY
    tablename,
    indexname;
"""
run_sql(cursor, sql)

  tablename          indexname  \
0  category      category_pkey   
1      city          city_pkey   
2      city  idx_fk_country_id   
3   country       country_pkey   
4  customer    customer_active   
5  customer      customer_pkey   
6  customer  idx_fk_address_id   
7  customer    idx_fk_store_id   
8  customer   idx_ic_last_name   
9  customer      idx_last_name   

                                            indexdef  
0  CREATE UNIQUE INDEX category_pkey ON public.ca...  
1  CREATE UNIQUE INDEX city_pkey ON public.city U...  
2  CREATE INDEX idx_fk_country_id ON public.city ...  
3  CREATE UNIQUE INDEX country_pkey ON public.cou...  
4  CREATE INDEX customer_active ON public.custome...  
5  CREATE UNIQUE INDEX customer_pkey ON public.cu...  
6  CREATE INDEX idx_fk_address_id ON public.custo...  
7  CREATE INDEX idx_fk_store_id ON public.custome...  
8  CREATE INDEX idx_ic_last_name ON public.custom...  
9  CREATE INDEX idx_last_name ON public.customer ...  


# 索引类型

## B-tree 索引

B 树是一种自平衡树，它能对数据进行排序，并允许在对数时间内进行搜索、插入、删除和顺序访问。当索引列涉及到以下的比较时，PostgreSQL 查询规划器会考虑使用 `B-tree` 索引：
```
<
<=
=
>=
>
BETWEEN
IN
IS NULL
IS NOT NULL
```

另一种会使用 `B-tree` 索引的情况是模式匹配查询，例如 `LIKE` 和 `~`，前提是模式为常量且锚定在字符串开头（例如 `col LIKE 'foo%'`）。

## Hash 索引

`Hash 索引` 只能处理简单的 `=` 判断。

```
CREATE INDEX index_name 
ON table_name USING HASH (indexed_column);
```

## GIN 索引

`GIN` 是 `Generalized INverted` 的缩写。GIN 索引在单列存储多个值时最有用，例如 hstore、数组、jsonb 和范围类型。

## BRIN 索引

`BRIN` 是 `Block Range INdexes` 的缩写。与 `B-tree` 索引相比，`BRIN` 的体积更小，维护成本更低。`BRIN` 允许在大型表中使用索引，而在没有水平分区的情况下使用 `B-tree` 索引是不现实的。`BRIN` 通常用于具有线性排序顺序的列，例如销售订单表的创建日期列。

## GiST 索引

`GiST` 是 `Generalized Search Tree` 的缩写。`GiST` 索引允许建立一般的树形结构。`GiST` 索引可用于几何数据类型索引和全文检索。

## SP-GiST 索引

`SP-GiST` 是 `space-partitioned GiST` 的缩写。`SP-GiST` 支持分区搜索树，便于开发各种不同的非平衡数据结构。`SP-GiST` 索引最适用于具有自然聚类元素，但也不是均衡树的数据，例如地理信息系统、多媒体、电话路由和 IP 路由。

# JSON 索引



针对 `JSONB` 类型的列创建索引，可以提高查询性能。PostgreSQL 使用 GIN 索引类型为具有 JSONB 数据类型的列建立索引。GIN 是 Generalized Inverted Index（通用倒排索引）的缩写。

```
CREATE INDEX index_name
ON table_name 
USING GIN(jsonb_column);
```

当创建 `GIN` 索引时，可以使用特定的 GIN 运算符类。运算符类决定 PostgreSQL 如何构建索引以及如何优化索引列上的查询。例如：
```
CREATE INDEX index_name
ON table_name 
USING GIN(jsonb_column jsonb_path_ops);
```

`jsonb_path_ops` 就是一个运算符类，它允许使用 `@>`、`@?` 和 `@@` 运算符来查询 JSONB 数据类型的列。运算符类有：

| Name           | Indexable Operators    |
|----------------|------------------------|
| array_ops      | && (anyarray,anyarray) |
|                | @> (anyarray,anyarray) |
|                | <@ (anyarray,anyarray) |
|                | = (anyarray,anyarray)  |
| jsonb_ops      | @> (jsonb,jsonb)       |
|                | @? (jsonb,jsonpath)    |
|                | @@ (jsonb,jsonpath)    |
|                | ? (jsonb,text)         |
|                | ?\| (jsonb,text[])     |
|                | ?& (jsonb,text[])      |
| jsonb_path_ops | @> (jsonb,jsonb)       |
|                | @? (jsonb,jsonpath)    |
|                | @@ (jsonb,jsonpath)    |
| tsvector_ops   | @@ (tsvector,tsquery)  |
|                | @@@ (tsvector,tsquery) |


对 JSONB 列创建 `GIN` 索引时，默认使用 `jsonb_ops` 运算符类。如果要使用其他运算符类，可以在创建索引时指定。除此之外，PostgreSQL 还可以为指定字段创建 `GIN` 索引：
```
CREATE INDEX index_name 
ON table_name 
USING GIN ((data->'field_name') jsonb_path_ops);
```

In [9]:
# Create a table with a JSONB column for the GIN index examples.
sql = """
CREATE TABLE customer_json(
   id SERIAL PRIMARY KEY,
   data JSONB NOT NULL
);
"""
cursor.execute(sql)

<psycopg.Cursor [COMMAND_OK] [INTRANS] (host=localhost user=postgres database=dvdrental) at 0x290a8ac5250>

In [10]:
# Populate customer_json: build one JSONB document per customer (name, email, phone and a
# nested address object) by joining customer, address, city and country.
sql = """
WITH json_cte AS(
  SELECT 
    jsonb_build_object(
      'first_name',  first_name, 
      'last_name',  last_name, 
      'email',  email, 
      'phone',  a.phone, 
      'address', 
      jsonb_build_object(
        'address', a.address, 
        'city', i.city, 
        'postal_code', a.postal_code, 
        'district',  a.district, 
        'country', o.country
      )
    ):: jsonb AS data 
  FROM 
    customer c 
    INNER JOIN address a ON a.address_id = c.address_id 
    INNER JOIN city i ON i.city_id = a.city_id 
    INNER JOIN country o ON o.country_id = i.country_id
) 
INSERT INTO customer_json(data) 
SELECT 
  data 
FROM 
  json_cte;
"""
cursor.execute(sql)

<psycopg.Cursor [COMMAND_OK] [INTRANS] (host=localhost user=postgres database=dvdrental) at 0x290a8ac5250>

In [11]:
# Baseline: EXPLAIN ANALYZE a containment (@>) query on first_name before any GIN index exists.
sql = """
EXPLAIN ANALYZE  
SELECT
   data ->> 'first_name' first_name,
   data ->> 'last_name' last_name,
   data ->> 'phone' phone
FROM
   customer_json
WHERE
   data @> '{"first_name": "John"}'; 
"""
run_sql(cursor, sql)

                                          QUERY PLAN
0  Seq Scan on customer_json  (cost=0.00..62.33 r...
1    Filter: (data @> '{"first_name": "John"}'::j...
2                        Rows Removed by Filter: 598
3                            Planning Time: 0.053 ms
4                           Execution Time: 0.088 ms


In [12]:
# Create a GIN index over the whole JSONB column (no operator class is named).
sql = """
CREATE INDEX customer_json_index 
ON customer_json 
USING GIN(data);
"""
cursor.execute(sql)

<psycopg.Cursor [COMMAND_OK] [INTRANS] (host=localhost user=postgres database=dvdrental) at 0x290a8ac5250>

In [13]:
# Re-run the same containment query after creating the GIN index and compare the plan.
sql = """
EXPLAIN ANALYZE  
SELECT
   data ->> 'first_name' first_name,
   data ->> 'last_name' last_name,
   data ->> 'phone' phone
FROM
   customer_json
WHERE
   data @> '{"first_name": "John"}'; 
"""
run_sql(cursor, sql)

                                          QUERY PLAN
0  Seq Scan on customer_json  (cost=0.00..31.53 r...
1    Filter: (data @> '{"first_name": "John"}'::j...
2                        Rows Removed by Filter: 598
3                            Planning Time: 0.838 ms
4                           Execution Time: 0.084 ms


### 使用 GIN operator class

In [14]:
# Drop the index and recreate it with the jsonb_path_ops operator class.
sql = """
DROP INDEX customer_json_index;

CREATE INDEX customer_json_index 
ON customer_json 
USING GIN(data jsonb_path_ops);
"""
cursor.execute(sql)

<psycopg.Cursor [COMMAND_OK] [INTRANS] (host=localhost user=postgres database=dvdrental) at 0x290a8ac5250>

In [15]:
# Run the containment query again with the jsonb_path_ops index in place.
sql = """
EXPLAIN ANALYZE  
SELECT
   data ->> 'first_name' first_name,
   data ->> 'last_name' last_name,
   data ->> 'phone' phone
FROM
   customer_json
WHERE
   data @> '{"first_name": "John"}'; 
"""
run_sql(cursor, sql)

                                          QUERY PLAN
0  Bitmap Heap Scan on customer_json  (cost=12.85...
1    Recheck Cond: (data @> '{"first_name": "John...
2                               Heap Blocks: exact=1
3    ->  Bitmap Index Scan on customer_json_index...
4          Index Cond: (data @> '{"first_name": "...
5                            Planning Time: 0.711 ms
6                           Execution Time: 0.031 ms


In [16]:
# Filter with ->> text extraction plus equality instead of @> containment,
# to compare the resulting plan.
sql = """
EXPLAIN ANALYZE 
SELECT * FROM customer_json
WHERE data->>'first_name' = 'John';
"""
run_sql(cursor, sql)

                                          QUERY PLAN
0  Seq Scan on customer_json  (cost=0.00..32.98 r...
1    Filter: ((data ->> 'first_name'::text) = 'Jo...
2                        Rows Removed by Filter: 598
3                            Planning Time: 0.053 ms
4                           Execution Time: 0.080 ms


### 在指定字段创建索引

In [17]:
# Replace the index with a GIN expression index on data->'first_name' only.
sql = """
DROP INDEX customer_json_index;

CREATE INDEX customer_json_index 
ON customer_json 
USING GIN((data->'first_name'));
"""
cursor.execute(sql)

<psycopg.Cursor [COMMAND_OK] [INTRANS] (host=localhost user=postgres database=dvdrental) at 0x290a8ac5250>

In [18]:
# Containment query on the extracted field; the left-hand side matches the
# indexed expression data->'first_name'.
sql = """
EXPLAIN ANALYZE  
SELECT
   data ->> 'first_name' first_name,
   data ->> 'last_name' last_name,
   data ->> 'phone' phone
FROM
   customer_json
WHERE
   data->'first_name' @> '"John"';
"""
run_sql(cursor, sql)

                                          QUERY PLAN
0  Bitmap Heap Scan on customer_json  (cost=8.58....
1    Recheck Cond: ((data -> 'first_name'::text) ...
2                               Heap Blocks: exact=1
3    ->  Bitmap Index Scan on customer_json_index...
4          Index Cond: ((data -> 'first_name'::te...
5                            Planning Time: 0.565 ms
6                           Execution Time: 0.028 ms
