Compare using third table for many to many relationship with array column
200000 groups
≈ 4000000 posts
Each post belongs to 40 random groups
Install gems
$ bundle install
Create database demo_test
(assuming user postgres exists and has rights to create database):
$ psql -U postgres -c "CREATE DATABASE demo_test;"
Insert data (where each post belongs to 40 groups)
$ COUNT=40 ruby insert_data.rb
Assess database
$ psql -U postgres -d demo_test
EXPLAIN ANALYSE
SELECT * FROM posts
INNER JOIN groups_posts ON groups_posts.post_id = posts.id
WHERE groups_posts.group_id IN (84735, 65985, 68018, 198469, 177506, 116078, 33750, 185283, 123223, 183631, 60215, 134132, 64859, 122458, 3971, 33007, 154428, 25598, 8265, 74954, 63886, 18432, 114493, 95681, 195200, 6705, 181981, 86117, 73657, 10046, 134508, 177495, 177002, 58836, 143820, 69736, 178597, 179932, 27974, 8806, 164599, 67593, 164689, 101579, 75999, 50612, 55064, 69266, 131887, 179484, 164289, 15875, 59081, 50192, 23436, 165099, 148505, 122549, 29110, 156086, 86251, 19579, 188438, 74792, 64974, 131373, 142118, 185556, 78050, 116960, 188725, 17533, 119411, 58002, 122985, 166958, 196627, 102679, 75816, 39712, 151121, 108998, 168145, 120582, 80793, 154159, 60855, 116611, 113605, 75802, 180720, 121700, 9978, 67459, 145137, 65485, 19229, 144245, 183836, 172168)
LIMIT 30;
QUERY PLAN
---------------------------------------------------------------------
Limit (cost=1.00..282.83 rows=30 width=244) (actual time=0.080..11.681 rows=30 loops=1)
-> Nested Loop (cost=1.00..729046.13 rows=77606 width=244) (actual time=0.080..11.674 rows=30 loops=1)
-> Index Scan using index_groups_posts_on_group_id on groups_posts (cost=0.57..299509.38 rows=77606 width=12) (actual time=0.055..4.059 rows=30 loops=1)
Index Cond: (group_id = ANY ('{84735,65985,68018,198469,177506,116078,33750,185283,123223,183631,60215,134132,64859,122458,3971,33007,154428,25598,8265,74954,63886,18432,114493,95681,195200,6705,181981,86117,73657,10046,134508,177495,177002,58836,143820,69736,178597,179932,27974,8806,164599,67593,164689,101579,75999,50612,55064,69266,131887,179484,164289,15875,59081,50192,23436,165099,148505,122549,29110,156086,86251,19579,188438,74792,64974,131373,142118,185556,78050,116960,188725,17533,119411,58002,122985,166958,196627,102679,75816,39712,151121,108998,168145,120582,80793,154159,60855,116611,113605,75802,180720,121700,9978,67459,145137,65485,19229,144245,183836,172168}'::integer[]))
-> Index Scan using posts_pkey on posts (cost=0.43..5.52 rows=1 width=232) (actual time=0.251..0.252 rows=1 loops=30)
Index Cond: (id = groups_posts.post_id)
Planning time: 14.361 ms
Execution time: 11.738 ms
(9 rows)
EXPLAIN ANALYSE
SELECT * FROM posts
WHERE posts.id IN (
SELECT groups_posts.post_id FROM groups_posts
WHERE groups_posts.group_id IN (84735, 65985, 68018, 198469, 177506, 116078, 33750, 185283, 123223, 183631, 60215, 134132, 64859, 122458, 3971, 33007, 154428, 25598, 8265, 74954, 63886, 18432, 114493, 95681, 195200, 6705, 181981, 86117, 73657, 10046, 134508, 177495, 177002, 58836, 143820, 69736, 178597, 179932, 27974, 8806, 164599, 67593, 164689, 101579, 75999, 50612, 55064, 69266, 131887, 179484, 164289, 15875, 59081, 50192, 23436, 165099, 148505, 122549, 29110, 156086, 86251, 19579, 188438, 74792, 64974, 131373, 142118, 185556, 78050, 116960, 188725, 17533, 119411, 58002, 122985, 166958, 196627, 102679, 75816, 39712, 151121, 108998, 168145, 120582, 80793, 154159, 60855, 116611, 113605, 75802, 180720, 121700, 9978, 67459, 145137, 65485, 19229, 144245, 183836, 172168)
)
LIMIT 30;
QUERY PLAN
---------------------------------------------------------------------
Limit (cost=6.65..9648.04 rows=30 width=232) (actual time=17.264..73.969 rows=30 loops=1)
-> Merge Semi Join (cost=6.65..24941009.63 rows=77606 width=232) (actual time=17.263..73.948 rows=30 loops=1)
Merge Cond: (posts.id = groups_posts.post_id)
-> Index Scan using posts_pkey on posts (cost=0.43..232566.93 rows=3948967 width=232) (actual time=0.011..2.551 rows=1810 loops=1)
-> Index Scan using index_groups_posts_on_post_id on groups_posts (cost=0.57..24700739.35 rows=77606 width=4) (actual time=16.419..71.174 rows=30 loops=1)
Filter: (group_id = ANY ('{84735,65985,68018,198469,177506,116078,33750,185283,123223,183631,60215,134132,64859,122458,3971,33007,154428,25598,8265,74954,63886,18432,114493,95681,195200,6705,181981,86117,73657,10046,134508,177495,177002,58836,143820,69736,178597,179932,27974,8806,164599,67593,164689,101579,75999,50612,55064,69266,131887,179484,164289,15875,59081,50192,23436,165099,148505,122549,29110,156086,86251,19579,188438,74792,64974,131373,142118,185556,78050,116960,188725,17533,119411,58002,122985,166958,196627,102679,75816,39712,151121,108998,168145,120582,80793,154159,60855,116611,113605,75802,180720,121700,9978,67459,145137,65485,19229,144245,183836,172168}'::integer[]))
Rows Removed by Filter: 72357
Planning time: 0.882 ms
Execution time: 79.945 ms
(9 rows)
EXPLAIN ANALYSE
SELECT * FROM posts
WHERE posts.group_ids && ARRAY[84735, 65985, 68018, 198469, 177506, 116078, 33750, 185283, 123223, 183631, 60215, 134132, 64859, 122458, 3971, 33007, 154428, 25598, 8265, 74954, 63886, 18432, 114493, 95681, 195200, 6705, 181981, 86117, 73657, 10046, 134508, 177495, 177002, 58836, 143820, 69736, 178597, 179932, 27974, 8806, 164599, 67593, 164689, 101579, 75999, 50612, 55064, 69266, 131887, 179484, 164289, 15875, 59081, 50192, 23436, 165099, 148505, 122549, 29110, 156086, 86251, 19579, 188438, 74792, 64974, 131373, 142118, 185556, 78050, 116960, 188725, 17533, 119411, 58002, 122985, 166958, 196627, 102679, 75816, 39712, 151121, 108998, 168145, 120582, 80793, 154159, 60855, 116611, 113605, 75802, 180720, 121700, 9978, 67459, 145137, 65485, 19229, 144245, 183836, 172168]
LIMIT 30;
QUERY PLAN
---------------------------------------------------------------------
Limit (cost=0.00..3.46 rows=30 width=232) (actual time=4.040..29.780 rows=30 loops=1)
-> Seq Scan on posts (cost=0.00..178917.17 rows=1552860 width=232) (actual time=4.038..29.764 rows=30 loops=1)
Filter: (group_ids && '{84735,65985,68018,198469,177506,116078,33750,185283,123223,183631,60215,134132,64859,122458,3971,33007,154428,25598,8265,74954,63886,18432,114493,95681,195200,6705,181981,86117,73657,10046,134508,177495,177002,58836,143820,69736,178597,179932,27974,8806,164599,67593,164689,101579,75999,50612,55064,69266,131887,179484,164289,15875,59081,50192,23436,165099,148505,122549,29110,156086,86251,19579,188438,74792,64974,131373,142118,185556,78050,116960,188725,17533,119411,58002,122985,166958,196627,102679,75816,39712,151121,108998,168145,120582,80793,154159,60855,116611,113605,75802,180720,121700,9978,67459,145137,65485,19229,144245,183836,172168}'::integer[])
Rows Removed by Filter: 1780
Planning time: 6.543 ms
Execution time: 29.868 ms
(9 rows)
To check select using array column with GIST or GIN index need to insert large data sample...