# UES IndexNestedLoopJoin workload

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import json

import numpy as np
import pandas as pd

from transform import mosp
from postgres import explain
from analysis import selection

In [3]:
# Raw results of the plain UES workload (tagged as workload "ues" further below).
df_base_full = pd.read_csv("workloads/job-ues-results-base.csv")

# Raw results of the four index-nested-loop-join variants of the workload.
# NOTE(review): the last two file names spell "pkdix" while the variables say "pkidx" --
# confirm that this matches the names of the files on disk.
df_idxnlj_base_full = pd.read_csv("workloads/job-ues-results-idxnlj.csv")
df_idxnlj_allnlj_full = pd.read_csv("workloads/job-ues-results-idxnlj-allnlj.csv")
df_idxnlj_pkidx_full = pd.read_csv("workloads/job-ues-results-idxnlj-pkdix.csv")
df_idxnlj_allnlj_pkidx_full = pd.read_csv("workloads/job-ues-results-idxnlj-allnlj-pkdix.csv")

In [4]:
def _best_runs(df_full, workload, **tags):
    """Reduce a raw result frame via ``selection.best_query_repetition`` and tag its rows.

    The selection is called with "label" and ``performance_col="query_rt_total"``. Afterwards
    the constant ``workload`` column is added, followed by one constant column per entry in
    ``tags`` (in the order given).
    """
    best = selection.best_query_repetition(df_full, "label", performance_col="query_rt_total")
    best["workload"] = workload
    for column, value in tags.items():
        best[column] = value
    return best


# Baseline: carries neither an NLJ scope nor an index tag.
df_base = _best_runs(df_base_full, "ues")

# The four IdxNLJ variants: {inner, full} NLJ scope x {foreign_key, primary_key} index.
df_idxnlj_base = _best_runs(df_idxnlj_base_full, "ues_idxnlj", nlj_scope="inner", indexed="foreign_key")
df_idxnlj_allnlj = _best_runs(df_idxnlj_allnlj_full, "ues_idxnlj", nlj_scope="full", indexed="foreign_key")
df_idxnlj_pkidx = _best_runs(df_idxnlj_pkidx_full, "ues_idxnlj", nlj_scope="inner", indexed="primary_key")
df_idxnlj_allnlj_pkidx = _best_runs(df_idxnlj_allnlj_pkidx_full, "ues_idxnlj", nlj_scope="full", indexed="primary_key")

df_idxnlj = pd.concat([df_idxnlj_base, df_idxnlj_allnlj, df_idxnlj_pkidx, df_idxnlj_allnlj_pkidx])

In [5]:
# Stack baseline and IdxNLJ results, renumber the rows and shorten the runtime column name.
df = pd.concat([df_base, df_idxnlj])
df = df.reset_index(drop=True)
df = df.rename(columns={"query_rt_total": "rt_total"})

In [6]:
# Summed query runtime of each run of the baseline workload.
df_base_full.groupby("run")["query_rt_total"].sum()

run
1    295.824442
2    281.237423
3    278.699711
4    274.818035
5    275.428677
Name: query_rt_total, dtype: float64

In [7]:
# Summed query runtime of each run of the IdxNLJ workload.
df_idxnlj_base_full.groupby("run")["query_rt_total"].sum()

run
1    290.649480
2    267.113291
3    267.614191
4    269.312999
5    268.451439
Name: query_rt_total, dtype: float64

In [8]:
# Summed query runtime of each run of the IdxNLJ "allnlj" workload.
df_idxnlj_allnlj_full.groupby("run")["query_rt_total"].sum()

run
1    316.899507
2    294.063401
3    290.167805
4    293.375298
5    290.071217
Name: query_rt_total, dtype: float64

In [9]:
# Summed query runtime of each run of the IdxNLJ "pkidx" workload.
df_idxnlj_pkidx_full.groupby("run")["query_rt_total"].sum()

run
1    297.551554
2    277.121557
3    273.854234
4    272.044943
5    275.510750
Name: query_rt_total, dtype: float64

In [10]:
# Summed query runtime of each run of the IdxNLJ "allnlj" + "pkidx" workload.
df_idxnlj_allnlj_pkidx_full.groupby("run")["query_rt_total"].sum()

run
1    319.239451
2    297.132287
3    296.387338
4    293.483264
5    294.063647
Name: query_rt_total, dtype: float64

In [11]:
# Replace the raw query values by what mosp.MospQuery.parse returns for them.
parsed_queries = df["query"].apply(mosp.MospQuery.parse)
df["query"] = parsed_queries

In [12]:
# Decode the JSON-encoded query results into Python objects.
decoded_results = df["query_result"].apply(json.loads)
df["query_result"] = decoded_results

In [13]:
def _parse_plan(row):
    """Feed one row's query and query result to explain.parse_explain_analyze."""
    return explain.parse_explain_analyze(row["query"], row["query_result"])


df["explain_output"] = df.apply(_parse_plan, axis="columns")



In [14]:
def _count_subqueries(query):
    """Number of entries returned by the query's ``subqueries()``."""
    return len(query.subqueries())


df["n_subqueries"] = df["query"].apply(_count_subqueries)

In [15]:
# Only queries that contain at least one subquery are of interest from here on.
has_subqueries = df["n_subqueries"] > 0
subqueries = df.loc[has_subqueries]

# Independent copies, so that each side can be reshaped without touching `df`.
is_ues = subqueries["workload"] == "ues"
ues_queries = subqueries.loc[is_ues].copy()
idxnlj_queries = subqueries.loc[~is_ues].copy()

In [16]:
# One row per (query, subquery) of the UES workload, with the subquery's join predicate,
# execution time and pruned flag pulled out of its plan node.
# Not idempotent: `ues_queries` is replaced by its exploded form, so re-running this cell
# without re-creating `ues_queries` first explodes the rows a second time.
ues_queries = (
    ues_queries
    .drop(columns=["run", "workload", "query_hint", "nlj_scope", "indexed"], errors="ignore")
    .assign(subquery_explain=lambda frame: frame["explain_output"].apply(lambda plan: plan.extract_subqueries()))
    .explode("subquery_explain")
    .assign(
        subquery_predicate=lambda frame: frame["subquery_explain"].apply(lambda sq: sq.join_pred),
        subquery_duration=lambda frame: frame["subquery_explain"].apply(lambda sq: sq.exec_time),
        subquery_pruned=lambda frame: frame["subquery_explain"].apply(lambda sq: sq.pruned),
    )
)

In [17]:
# One row per (query, subquery) of the IdxNLJ workloads, with the subquery's join predicate,
# execution time and pruned flag pulled out of its plan node.
# Not idempotent: `idxnlj_queries` is replaced by its exploded form, so re-running this cell
# without re-creating `idxnlj_queries` first explodes the rows a second time.
idxnlj_queries = (
    idxnlj_queries
    .drop(columns=["query", "run", "workload", "n_subqueries"], errors="ignore")
    .assign(subquery_explain=lambda frame: frame["explain_output"].apply(lambda plan: plan.extract_subqueries()))
    .explode("subquery_explain")
    .assign(
        subquery_predicate=lambda frame: frame["subquery_explain"].apply(lambda sq: sq.join_pred),
        subquery_duration=lambda frame: frame["subquery_explain"].apply(lambda sq: sq.exec_time),
        subquery_pruned=lambda frame: frame["subquery_explain"].apply(lambda sq: sq.pruned),
    )
)

In [18]:
def count_total_scanned_tuples(row, *, scan_ops=None):
    """Sum the incoming rows of the IdxNLJ subquery's scans that use one of ``scan_ops``.

    Parameters
    ----------
    row
        Record with the attributes ``subquery_pruned_idxnlj``, ``subquery_explain_idxnlj``
        (must offer ``lookup_scan(table)``) and ``subquery`` (whose
        ``.subquery.collect_tables()`` yields the tables to look up).
    scan_ops
        Collection of scan operators to count. ``None`` (the default) counts
        ``explain.QueryNode.SEQ_SCAN`` only.

    Returns
    -------
    The sum of ``incoming_rows(fallback_live=True)`` over all matching scan nodes,
    or 0 if the subquery is flagged as pruned.
    """
    if scan_ops is None:
        # Resolved per call instead of in the signature, so that no mutable list object
        # is shared between invocations.
        scan_ops = (explain.QueryNode.SEQ_SCAN,)

    if row.subquery_pruned_idxnlj:
        return 0

    plan = row.subquery_explain_idxnlj
    scan_nodes = [plan.lookup_scan(table) for table in row.subquery.subquery.collect_tables()]
    matching_scans = [scan for scan in scan_nodes if scan.node in scan_ops]
    return sum(scan.incoming_rows(fallback_live=True) for scan in matching_scans)

In [19]:
# Pair every IdxNLJ subquery with the UES subquery of the same query label and join predicate.
subquery_features = idxnlj_queries.merge(
    ues_queries,
    how="inner",
    on=["label", "subquery_predicate"],
    suffixes=("_idxnlj", "_ues"),
)


def _lookup_subquery(row):
    """Fetch the subquery of the row's query that belongs to the row's join predicate."""
    return row["query"].lookup_subquery(row["subquery_predicate"])


subquery_features["subquery"] = subquery_features.apply(_lookup_subquery, axis="columns")
subquery_features["subquery_outgoing_tuples"] = subquery_features["subquery"].apply(
    lambda sq: sq.subquery.count_result_tuples()
)

# Tuples read through index-based scans vs. through sequential / bitmap scans.
index_scan_ops = [explain.QueryNode.IDX_SCAN, explain.QueryNode.IDX_ONLY_SCAN]
seq_and_bmp_scan_ops = [explain.QueryNode.SEQ_SCAN, explain.QueryNode.BMP_SCAN]
subquery_features["subquery_idxlookup_tuples"] = subquery_features.apply(
    count_total_scanned_tuples, axis="columns", scan_ops=index_scan_ops
)
subquery_features["subquery_scanned_tuples"] = subquery_features.apply(
    count_total_scanned_tuples, axis="columns", scan_ops=seq_and_bmp_scan_ops
)

subquery_features["n_subquery_joins"] = subquery_features["subquery"].apply(lambda sq: len(sq.subquery.joins()))

In [20]:
# Columns written to the feature file: query identification, UES measurements,
# IdxNLJ configuration and measurements, and the tuple counts.
export_columns = [
    "label", "query", "query_hint", "n_subquery_joins",
    "query_result_ues", "rt_total_ues", "subquery_duration_ues", "subquery_pruned_ues",
    "nlj_scope", "indexed", "query_result_idxnlj", "rt_total_idxnlj", "subquery_duration_idxnlj", "subquery_pruned_idxnlj",
    "subquery_outgoing_tuples", "subquery_idxlookup_tuples", "subquery_scanned_tuples",
]
df_export = subquery_features[export_columns].copy()

# The query results were decoded from JSON earlier; encode them again for the CSV file.
for json_column in ("query_result_ues", "query_result_idxnlj"):
    df_export[json_column] = df_export[json_column].apply(json.dumps)

df_export.to_csv("workloads/job-ues-features-idxnlj.csv", index=False)
df_export

Unnamed: 0,label,query,query_hint,n_subquery_joins,query_result_ues,rt_total_ues,subquery_duration_ues,subquery_pruned_ues,nlj_scope,indexed,query_result_idxnlj,rt_total_idxnlj,subquery_duration_idxnlj,subquery_pruned_idxnlj,subquery_outgoing_tuples,subquery_idxlookup_tuples,subquery_scanned_tuples
0,11a,SELECT COUNT(*) FROM movie_link AS ml JOIN lin...,/*+\nIndexOnlyScan(mk)\nNestLoop(mk k)\n*/,1,"[{""Plan"": {""Node Type"": ""Aggregate"", ""Strategy...",0.098920,0.000000,True,inner,foreign_key,"[{""Plan"": {""Node Type"": ""Aggregate"", ""Strategy...",0.124898,0.000000,True,11404,0,0
1,11a,SELECT COUNT(*) FROM movie_link AS ml JOIN lin...,/*+\nIndexOnlyScan(mk)\nNestLoop(mk k)\n*/,1,"[{""Plan"": {""Node Type"": ""Aggregate"", ""Strategy...",0.098920,0.000000,True,full,foreign_key,"[{""Plan"": {""Node Type"": ""Aggregate"", ""Strategy...",0.127996,0.000000,True,11404,0,0
2,11a,SELECT COUNT(*) FROM movie_link AS ml JOIN lin...,/*+\nIndexOnlyScan(k)\nNestLoop(mk k)\n*/,1,"[{""Plan"": {""Node Type"": ""Aggregate"", ""Strategy...",0.098920,0.000000,True,inner,primary_key,"[{""Plan"": {""Node Type"": ""Aggregate"", ""Strategy...",0.126593,0.000000,True,11404,0,0
3,11a,SELECT COUNT(*) FROM movie_link AS ml JOIN lin...,/*+\nIndexOnlyScan(k)\nNestLoop(mk k)\n*/,1,"[{""Plan"": {""Node Type"": ""Aggregate"", ""Strategy...",0.098920,0.000000,True,full,primary_key,"[{""Plan"": {""Node Type"": ""Aggregate"", ""Strategy...",0.125931,0.000000,True,11404,0,0
4,11b,SELECT COUNT(*) FROM movie_companies AS mc JOI...,/*+\nIndexOnlyScan(ml)\nIndexOnlyScan(mk)\nNes...,1,"[{""Plan"": {""Node Type"": ""Aggregate"", ""Strategy...",0.227532,0.000000,True,inner,foreign_key,"[{""Plan"": {""Node Type"": ""Aggregate"", ""Strategy...",0.326768,0.000000,True,11404,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
423,8c,SELECT COUNT(*) FROM aka_name AS a1 JOIN name ...,/*+\nIndexOnlyScan(rt)\nIndexOnlyScan(t)\nNest...,2,"[{""Plan"": {""Node Type"": ""Aggregate"", ""Strategy...",8.525333,4.857518,False,full,primary_key,"[{""Plan"": {""Node Type"": ""Aggregate"", ""Strategy...",16.263859,10.717574,False,5175636,68211953,0
424,8d,SELECT COUNT(*) FROM aka_name AS an1 JOIN name...,/*+\nIndexOnlyScan(ci)\nNestLoop(ci rt)\n*/,2,"[{""Plan"": {""Node Type"": ""Aggregate"", ""Strategy...",5.657469,3.853126,False,inner,foreign_key,"[{""Plan"": {""Node Type"": ""Aggregate"", ""Strategy...",2.594441,0.823238,False,518182,63475827,4736126
425,8d,SELECT COUNT(*) FROM aka_name AS an1 JOIN name...,/*+\nIndexOnlyScan(ci)\nIndexOnlyScan(t)\nNest...,2,"[{""Plan"": {""Node Type"": ""Aggregate"", ""Strategy...",5.657469,3.853126,False,full,foreign_key,"[{""Plan"": {""Node Type"": ""Aggregate"", ""Strategy...",2.568197,1.183267,False,518182,68211941,12
426,8d,SELECT COUNT(*) FROM aka_name AS an1 JOIN name...,/*+\nIndexOnlyScan(rt)\nNestLoop(ci rt)\n*/,2,"[{""Plan"": {""Node Type"": ""Aggregate"", ""Strategy...",5.657469,3.853126,False,inner,primary_key,"[{""Plan"": {""Node Type"": ""Aggregate"", ""Strategy...",2.590833,0.818465,False,518182,63475839,4736114


---

## Actual Rows shenanigans

In [21]:
# For each matched subquery pair, pull the `proc_rows` value and the operator (`node`) out of
# the UES plan node and the IdxNLJ plan node.
regressions = subquery_features.copy()
regressions["subquery_rows_ues"] = regressions["subquery_explain_ues"].apply(lambda sq: sq.proc_rows)
regressions["subquery_rows_idxnlj"] = regressions["subquery_explain_idxnlj"].apply(lambda sq: sq.proc_rows)
regressions["subquery_operator_ues"] = regressions["subquery_explain_ues"].apply(lambda sq: sq.node)
regressions["subquery_operator_idxnlj"] = regressions["subquery_explain_idxnlj"].apply(lambda sq: sq.node)

# Keep only the pairs whose row counts differ and where neither subquery was pruned.
rows_differ = regressions["subquery_rows_ues"] != regressions["subquery_rows_idxnlj"]
not_pruned = ~regressions["subquery_pruned_ues"] & ~regressions["subquery_pruned_idxnlj"]
# The explicit copy detaches the filtered frame: columns are added to it in a later cell, which
# would otherwise be an assignment on a slice of the unfiltered frame (SettingWithCopyWarning).
regressions = regressions[rows_differ & not_pruned].copy()

regressions[["label", "subquery_explain_ues", "subquery_explain_idxnlj", "subquery_rows_ues", "subquery_rows_idxnlj"]]

Unnamed: 0,label,subquery_explain_ues,subquery_explain_idxnlj,subquery_rows_ues,subquery_rows_idxnlj
28,13a,Hash Join (mi.info_type_id = it2.id) <- [Seq S...,Nested Loop (mi.info_type_id = it2.id) <- [Seq...,5433156,5433157
29,13a,Hash Join (mi.info_type_id = it2.id) <- [Seq S...,Nested Loop (mi.info_type_id = it2.id) <- [Seq...,5433156,5433157
30,13a,Hash Join (mi.info_type_id = it2.id) <- [Seq S...,Nested Loop (mi.info_type_id = it2.id) <- [Ind...,5433156,5433157
31,13a,Hash Join (mi.info_type_id = it2.id) <- [Seq S...,Nested Loop (mi.info_type_id = it2.id) <- [Ind...,5433156,5433157
32,13b,Hash Join (mc.company_id = cn.id) <- [Hash Joi...,Hash Join (mc.company_id = cn.id) <- [Nested L...,1320120,1320121
...,...,...,...,...,...
389,4c,Hash Join (mk.keyword_id = k.id) <- [Seq Scan ...,Nested Loop (mk.keyword_id = k.id) <- [Seq Sca...,14049,14050
390,4c,Hash Join (mk.keyword_id = k.id) <- [Seq Scan ...,Nested Loop (mk.keyword_id = k.id) <- [Index S...,14049,14050
391,4c,Hash Join (mk.keyword_id = k.id) <- [Seq Scan ...,Nested Loop (mk.keyword_id = k.id) <- [Index S...,14049,14050
425,8d,Hash Join (t.id = ci.movie_id) <- [Seq Scan ::...,Nested Loop (t.id = ci.movie_id) <- [Nested Lo...,518181,518182


In [22]:
# Larger row count divided by the smaller one, regardless of which workload produced which.
rows_ues = regressions["subquery_rows_ues"].to_numpy()
rows_idxnlj = regressions["subquery_rows_idxnlj"].to_numpy()
regressions["subquery_row_ratio"] = np.maximum(rows_ues, rows_idxnlj) / np.minimum(rows_ues, rows_idxnlj)

Wherever the tuple counts differ, one count is (approximately) a whole-number multiple of the other. The check below needs a tolerance, because the tuple counts can vary slightly as they are averaged.

In [23]:
# Rows whose ratio is further than 0.05 away from a whole number.
ratio_fraction = regressions["subquery_row_ratio"] % 1
regressions[(ratio_fraction > 0.05) & (ratio_fraction < 0.95)]

Unnamed: 0,label,query_result_idxnlj,rt_total_idxnlj,query_hint,nlj_scope,indexed,explain_output_idxnlj,subquery_explain_idxnlj,subquery_predicate,subquery_duration_idxnlj,...,subquery,subquery_outgoing_tuples,subquery_idxlookup_tuples,subquery_scanned_tuples,n_subquery_joins,subquery_rows_ues,subquery_rows_idxnlj,subquery_operator_ues,subquery_operator_idxnlj,subquery_row_ratio


In [24]:
# Differing row counts although both workloads use the same operator for the subquery.
same_operator = regressions["subquery_operator_ues"] == regressions["subquery_operator_idxnlj"]
regressions.loc[
    same_operator,
    ["label", "subquery_explain_ues", "subquery_explain_idxnlj", "subquery_rows_ues", "subquery_rows_idxnlj", "subquery_row_ratio"],
]

Unnamed: 0,label,subquery_explain_ues,subquery_explain_idxnlj,subquery_rows_ues,subquery_rows_idxnlj,subquery_row_ratio
32,13b,Hash Join (mc.company_id = cn.id) <- [Hash Joi...,Hash Join (mc.company_id = cn.id) <- [Nested L...,1320120,1320121,1.000001
33,13b,Hash Join (mc.company_id = cn.id) <- [Hash Joi...,Hash Join (mc.company_id = cn.id) <- [Nested L...,1320120,1320121,1.000001
44,13d,Hash Join (mc.company_id = cn.id) <- [Hash Joi...,Hash Join (mc.company_id = cn.id) <- [Nested L...,1320120,1320121,1.000001
45,13d,Hash Join (mc.company_id = cn.id) <- [Hash Joi...,Hash Join (mc.company_id = cn.id) <- [Nested L...,1320120,1320121,1.000001


In [25]:
# Smallest and largest row ratio among the differing subqueries.
row_ratio = regressions["subquery_row_ratio"]
(row_ratio.min(), row_ratio.max())

(1.000000184055087, 13726.0)

In [26]:
# Subqueries whose row counts differ by more than 1 %.
large_ratio = regressions["subquery_row_ratio"] > 1.01
regressions.loc[
    large_ratio,
    ["label", "subquery_explain_ues", "subquery_explain_idxnlj", "subquery_operator_ues", "subquery_operator_idxnlj", "subquery_rows_ues", "subquery_rows_idxnlj", "subquery_row_ratio"],
]

Unnamed: 0,label,subquery_explain_ues,subquery_explain_idxnlj,subquery_operator_ues,subquery_operator_idxnlj,subquery_rows_ues,subquery_rows_idxnlj,subquery_row_ratio
348,33a,Hash Join (mi_idx1.info_type_id = it1.id) <- [...,Nested Loop (mi_idx1.info_type_id = it1.id) <-...,QueryNode.HASH_JOIN,QueryNode.NESTED_LOOP,789156,2367465,2.999996
349,33a,Hash Join (mi_idx1.info_type_id = it1.id) <- [...,Nested Loop (mi_idx1.info_type_id = it1.id) <-...,QueryNode.HASH_JOIN,QueryNode.NESTED_LOOP,789156,2367465,2.999996
350,33a,Hash Join (mi_idx1.info_type_id = it1.id) <- [...,Nested Loop (mi_idx1.info_type_id = it1.id) <-...,QueryNode.HASH_JOIN,QueryNode.NESTED_LOOP,789156,2367465,2.999996
351,33a,Hash Join (mi_idx1.info_type_id = it1.id) <- [...,Nested Loop (mi_idx1.info_type_id = it1.id) <-...,QueryNode.HASH_JOIN,QueryNode.NESTED_LOOP,789156,2367465,2.999996
354,33b,Hash Join (mi_idx2.info_type_id = it2.id) <- [...,Nested Loop (mi_idx2.info_type_id = it2.id) <-...,QueryNode.HASH_JOIN,QueryNode.NESTED_LOOP,1,13726,13726.0
355,33b,Hash Join (mi_idx2.info_type_id = it2.id) <- [...,Nested Loop (mi_idx2.info_type_id = it2.id) <-...,QueryNode.HASH_JOIN,QueryNode.NESTED_LOOP,1,13726,13726.0
360,33c,Hash Join (mi_idx2.info_type_id = it2.id) <- [...,Nested Loop (mi_idx2.info_type_id = it2.id) <-...,QueryNode.HASH_JOIN,QueryNode.NESTED_LOOP,20658,61974,3.0
361,33c,Hash Join (mi_idx2.info_type_id = it2.id) <- [...,Nested Loop (mi_idx2.info_type_id = it2.id) <-...,QueryNode.HASH_JOIN,QueryNode.NESTED_LOOP,20658,61974,3.0
362,33c,Hash Join (mi_idx2.info_type_id = it2.id) <- [...,Nested Loop (mi_idx2.info_type_id = it2.id) <-...,QueryNode.HASH_JOIN,QueryNode.NESTED_LOOP,20658,61974,3.0
363,33c,Hash Join (mi_idx2.info_type_id = it2.id) <- [...,Nested Loop (mi_idx2.info_type_id = it2.id) <-...,QueryNode.HASH_JOIN,QueryNode.NESTED_LOOP,20658,61974,3.0


In [27]:
# Same selection as before, additionally requiring that neither subquery is flagged as pruned.
large_ratio = regressions["subquery_row_ratio"] > 1.01
subquery_pruned = regressions["subquery_pruned_ues"] | regressions["subquery_pruned_idxnlj"]
regressions.loc[
    large_ratio & ~subquery_pruned,
    ["label", "subquery_explain_ues", "subquery_explain_idxnlj", "subquery_operator_ues", "subquery_operator_idxnlj", "subquery_rows_ues", "subquery_rows_idxnlj", "subquery_row_ratio"],
]

Unnamed: 0,label,subquery_explain_ues,subquery_explain_idxnlj,subquery_operator_ues,subquery_operator_idxnlj,subquery_rows_ues,subquery_rows_idxnlj,subquery_row_ratio
348,33a,Hash Join (mi_idx1.info_type_id = it1.id) <- [...,Nested Loop (mi_idx1.info_type_id = it1.id) <-...,QueryNode.HASH_JOIN,QueryNode.NESTED_LOOP,789156,2367465,2.999996
349,33a,Hash Join (mi_idx1.info_type_id = it1.id) <- [...,Nested Loop (mi_idx1.info_type_id = it1.id) <-...,QueryNode.HASH_JOIN,QueryNode.NESTED_LOOP,789156,2367465,2.999996
350,33a,Hash Join (mi_idx1.info_type_id = it1.id) <- [...,Nested Loop (mi_idx1.info_type_id = it1.id) <-...,QueryNode.HASH_JOIN,QueryNode.NESTED_LOOP,789156,2367465,2.999996
351,33a,Hash Join (mi_idx1.info_type_id = it1.id) <- [...,Nested Loop (mi_idx1.info_type_id = it1.id) <-...,QueryNode.HASH_JOIN,QueryNode.NESTED_LOOP,789156,2367465,2.999996
354,33b,Hash Join (mi_idx2.info_type_id = it2.id) <- [...,Nested Loop (mi_idx2.info_type_id = it2.id) <-...,QueryNode.HASH_JOIN,QueryNode.NESTED_LOOP,1,13726,13726.0
355,33b,Hash Join (mi_idx2.info_type_id = it2.id) <- [...,Nested Loop (mi_idx2.info_type_id = it2.id) <-...,QueryNode.HASH_JOIN,QueryNode.NESTED_LOOP,1,13726,13726.0
360,33c,Hash Join (mi_idx2.info_type_id = it2.id) <- [...,Nested Loop (mi_idx2.info_type_id = it2.id) <-...,QueryNode.HASH_JOIN,QueryNode.NESTED_LOOP,20658,61974,3.0
361,33c,Hash Join (mi_idx2.info_type_id = it2.id) <- [...,Nested Loop (mi_idx2.info_type_id = it2.id) <-...,QueryNode.HASH_JOIN,QueryNode.NESTED_LOOP,20658,61974,3.0
362,33c,Hash Join (mi_idx2.info_type_id = it2.id) <- [...,Nested Loop (mi_idx2.info_type_id = it2.id) <-...,QueryNode.HASH_JOIN,QueryNode.NESTED_LOOP,20658,61974,3.0
363,33c,Hash Join (mi_idx2.info_type_id = it2.id) <- [...,Nested Loop (mi_idx2.info_type_id = it2.id) <-...,QueryNode.HASH_JOIN,QueryNode.NESTED_LOOP,20658,61974,3.0


In [28]:
# Number of large-ratio rows for which PlanNode.any_pruned holds on at least one of the two plans.
large_ratio = regressions["subquery_row_ratio"] > 1.01
pruned_ues = regressions["explain_output_ues"].apply(explain.PlanNode.any_pruned)
pruned_idxnlj = regressions["explain_output_idxnlj"].apply(explain.PlanNode.any_pruned)
len(regressions.loc[large_ratio & (pruned_ues | pruned_idxnlj)])

12

In [29]:
# Number of subquery pairs for which PlanNode.any_pruned holds on at least one of the two plans.
pruned_ues = subquery_features["explain_output_ues"].apply(explain.PlanNode.any_pruned)
pruned_idxnlj = subquery_features["explain_output_idxnlj"].apply(explain.PlanNode.any_pruned)
len(subquery_features.loc[pruned_ues | pruned_idxnlj])

260

In [30]:
# Large-ratio rows for which PlanNode.any_pruned holds on neither of the two plans.
large_ratio = regressions["subquery_row_ratio"] > 1.01
pruned_ues = regressions["explain_output_ues"].apply(explain.PlanNode.any_pruned)
pruned_idxnlj = regressions["explain_output_idxnlj"].apply(explain.PlanNode.any_pruned)
regressions.loc[
    large_ratio & ~(pruned_ues | pruned_idxnlj),
    ["label", "subquery_explain_ues", "subquery_explain_idxnlj", "subquery_operator_ues", "subquery_operator_idxnlj", "subquery_rows_ues", "subquery_rows_idxnlj", "subquery_row_ratio"],
]

Unnamed: 0,label,subquery_explain_ues,subquery_explain_idxnlj,subquery_operator_ues,subquery_operator_idxnlj,subquery_rows_ues,subquery_rows_idxnlj,subquery_row_ratio
354,33b,Hash Join (mi_idx2.info_type_id = it2.id) <- [...,Nested Loop (mi_idx2.info_type_id = it2.id) <-...,QueryNode.HASH_JOIN,QueryNode.NESTED_LOOP,1,13726,13726.0
355,33b,Hash Join (mi_idx2.info_type_id = it2.id) <- [...,Nested Loop (mi_idx2.info_type_id = it2.id) <-...,QueryNode.HASH_JOIN,QueryNode.NESTED_LOOP,1,13726,13726.0


In [31]:
# First large-ratio row without any pruning in either plan, kept in `r` for closer inspection.
large_ratio = regressions["subquery_row_ratio"] > 1.01
pruned_ues = regressions["explain_output_ues"].apply(explain.PlanNode.any_pruned)
pruned_idxnlj = regressions["explain_output_idxnlj"].apply(explain.PlanNode.any_pruned)
unpruned_large_ratio = regressions.loc[large_ratio & ~(pruned_ues | pruned_idxnlj)]
r = unpruned_large_ratio.iloc[0]
r

label                                                                      33b
query_result_idxnlj          [{'Plan': {'Node Type': 'Aggregate', 'Strategy...
rt_total_idxnlj                                                       0.853289
query_hint                   /*+\nIndexOnlyScan(it1)\nIndexOnlyScan(it2)\nN...
nlj_scope                                                                inner
indexed                                                            primary_key
explain_output_idxnlj        Hash Join (mc1.company_id = cn1.id) <- [Hash J...
subquery_explain_idxnlj      Nested Loop (mi_idx2.info_type_id = it2.id) <-...
subquery_predicate                             (mi_idx2.info_type_id = it2.id)
subquery_duration_idxnlj                                              0.139529
subquery_pruned_idxnlj                                                   False
query                        SELECT COUNT(*) FROM movie_link AS ml JOIN lin...
query_result_ues             [{'Plan': {'Node Type':

In [32]:
# Subquery plan of the selected row as recorded for the UES workload.
print(r.subquery_explain_ues.pretty_print())

[SQ] Hash Join (mi_idx2.info_type_id = it2.id)
  <- Seq Scan :: movie_info_idx
  <- Seq Scan :: info_type



In [33]:
# Subquery plan of the selected row as recorded for the IdxNLJ workload.
print(r.subquery_explain_idxnlj.pretty_print())

[SQ] Nested Loop (mi_idx2.info_type_id = it2.id)
  <- Index Scan :: info_type
  <- Bitmap Scan (virtual) :: movie_info_idx

