Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion optimizer/column_groups/corr_from_plan.sql
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@
-- Y/N where Y - will create the column groups immediately
-- N - will print the column group creation script only
--
--
-- The cursor C1 includes some predicates I've commented out
-- If uncommented, they will reduce the number of columns compared, but this
-- risks missing some correlated columns. I chose to leave these ideas
-- visible, but I think the best way to speed things up
-- is to reduce the row sample percentage.
--
var create_now varchar2(1)
set echo off
column tab_owner format a20
Expand Down Expand Up @@ -83,7 +90,7 @@ declare
select t1.column_name c1, t2.column_name c2
from w t1, w t2 /* , (select num_rows from dba_tables where owner = :ownname and table_name = :tabname) t */
where t1.column_name > t2.column_name
and greatest(t1.num_distinct,t2.num_distinct)/least(t1.num_distinct,t2.num_distinct)<2 /* Similar number of distinct values */
--and greatest(t1.num_distinct,t2.num_distinct)/least(t1.num_distinct,t2.num_distinct)<2 /* Similar number of distinct values? */
--and t1.num_distinct < t.num_rows/10 /* Perhaps eliminate sequenced columns? */
order by t1.column_name;
c number(6,5);
Expand Down
9 changes: 7 additions & 2 deletions optimizer/column_groups/corr_from_sts.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
-- Correlation is set to 80% - an arbitrary figure
-- Data types limited
-- Only columns with shorter strings compared
-- Columns checked must have a 'similar' number of distinct values (NDVs must not differ by 2X)
-- A sample of rows can be used to speed up execution time - which can be substantial
--
-- Parameters:
Expand Down Expand Up @@ -71,6 +70,12 @@ declare
-- column pairs that have similar NDV - so some NULL cases will be missed.
-- There is also an assumption that longer strings are rarely used in comparison
--
-- The cursor C1 (which selects from W) includes some predicates I've commented out
-- If uncommented, they will reduce the number of columns compared, but this
-- risks missing some correlated columns. I chose to leave these ideas
-- visible, but I think the best way to speed things up
-- is to reduce the row sample percentage.
--
cursor c1 is
with w as (
select column_name, num_distinct
Expand All @@ -86,7 +91,7 @@ declare
select t1.column_name c1, t2.column_name c2
from w t1, w t2 /* , (select num_rows from dba_tables where owner = :ownname and table_name = :tabname) t */
where t1.column_name > t2.column_name
and greatest(t1.num_distinct,t2.num_distinct)/least(t1.num_distinct,t2.num_distinct)<2 /* Similar number of distinct values */
--and greatest(t1.num_distinct,t2.num_distinct)/least(t1.num_distinct,t2.num_distinct)<2 /* Similar number of distinct values? */
--and t1.num_distinct < t.num_rows/10 /* Perhaps eliminate sequenced columns? */
order by t1.column_name;
c number(6,5);
Expand Down
10 changes: 8 additions & 2 deletions optimizer/column_groups/corr_from_table.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
-- Correlation is set to 80% - an arbitrary figure
-- Data types limited
-- Only columns with shorter strings compared
-- Columns checked must have a 'similar' number of distinct values (NDVs must not differ by 2X)
-- A sample of rows can be used to speed up execution time - which can be substantial
--
-- Parameters:
Expand All @@ -15,6 +14,13 @@
-- Y/N - Y to create the column groups immediately
-- N to spool SQL to create column groups
--
--
-- The cursor C1 includes some predicates I've commented out
-- If uncommented, they will reduce the number of columns compared, but this
-- risks missing some correlated columns. I chose to leave these ideas
-- visible, but I think the best way to speed things up
-- is to reduce the row sample percentage.
--
var create_now varchar2(1)
set echo off
column tab_owner format a20
Expand Down Expand Up @@ -80,7 +86,7 @@ declare
select t1.column_name c1, t2.column_name c2
from w t1, w t2 /* , (select num_rows from dba_tables where owner = :ownname and table_name = :tabname) t */
where t1.column_name > t2.column_name
and greatest(t1.num_distinct,t2.num_distinct)/least(t1.num_distinct,t2.num_distinct)<2 /* Similar number of distinct values */
--and greatest(t1.num_distinct,t2.num_distinct)/least(t1.num_distinct,t2.num_distinct)<2 /* Similar number of distinct values? */
--and t1.num_distinct < t.num_rows/10 /* Perhaps eliminate sequenced columns? */
order by t1.column_name;
c number(6,5);
Expand Down