Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Snowflake dialect update for MERGE INTO predicates #2670

Merged
merged 4 commits into from Feb 16, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
90 changes: 43 additions & 47 deletions benchmarks/bench_002/bench_002_pearson_fix.sql
Expand Up @@ -13,158 +13,158 @@ raw_effect_sizes AS (
-- numerator this is the slope of the regression line considering STDDEV_POP(open_rate_su) to be
-- the "y variable" and uses_small_subject_line to be the "x variable" in terms of the regression line.
SAFE_DIVIDE(
SAFE_MULTIPLY(
SAFE_MULTIPLY(
CORR(open_rate_su, uses_small_subject_line),
STDDEV_POP(open_rate_su)
),
STDDEV_POP(uses_small_subject_line)
STDDEV_POP(uses_small_subject_line)
) AS open_uses_small_subject_line,

-- The following is the slope of the regression line. Note that CORR (which is Pearson's correlation
-- coefficient) is symmetric in its arguments, but since STDDEV_POP(open_rate_su) appears in the
-- numerator, this is the slope of the regression line considering open_rate_su to be
-- the "y variable" and uses_personal_subject to be the "x variable" in terms of the regression line.
SAFE_DIVIDE(
SAFE_MULTIPLY(
SAFE_MULTIPLY(
CORR(open_rate_su, uses_personal_subject),
STDDEV_POP(open_rate_su)
),
STDDEV_POP(uses_personal_subject)
STDDEV_POP(uses_personal_subject)
) AS open_uses_personal_subject,

-- The following is the slope of the regression line. Note that CORR (which is Pearson's correlation
-- coefficient) is symmetric in its arguments, but since STDDEV_POP(open_rate_su) appears in the
-- numerator, this is the slope of the regression line considering open_rate_su to be
-- the "y variable" and uses_timewarp to be the "x variable" in terms of the regression line.
SAFE_DIVIDE(
SAFE_MULTIPLY(
SAFE_MULTIPLY(
CORR(open_rate_su, uses_timewarp), STDDEV_POP(open_rate_su)
),
STDDEV_POP(uses_timewarp)
STDDEV_POP(uses_timewarp)
) AS open_uses_timewarp,

-- The following is the slope of the regression line. Note that CORR (which is Pearson's correlation
-- coefficient) is symmetric in its arguments, but since STDDEV_POP(open_rate_su) appears in the
-- numerator, this is the slope of the regression line considering open_rate_su to be
-- the "y variable" and uses_small_preview to be the "x variable" in terms of the regression line.
SAFE_DIVIDE(
SAFE_MULTIPLY(
SAFE_MULTIPLY(
CORR(open_rate_su, uses_small_preview), STDDEV_POP(open_rate_su)
),
STDDEV_POP(uses_small_preview)
STDDEV_POP(uses_small_preview)
) AS open_uses_small_preview,

-- The following is the slope of the regression line. Note that CORR (which is Pearson's correlation
-- coefficient) is symmetric in its arguments, but since STDDEV_POP(open_rate_su) appears in the
-- numerator, this is the slope of the regression line considering open_rate_su to be
-- the "y variable" and uses_personal_to to be the "x variable" in terms of the regression line.
SAFE_DIVIDE(
SAFE_MULTIPLY(
SAFE_MULTIPLY(
CORR(open_rate_su, uses_personal_to), STDDEV_POP(open_rate_su)
),
STDDEV_POP(uses_personal_to)
STDDEV_POP(uses_personal_to)
) AS open_uses_personal_to,

-- The following is the slope of the regression line. Note that CORR (which is Pearson's correlation
-- coefficient) is symmetric in its arguments, but since STDDEV_POP(open_rate_su) appears in the
-- numerator, this is the slope of the regression line considering open_rate_su to be
-- the "y variable" and uses_ab_test_subject to be the "x variable" in terms of the regression line.
SAFE_DIVIDE(
SAFE_MULTIPLY(
SAFE_MULTIPLY(
CORR(open_rate_su, uses_ab_test_subject),
STDDEV_POP(open_rate_su)
),
STDDEV_POP(uses_ab_test_subject)
STDDEV_POP(uses_ab_test_subject)
) AS open_uses_ab_test_subject,

-- The following is the slope of the regression line. Note that CORR (which is Pearson's correlation
-- coefficient) is symmetric in its arguments, but since STDDEV_POP(open_rate_su) appears in the
-- numerator, this is the slope of the regression line considering open_rate_su to be
-- the "y variable" and uses_ab_test_content to be the "x variable" in terms of the regression line.
SAFE_DIVIDE(
SAFE_MULTIPLY(
SAFE_MULTIPLY(
CORR(open_rate_su, uses_ab_test_content),
STDDEV_POP(open_rate_su)
),
STDDEV_POP(uses_ab_test_content)
STDDEV_POP(uses_ab_test_content)
) AS open_uses_ab_test_content,

-- The following is the slope of the regression line. Note that CORR (which is Pearson's correlation
-- coefficient) is symmetric in its arguments, but since STDDEV_POP(open_rate_su) appears in the
-- numerator, this is the slope of the regression line considering open_rate_su to be
-- the "y variable" and uses_preview_text to be the "x variable" in terms of the regression line.
SAFE_DIVIDE(
SAFE_MULTIPLY(
SAFE_MULTIPLY(
CORR(open_rate_su, uses_preview_text), STDDEV_POP(open_rate_su)
),
STDDEV_POP(uses_preview_text)
STDDEV_POP(uses_preview_text)
) AS open_uses_preview_text,

-- The following is the slope of the regression line. Note that CORR (which is Pearson's correlation
-- coefficient) is symmetric in its arguments, but since STDDEV_POP(open_rate_su) appears in the
-- numerator, this is the slope of the regression line considering open_rate_su to be
-- the "y variable" and uses_sto to be the "x variable" in terms of the regression line.
SAFE_DIVIDE(
SAFE_MULTIPLY(
SAFE_MULTIPLY(
CORR(open_rate_su, uses_sto), STDDEV_POP(open_rate_su)
),
STDDEV_POP(uses_sto)
STDDEV_POP(uses_sto)
) AS open_uses_sto,

-- The following is the slope of the regression line. Note that CORR (which is Pearson's correlation
-- coefficient) is symmetric in its arguments, but since STDDEV_POP(open_rate_su) appears in the
-- numerator, this is the slope of the regression line considering open_rate_su to be
-- the "y variable" and uses_freemail_from to be the "x variable" in terms of the regression line.
SAFE_DIVIDE(
SAFE_MULTIPLY(
SAFE_MULTIPLY(
CORR(open_rate_su, uses_freemail_from), STDDEV_POP(open_rate_su)
),
STDDEV_POP(uses_freemail_from)
STDDEV_POP(uses_freemail_from)
) AS open_uses_freemail_from,

-- The following is the slope of the regression line. Note that CORR (which is Pearson's correlation
-- coefficient) is symmetric in its arguments, but since STDDEV_POP(open_rate_su) appears in the
-- numerator, this is the slope of the regression line considering open_rate_su to be
-- the "y variable" and uses_resend_non_openers to be the "x variable" in terms of the regression line.
SAFE_DIVIDE(
SAFE_MULTIPLY(
SAFE_MULTIPLY(
CORR(open_rate_su, uses_resend_non_openers),
STDDEV_POP(open_rate_su)
),
STDDEV_POP(uses_resend_non_openers)
STDDEV_POP(uses_resend_non_openers)
) AS open_uses_resend_non_openers,

-- The following is the slope of the regression line. Note that CORR (which is Pearson's correlation
-- coefficient) is symmetric in its arguments, but since STDDEV_POP(open_rate_su) appears in the
-- numerator, this is the slope of the regression line considering open_rate_su to be
-- the "y variable" and uses_promo_code to be the "x variable" in terms of the regression line.
SAFE_DIVIDE(
SAFE_MULTIPLY(
SAFE_MULTIPLY(
CORR(open_rate_su, uses_promo_code), STDDEV_POP(open_rate_su)
),
STDDEV_POP(uses_promo_code)
STDDEV_POP(uses_promo_code)
) AS open_uses_promo_code,

-- The following is the slope of the regression line. Note that CORR (which is Pearson's correlation
-- coefficient) is symmetric in its arguments, but since STDDEV_POP(open_rate_su) appears in the
-- numerator, this is the slope of the regression line considering open_rate_su to be
-- the "y variable" and uses_prex to be the "x variable" in terms of the regression line.
SAFE_DIVIDE(
SAFE_MULTIPLY(
SAFE_MULTIPLY(
CORR(open_rate_su, uses_prex), STDDEV_POP(open_rate_su)
),
STDDEV_POP(uses_prex)
STDDEV_POP(uses_prex)
) AS open_uses_prex,

-- The following is the slope of the regression line. Note that CORR (which is Pearson's correlation
-- coefficient) is symmetric in its arguments, but since STDDEV_POP(open_rate_su) appears in the
-- numerator, this is the slope of the regression line considering open_rate_su to be
-- the "y variable" and uses_ab_test_from to be the "x variable" in terms of the regression line.
SAFE_DIVIDE(
SAFE_MULTIPLY(
SAFE_MULTIPLY(
CORR(open_rate_su, uses_ab_test_from), STDDEV_POP(open_rate_su)
),
STDDEV_POP(uses_ab_test_from)
STDDEV_POP(uses_ab_test_from)
) AS open_uses_ab_test_from

FROM
Expand All @@ -184,20 +184,20 @@ imputed_effect_sizes AS (
-- action. In these cases, we assume that campaign outcome is uncorrelated with
-- the action because we do not have evidence otherwise.
COALESCE(
IF(
IF(
IS_NAN(open_uses_small_subject_line),
0,
open_uses_small_subject_line
),
0
0
) AS open_uses_small_subject_line,

-- We now impute the value of the effect size to 0 if it was NaN or NULL. This is to
-- take into account states where all campaigns either did or did not perform an
-- action. In these cases, we assume that campaign outcome is uncorrelated with
-- the action because we do not have evidence otherwise.
COALESCE(
IF(
IF(
IS_NAN(open_uses_personal_subject),
0,
open_uses_personal_subject
Expand All @@ -210,33 +210,31 @@ imputed_effect_sizes AS (
-- action. In these cases, we assume that campaign outcome is uncorrelated with
-- the action because we do not have evidence otherwise.
COALESCE(
IF(IS_NAN(open_uses_timewarp), 0, open_uses_timewarp), 0
IF(IS_NAN(open_uses_timewarp), 0, open_uses_timewarp), 0
) AS open_uses_timewarp,

-- We now impute the value of the effect size to 0 if it was NaN or NULL. This is to
-- take into account states where all campaigns either did or did not perform an
-- action. In these cases, we assume that campaign outcome is uncorrelated with
-- the action because we do not have evidence otherwise.
COALESCE(
IF(IS_NAN(open_uses_small_preview), 0, open_uses_small_preview), 0
IF(IS_NAN(open_uses_small_preview), 0, open_uses_small_preview), 0
) AS open_uses_small_preview,

-- We now impute the value of the effect size to 0 if it was NaN or NULL. This is to
-- take into account states where all campaigns either did or did not perform an
-- action. In these cases, we assume that campaign outcome is uncorrelated with
-- the action because we do not have evidence otherwise.
COALESCE(
IF(IS_NAN(open_uses_personal_to), 0, open_uses_personal_to), 0
IF(IS_NAN(open_uses_personal_to), 0, open_uses_personal_to), 0
) AS open_uses_personal_to,

-- We now impute the value of the effect size to 0 if it was NaN or NULL. This is to
-- take into account states where all campaigns either did or did not perform an
-- action. In these cases, we assume that campaign outcome is uncorrelated with
-- the action because we do not have evidence otherwise.
COALESCE(
IF(
IS_NAN(open_uses_ab_test_subject), 0, open_uses_ab_test_subject
),
IF(IS_NAN(open_uses_ab_test_subject), 0, open_uses_ab_test_subject),
0
) AS open_uses_ab_test_subject,

Expand All @@ -245,9 +243,7 @@ imputed_effect_sizes AS (
-- action. In these cases, we assume that campaign outcome is uncorrelated with
-- the action because we do not have evidence otherwise.
COALESCE(
IF(
IS_NAN(open_uses_ab_test_content), 0, open_uses_ab_test_content
),
IF(IS_NAN(open_uses_ab_test_content), 0, open_uses_ab_test_content),
0
) AS open_uses_ab_test_content,

Expand All @@ -256,7 +252,7 @@ imputed_effect_sizes AS (
-- action. In these cases, we assume that campaign outcome is uncorrelated with
-- the action because we do not have evidence otherwise.
COALESCE(
IF(IS_NAN(open_uses_preview_text), 0, open_uses_preview_text), 0
IF(IS_NAN(open_uses_preview_text), 0, open_uses_preview_text), 0
) AS open_uses_preview_text,

-- We now impute the value of the effect size to 0 if it was NaN or NULL. This is to
Expand All @@ -272,44 +268,44 @@ imputed_effect_sizes AS (
-- action. In these cases, we assume that campaign outcome is uncorrelated with
-- the action because we do not have evidence otherwise.
COALESCE(
IF(IS_NAN(open_uses_freemail_from), 0, open_uses_freemail_from), 0
IF(IS_NAN(open_uses_freemail_from), 0, open_uses_freemail_from), 0
) AS open_uses_freemail_from,

-- We now impute the value of the effect size to 0 if it was NaN or NULL. This is to
-- take into account states where all campaigns either did or did not perform an
-- action. In these cases, we assume that campaign outcome is uncorrelated with
-- the action because we do not have evidence otherwise.
COALESCE(
IF(
IF(
IS_NAN(open_uses_resend_non_openers),
0,
open_uses_resend_non_openers
),
0
0
) AS open_uses_resend_non_openers,

-- We now impute the value of the effect size to 0 if it was NaN or NULL. This is to
-- take into account states where all campaigns either did or did not perform an
-- action. In these cases, we assume that campaign outcome is uncorrelated with
-- the action because we do not have evidence otherwise.
COALESCE(
IF(IS_NAN(open_uses_promo_code), 0, open_uses_promo_code), 0
IF(IS_NAN(open_uses_promo_code), 0, open_uses_promo_code), 0
) AS open_uses_promo_code,

-- We now impute the value of the effect size to 0 if it was NaN or NULL. This is to
-- take into account states where all campaigns either did or did not perform an
-- action. In these cases, we assume that campaign outcome is uncorrelated with
-- the action because we do not have evidence otherwise.
COALESCE(
IF(IS_NAN(open_uses_prex), 0, open_uses_prex), 0
IF(IS_NAN(open_uses_prex), 0, open_uses_prex), 0
) AS open_uses_prex,

-- We now impute the value of the effect size to 0 if it was NaN or NULL. This is to
-- take into account states where all campaigns either did or did not perform an
-- action. In these cases, we assume that campaign outcome is uncorrelated with
-- the action because we do not have evidence otherwise.
COALESCE(
IF(IS_NAN(open_uses_ab_test_from), 0, open_uses_ab_test_from), 0
IF(IS_NAN(open_uses_ab_test_from), 0, open_uses_ab_test_from), 0
) AS open_uses_ab_test_from

FROM
Expand Down
2 changes: 2 additions & 0 deletions src/sqlfluff/dialects/dialect_snowflake.py
Expand Up @@ -3251,13 +3251,15 @@ class MergeNotMatchedClauseSegment(BaseSegment):
"WHEN",
"NOT",
"MATCHED",
Sequence("AND", Ref("ExpressionSegment"), optional=True),
"THEN",
seve-martinez marked this conversation as resolved.
Show resolved Hide resolved
),
)
parse_grammar = Sequence(
"WHEN",
"NOT",
"MATCHED",
Sequence("AND", Ref("ExpressionSegment"), optional=True),
"THEN",
Ref("MergeInsertClauseSegment"),
)
Expand Down
5 changes: 4 additions & 1 deletion test/fixtures/dialects/snowflake/snowflake_merge_into.sql
Expand Up @@ -6,4 +6,7 @@ merge into target_table using source_table
update set target_table.description = source_table.description;

merge into t1 using t2 on t1.t1key = t2.t2key
when matched and t2.marked = 1 then delete
when matched and t2.marked = 1 then delete;

merge into t1 using t2 on t1.t1key = t2.t2key
when not matched and t2.marked = 1 then insert (marked) values (1)
seve-martinez marked this conversation as resolved.
Show resolved Hide resolved