## Test 1: Tables vs Views: join distinct chrom, pos, ref, alt from cg and illumina tables for chromosome 1

### Querying Tables

#### Create tables of distinct chrom, pos, ref, alt

-- Table intended to hold the distinct (chrom, pos, ref, alt) variants from illumina.
-- Regular columns: pos, ref, alt. Partition columns: chrom, pos_block.
-- NOTE(review): pos_block is presumably a bucketed range of pos -- confirm
-- against the job that populates this table.
CREATE TABLE p7_product.illumina_distinct (
    pos INT,
    ref STRING,
    alt STRING
)
PARTITIONED BY (chrom STRING, pos_block INT);


-- Table intended to hold the distinct (chrom, pos, ref, alt) variants from cg.
-- Regular columns: pos, ref, alt. Partition columns: chrom, pos_block.
-- NOTE(review): pos_block is presumably a bucketed range of pos -- confirm
-- against the job that populates this table.
CREATE TABLE p7_product.comgen_distinct (
    pos INT,
    ref STRING,
    alt STRING
)
PARTITIONED BY (chrom STRING, pos_block INT);

#### Join tables

In [None]:
-- Count the chromosome 1 variants found in either source table.
-- Each side is restricted to chrom = '1' BEFORE the full outer join. Filtering
-- both sides' chrom in an outer WHERE clause rejects every NULL-extended row,
-- which silently turns the full outer join into an inner join and drops the
-- variants that appear in only one table.
-- NOTE(review): this changes the row set being counted, so timings recorded
-- for the previous form of the query should be re-measured.
SELECT COUNT(1)
FROM (
    -- DISTINCT is kept from the original query; variants present in both
    -- tables already collapse into a single row through the join keys.
    SELECT DISTINCT
        CAST(COALESCE(ilmn.pos, cg.pos) AS INT) AS pos,
        COALESCE(ilmn.ref, cg.ref) AS ref,
        COALESCE(ilmn.alt, cg.alt) AS alt,
        COALESCE(ilmn.chrom, cg.chrom) AS chrom
    FROM (
        SELECT chrom, pos, ref, alt
        FROM p7_product.illumina_distinct
        WHERE chrom = '1'
    ) AS ilmn
    FULL OUTER JOIN (
        SELECT chrom, pos, ref, alt
        FROM p7_product.comgen_distinct
        WHERE chrom = '1'
    ) AS cg
        ON ilmn.chrom = cg.chrom
        AND ilmn.pos = cg.pos
        AND ilmn.ref = cg.ref
        AND ilmn.alt = cg.alt
) AS test;

In [None]:
Result 1: Fetched 1 row(s) in 6.03s
Result 2: Fetched 1 row(s) in 6.15s
Result 3: Fetched 1 row(s) in 5.98s
Average time: 6.05s

### Querying Views

In [None]:
-- View over the illumina distinct-variant table, exposing every column.
-- Columns are listed explicitly (regular columns first, then the partition
-- columns chrom and pos_block) so the view's shape is stated in its definition
-- rather than implied by SELECT *.
CREATE VIEW p7_product.illumina_view AS
SELECT
    pos,
    ref,
    alt,
    chrom,
    pos_block
FROM p7_product.illumina_distinct;

-- View over the cg distinct-variant table, exposing every column.
-- Columns are listed explicitly (regular columns first, then the partition
-- columns chrom and pos_block) so the view's shape is stated in its definition
-- rather than implied by SELECT *.
CREATE VIEW p7_product.comgen_view AS
SELECT
    pos,
    ref,
    alt,
    chrom,
    pos_block
FROM p7_product.comgen_distinct;

In [None]:
-- Count the chromosome 1 variants found in either source view.
-- Each side is restricted to chrom = '1' BEFORE the full outer join. Filtering
-- both sides' chrom in an outer WHERE clause rejects every NULL-extended row,
-- which silently turns the full outer join into an inner join and drops the
-- variants that appear in only one view.
-- NOTE(review): this changes the row set being counted, so timings recorded
-- for the previous form of the query should be re-measured.
SELECT COUNT(1)
FROM (
    -- DISTINCT is kept from the original query; variants present in both
    -- views already collapse into a single row through the join keys.
    SELECT DISTINCT
        CAST(COALESCE(ilmn.pos, cg.pos) AS INT) AS pos,
        COALESCE(ilmn.ref, cg.ref) AS ref,
        COALESCE(ilmn.alt, cg.alt) AS alt,
        COALESCE(ilmn.chrom, cg.chrom) AS chrom
    FROM (
        SELECT chrom, pos, ref, alt
        FROM p7_product.illumina_view
        WHERE chrom = '1'
    ) AS ilmn
    FULL OUTER JOIN (
        SELECT chrom, pos, ref, alt
        FROM p7_product.comgen_view
        WHERE chrom = '1'
    ) AS cg
        ON ilmn.chrom = cg.chrom
        AND ilmn.pos = cg.pos
        AND ilmn.ref = cg.ref
        AND ilmn.alt = cg.alt
) AS test;

In [None]:
Result 1: Fetched 1 row(s) in 5.89s
Result 2: Fetched 1 row(s) in 6.20s
Result 3: Fetched 1 row(s) in 5.92s
Average time: 6.00s

## Test 2: Querying a View that includes a Join

In [None]:
-- View holding the full outer join of the illumina and cg distinct-variant
-- tables across all chromosomes; no chromosome filter is applied here.
-- Output columns, in order: pos, ref, alt, chrom -- each taken from whichever
-- side of the join is non-NULL.
CREATE VIEW p7_product.test_view AS
SELECT DISTINCT
    CAST(COALESCE(ilmn.pos, cg.pos) AS INT) AS pos,
    COALESCE(ilmn.ref, cg.ref) AS ref,
    COALESCE(ilmn.alt, cg.alt) AS alt,
    COALESCE(ilmn.chrom, cg.chrom) AS chrom
FROM p7_product.illumina_distinct AS ilmn
FULL OUTER JOIN p7_product.comgen_distinct AS cg
    ON ilmn.chrom = cg.chrom
    AND ilmn.pos = cg.pos
    AND ilmn.ref = cg.ref
    AND ilmn.alt = cg.alt;

-- Count the chromosome 1 rows of the joined view. The filter is applied to
-- the view's output chrom column, i.e. outside the view definition.
SELECT COUNT(1)
FROM (
    SELECT *
    FROM p7_product.test_view
    WHERE chrom = '1'
) AS test;


In [None]:
Result 1: Fetched 1 row(s) in 132.55s
Result 2: Fetched 1 row(s) in 130.79s
Result 3: Fetched 1 row(s) in 137.28s
Average Time: 133.54s

## Test 3: Creating View using WHERE clause

In [None]:
-- View holding the chromosome 1 variants found in either source table.
-- Output columns, in order: pos, ref, alt, chrom.
-- Each side is restricted to chrom = '1' BEFORE the full outer join. Filtering
-- both sides' chrom in an outer WHERE clause rejects every NULL-extended row,
-- which silently turns the full outer join into an inner join and drops the
-- variants that appear in only one table.
-- NOTE(review): this changes the rows the view returns, so timings recorded
-- against the previous definition should be re-measured.
CREATE VIEW p7_product.test_view_chr1 AS
SELECT DISTINCT
    CAST(COALESCE(ilmn.pos, cg.pos) AS INT) AS pos,
    COALESCE(ilmn.ref, cg.ref) AS ref,
    COALESCE(ilmn.alt, cg.alt) AS alt,
    COALESCE(ilmn.chrom, cg.chrom) AS chrom
FROM (
    SELECT chrom, pos, ref, alt
    FROM p7_product.illumina_distinct
    WHERE chrom = '1'
) AS ilmn
FULL OUTER JOIN (
    SELECT chrom, pos, ref, alt
    FROM p7_product.comgen_distinct
    WHERE chrom = '1'
) AS cg
    ON ilmn.chrom = cg.chrom
    AND ilmn.pos = cg.pos
    AND ilmn.ref = cg.ref
    AND ilmn.alt = cg.alt;

-- Count every row of the chromosome 1 view; no extra filter is needed here
-- because the view definition already restricts the rows.
SELECT COUNT(1)
FROM (
    SELECT *
    FROM p7_product.test_view_chr1
) AS test;

In [None]:
Result 1: Fetched 1 row(s) in 10.98s
Result 2: Fetched 1 row(s) in 9.99s
Result 3: Fetched 1 row(s) in 9.18s
Average Time: 10.05s