# Tables

    CREATE [ OR REPLACE ]
        [ { [ { LOCAL | GLOBAL } ] TEMP | TEMPORARY | VOLATILE | TRANSIENT } ]
      TABLE [ IF NOT EXISTS ] <table_name>    
      (
        -- Column definition 
        -- Additional column definitions           
        -- Out-of-line constraints        
      )
    
      [ CLUSTER BY ( <expr> [ , <expr> , ... ] ) ]
      [ ENABLE_SCHEMA_EVOLUTION = { TRUE | FALSE } ]
      [ DATA_RETENTION_TIME_IN_DAYS = <integer> ]
      [ MAX_DATA_EXTENSION_TIME_IN_DAYS = <integer> ]
      [ CHANGE_TRACKING = { TRUE | FALSE } ]
      [ DEFAULT_DDL_COLLATION = '<collation_specification>' ]
      [ COPY GRANTS ]
      [ COPY TAGS ]
      [ COMMENT = '<string_literal>' ]
      [ [ WITH ] ROW ACCESS POLICY <policy_name> ON ( <col_name> [ , <col_name> ... ] ) ]
      [ [ WITH ] AGGREGATION POLICY <policy_name> [ ENTITY KEY ( <col_name> [ , <col_name> ... ] ) ] ]
      [ [ WITH ] JOIN POLICY <policy_name> [ ALLOWED JOIN KEYS ( <col_name> [ , ... ] ) ] ]
      [ [ WITH ] STORAGE LIFECYCLE POLICY <policy_name> ON ( <col_name> [ , <col_name> ... ] ) ]
      [ [ WITH ] TAG ( <tag_name> = '<tag_value>' [ , <tag_name> = '<tag_value>' , ... ] ) ]
      [ WITH CONTACT ( <purpose> = <contact_name> [ , <purpose> = <contact_name> ... ] ) ]




## Permanent

- Default
- Time Travel (up to 90 days on Enterprise+) 
- 7‑day Fail‑safe for disaster recovery. 
- Recommended for production data

### Cloning: 
- Permanent => Temporary 
- Permanent => Transient 
- Permanent => Permanent

In [None]:
-- Permanent table (the default table type) seeded with two sample rows.
USE SCHEMA sf_cert_prep.public;

CREATE OR REPLACE TABLE t_perm (
    id INT,
    v  STRING
);

-- Explicit column list: the insert no longer depends on the table's column order.
INSERT INTO t_perm (id, v)
VALUES
    (1, 'A'),
    (2, 'B');


## Transient

- Persist until dropped 
- No Fail‑safe
- Time Travel 0–1 day
- Recommended for intermediate/staging data


### Cloning: 
- Transient => Temporary 
- Transient => Transient

In [None]:
-- Transient table (no Fail-safe; Time Travel 0-1 day) seeded with two sample rows.
USE SCHEMA sf_cert_prep.public;

CREATE OR REPLACE TRANSIENT TABLE t_tran (
    id INT,
    v  STRING
);

-- Explicit column list: the insert no longer depends on the table's column order.
INSERT INTO t_tran (id, v)
VALUES
    (1, 'A'),
    (2, 'B');



## Temporary

- session‑scoped
- auto‑dropped at session end
- no Fail‑safe.

***Important:*** You can create a temporary table with the same name as an existing permanent/transient table. The temporary table takes precedence.

The data stored in the table contributes to the overall storage charges that Snowflake bills your account. 

### Cloning: 
- Temporary => Temporary 
- Temporary => Transient


In [None]:
-- Temporary table (session-scoped) seeded with one sample row.
USE SCHEMA sf_cert_prep.public;

CREATE OR REPLACE TEMPORARY TABLE t_temp (
    id INT,
    v  STRING
);

-- Explicit column list: the insert no longer depends on the table's column order.
INSERT INTO t_temp (id, v)
VALUES (99, 'TEMP');


In [None]:

-- Demonstrates name shadowing: a temporary table named like the permanent t_perm.
USE SCHEMA sf_cert_prep.public;

-- The temporary table shadows the permanent one
CREATE OR REPLACE TEMPORARY TABLE t_perm (
    id INT,
    v  STRING
);

-- Resolves to the temporary table
SELECT *
FROM t_perm;

-- Optional cleanup (left disabled): drops the temp; permanent still exists
--DROP TABLE IF EXISTS t_perm;


## External

- read‑only metadata layer over files in S3/Azure/GCS
- no DML allowed
- can be used in query and join operations
- typically slower than native tables
- consider a materialized view for speed


### File Format:
- Apache Parquet
- Apache Avro
- ORC
- JSON
- CSV files

#### Conditions
- An external table doesn’t inherit FILE_FORMAT options specified in a stage definition when that stage is used for loading data into the table.
- You must specify the FILE_FORMAT options explicitly in the external table definition.
- Snowflake uses defaults for any FILE_FORMAT parameters omitted from the external table definition.

    Default: TYPE = CSV


### Column Definition

#### col_name
String that specifies the column identifier.

#### col_type
The data type must match the result of expr for the column.

#### expr

String that specifies the expression for the column.

- CSV: 
    mycol varchar as (value:c1::varchar)
- Semi-structured: 
    mycol varchar as (value:"b"."c"::varchar)

#### METADATA$FILENAME

A pseudocolumn that identifies the name of each staged data file that is included in the external table, including its path in the stage

### REFRESH_ON_CREATE 
Specifies whether to automatically refresh the external table metadata one time.

### AUTO_REFRESH 
Specifies whether Snowflake should enable triggering automatic refreshes of the external table metadata when new or updated data files are available in the named external stage

#### AWS_SNS_TOPIC 
Required only when configuring AUTO_REFRESH for Amazon S3 stages using Amazon Simple Notification Service (SNS)





In [ ]:
-- EXTERNAL - Create an external table over the staged loan payments CSV file
USE SCHEMA sf_cert_prep.public;

DROP EXTERNAL TABLE IF EXISTS Loan_payments_data;

-- Named CSV file format, referenced by the external table definition below.
CREATE OR REPLACE FILE FORMAT csv_fmt_loan
    TYPE = CSV
    FIELD_DELIMITER = ','
    FIELD_OPTIONALLY_ENCLOSED_BY = '"'
    SKIP_HEADER = 1
    EMPTY_FIELD_AS_NULL = TRUE
    NULL_IF = ('', 'NULL');

-- Every column is an expression over VALUE; c1..c11 address the CSV fields by position.
-- Typed fields go through TRY_* conversions; the last two columns expose file metadata.
CREATE OR REPLACE EXTERNAL TABLE Loan_payments_data
(
    loan_id         STRING        AS (VALUE:c1::STRING),
    loan_status     STRING        AS (VALUE:c2::STRING),
    principal       NUMBER(18,2)  AS (TRY_TO_DECIMAL(VALUE:c3::STRING, 18, 2)),
    terms           NUMBER(9,0)   AS (TRY_TO_NUMBER(VALUE:c4::STRING)),
    effective_date  DATE          AS (TRY_TO_DATE(VALUE:c5::STRING)),
    due_date        DATE          AS (TRY_TO_DATE(VALUE:c6::STRING)),
    paid_off_time   TIMESTAMP_NTZ AS (TRY_TO_TIMESTAMP_NTZ(VALUE:c7::STRING, 'MM/DD/YYYY HH:MI')),
    past_due_days   NUMBER(9,0)   AS (TRY_TO_NUMBER(VALUE:c8::STRING)),
    age             NUMBER(9,0)   AS (TRY_TO_NUMBER(VALUE:c9::STRING)),
    education       STRING        AS (VALUE:c10::STRING),
    gender          STRING        AS (VALUE:c11::STRING),
    file_name       STRING        AS (METADATA$FILENAME),
    file_row_number NUMBER        AS (METADATA$FILE_ROW_NUMBER)
)
    WITH LOCATION = @AWS_STAGE
    PATTERN = '.*Loan_payments_data\.csv$'
    FILE_FORMAT = (FORMAT_NAME = SF_CERT_PREP.public.csv_fmt_loan)
    AUTO_REFRESH = FALSE;

-- Read the external table back through its fully qualified name.
SELECT *
FROM sf_cert_prep.public.Loan_payments_data;


## Hybrid

***Hybrid tables are currently not available to trial accounts.***

A hybrid table is a high-performance table designed for transactional operations (OLTP‑like access).

 - row‑based storage, not columnar like normal tables, with indexes for low‑latency access.
 - PRIMARY KEY required
 - Generally available on AWS/Azure only
 - Not allowed as transient/temporary

 ### Use cases:

- Metadata for applications and workflows, such as maintaining state for an ingestion workflow that requires high-concurrency updates to a single table from thousands of parallel workers.
- Lower-latency serving of precomputed aggregates through an API or a user interface.
- Lightweight transactional applications with relational data models.
 
### Architecture

- Uses the same Snowflake database service as normal tables.
- Queries are compiled and optimized in the cloud services layer and executed in the same query engine and virtual warehouses as standard tables

![alt](https://docs.snowflake.com/en/_images/unistore-arch.png)

------------------------------------------------------------------------


### Features



|Feature     | Hybrid                                                        | Standard                 |
|------------|-----------------------------------------------                |--------------------------|
|data layout | Row-oriented, with secondary columnar storage                 | Columnar micro-partitions|
|Locking     | Row-level                                                     | Partition or table       |
|PRIMARY KEY | Required, enforced                                            | Optional, not enforced   |
|FOREIGN KEY | Optional, enforced (referential integrity)                    | Optional, not enforced   |
|UNIQUE      | Optional (except for PRIMARY KEY), enforced                   | Optional, not enforced   |
|NOT NULL    | Optional (except for PRIMARY KEY), enforced                   | Optional, not enforced   |
|Indexes     | Supported for performance- updated synchronously on writes    |  search optimization, not enforced   |


In [None]:
-- HYBRID - Create a hybrid table and run basic row-level DML against it
-- Hybrid tables are currently not available to trial accounts.
USE SCHEMA sf_cert_prep.public;

-- A hybrid table requires a PRIMARY KEY; id is generated by AUTOINCREMENT.
CREATE OR REPLACE HYBRID TABLE application_log (
    id   NUMBER PRIMARY KEY AUTOINCREMENT,
    col1 VARCHAR(20),
    col2 VARCHAR(20) NOT NULL
);

-- Seed four rows, once each; id is omitted so AUTOINCREMENT assigns it.
INSERT INTO application_log (col1, col2) VALUES ('A1', 'B1');
INSERT INTO application_log (col1, col2) VALUES ('A2', 'B2');
INSERT INTO application_log (col1, col2) VALUES ('A3', 'B3');
INSERT INTO application_log (col1, col2) VALUES ('A4', 'B4');

-- State before the changes
SELECT id, col1, col2
FROM application_log
ORDER BY id;

-- Single-row update and delete addressed by primary key
UPDATE application_log
SET col2 = 'B3-updated'
WHERE id = 3;

DELETE FROM application_log
WHERE id = 4;

-- State after the changes
SELECT id, col1, col2
FROM application_log
ORDER BY id;


## Iceberg tables

- data + metadata stored in your cloud storage (via an external volume).
- No Fail‑safe
- Snowflake‑managed variant supports full DML
- external‑catalog variant requires metadata refreshes

In [None]:
-- Pre-req: an EXTERNAL VOLUME to your cloud storage
-- (NOTE(review): 'MY_EXT_VOL' looks like a placeholder name; replace with your own volume)
-- Iceberg table using Snowflake as the catalog (CATALOG = 'SNOWFLAKE').
CREATE OR REPLACE ICEBERG TABLE ic_orders (
    order_id   INT,
    amount     DOUBLE,
    -- Iceberg timestamps are microsecond precision, so precision 6 is declared explicitly
    created_ts TIMESTAMP_NTZ(6)
)
CATALOG = 'SNOWFLAKE'
EXTERNAL_VOLUME = 'MY_EXT_VOL'
BASE_LOCATION = 'iceberg/ic_orders';

-- Explicit column list: the insert no longer depends on the table's column order.
INSERT INTO ic_orders (order_id, amount, created_ts)
VALUES (1, 12.34, CURRENT_TIMESTAMP());

SELECT order_id, amount, created_ts
FROM ic_orders;

## Dynamic

- “always‑fresh” tables maintained by Snowflake from a defining query + TARGET_LAG
- simplify pipelines vs streams/tasks
- Have important conditions / limitations:
    - source table must have change tracking enabled
    - 

In [None]:
-- Base table with change tracking + non-zero Time Travel (required for incremental mode).
-- Both properties are set directly in CREATE TABLE rather than in a follow-up ALTER.
CREATE OR REPLACE TABLE raw_events (
    id      NUMBER,
    ts      TIMESTAMP,
    payload VARIANT
)
DATA_RETENTION_TIME_IN_DAYS = 1
CHANGE_TRACKING = TRUE;

-- Append ten generated rows; the explicit column list keeps the insert order-independent.
INSERT INTO raw_events (id, ts, payload)
SELECT
    SEQ4(),
    CURRENT_TIMESTAMP(),
    OBJECT_CONSTRUCT('k', 'v')
FROM TABLE(GENERATOR(ROWCOUNT => 10));

-- Dynamic table that aggregates counts by day, with 5-min target lag
CREATE OR REPLACE DYNAMIC TABLE dt_events_daily (
    day DATE,
    cnt NUMBER
)
TARGET_LAG = '5 minutes'
WAREHOUSE = COMPUTE_WH
REFRESH_MODE = INCREMENTAL
AS
SELECT
    -- Cast so the expression type matches the declared DATE column
    DATE_TRUNC('DAY', ts)::DATE AS day,
    COUNT(*) AS cnt
FROM raw_events
-- Group by the expression itself rather than by select-list position
GROUP BY DATE_TRUNC('DAY', ts)::DATE;

-- Query it like a normal table
SELECT day, cnt
FROM dt_events_daily
ORDER BY day DESC;

## Directory Tables 
- Pseudo-table.
- Listing of files on a stage
- Check NoSQL notebook for more details