In [None]:
-- Run the setup statements in this notebook as the ACCOUNTADMIN role.
use role ACCOUNTADMIN;

In [None]:
-- Start from a fresh HUD_DEMO database. OR REPLACE drops any existing
-- database of that name together with everything inside it.
create or replace database HUD_DEMO;

-- Internal stage for the source documents.
--   * SNOWFLAKE_SSE: server-side encryption for the staged files.
--   * directory table enabled: lets the stage be queried with DIRECTORY()
--     and tracked by a stream.
create stage docstage
    encryption = (type = 'SNOWFLAKE_SSE')
    directory = (enable = true);

In [None]:
-- Remove any existing database with this name; the step below asks for the
-- Marketplace listing to be mounted under the same name.
drop database if exists HUD_HOUSINGFORECASTBYZIPCODE;

### Access Snowflake Marketplace and search for the listing:  ***US Housing Forecast by Zip Code | 2010 - 2035***
### "Get" the listing and name the database "HUD_HOUSINGFORECASTBYZIPCODE"

In [None]:
-- Point the session at the Marketplace database mounted in the previous step.
-- The database name must match the name given when "getting" the listing
-- (HUD_HOUSINGFORECASTBYZIPCODE); the earlier spelling dropped the "O" in
-- "ZIPCODE" and therefore referred to a database that does not exist.
use role ACCOUNTADMIN;
use database HUD_HOUSINGFORECASTBYZIPCODE;
use schema ATERIO_DATASHEET_DEV;

### Look at the data we accessed through Marketplace

In [None]:
-- Quick look at ten rows of the shared table. There is no ORDER BY, so which
-- ten rows come back is not defined.
select *
from US_HOUSING_FORECAST_ZIPCODE
limit 10;

In [None]:
import streamlit as st
# Display the HUDDemoArch.png image inline; the relative path means the file
# is looked up next to the notebook.
architecture_image = "HUDDemoArch.png"
st.image(architecture_image)

In [None]:
from snowflake.snowpark.context import get_active_session
import snowflake.snowpark.functions as F
from snowflake.snowpark.functions import col
import streamlit as st
import altair as alt
# Reuse the Snowflake session the notebook is already running in.
session = get_active_session()

# Snowpark DataFrame over the Marketplace table. The name is fully qualified
# so the lookup does not depend on whichever database/schema an earlier SQL
# cell happened to leave active in the session.
df_housing_forecast = session.table(
    'HUD_HOUSINGFORECASTBYZIPCODE.ATERIO_DATASHEET_DEV.US_HOUSING_FORECAST_ZIPCODE'
)


In [None]:
# Print a preview of the DataFrame; 10 rows is Snowpark's default for show().
df_housing_forecast.show(10)

In [None]:
-- Switch the working schema; every unqualified object created in the cells
-- that follow lands here.
-- NOTE(review): tmanfredi.HUD looks like a personal database/schema, while the
-- setup cell creates HUD_DEMO -- confirm which one this demo is meant to use.
use schema tmanfredi.HUD;

-- Take a local snapshot of the Marketplace table (all columns, all rows).
create or replace table US_HOUSING_FORECAST_ZIPCODE as
select *
from HUD_HOUSINGFORECASTBYZIPCODE.ATERIO_DATASHEET_DEV.US_HOUSING_FORECAST_ZIPCODE;

### Build the RAG pipeline for unstructured documents

In [None]:
-- Chunk store that the search service indexes. Nothing else in this notebook
-- creates it, so make sure it exists before the service is defined; this is a
-- no-op when the table is already there. The column order matches the
-- positional INSERT issued by the process_new_files task
-- (filename, size, file_url, chunk).
create table if not exists DOC_CHUNKS_TABLE (
    filename varchar,
    size     number,
    file_url varchar,
    chunk    varchar
);

-- Cortex Search service over the document chunks.
--   on chunk    : the text column that is searched
--   warehouse   : compute used to build and refresh the index
--   target_lag  : how far the index may trail the source table
-- filename and file_url are carried along so results can point back to the
-- source document.
create or replace cortex search service GPRag_Service
    on chunk
    warehouse = COMPUTE_WH
    target_lag = '1 minute'
as (
    select
        chunk,
        filename,
        file_url
    from DOC_CHUNKS_TABLE
);

In [None]:
-- Change stream over the directory table of the document stage. It is created
-- before the task because the task's WHEN clause refers to it by name.
-- The stage is the one created in the setup cell (HUD_DEMO.PUBLIC.docstage),
-- which is also where the notebook tells the user to upload the PDF; it is
-- fully qualified so the pipeline works regardless of the current schema.
create or replace stream new_doc_stream on stage HUD_DEMO.PUBLIC.docstage;

-- Task that parses newly staged documents and appends their chunks to
-- DOC_CHUNKS_TABLE. It is scheduled every minute but only runs when the
-- stream reports new data.
create or replace task process_new_files
    warehouse = COMPUTE_WH
    schedule = '1 minutes'
    when system$stream_has_data('new_doc_stream')
    as
        BEGIN
            -- Step 1: parse every file that the stream reports as new.
            -- Reading the stream inside this CTAS consumes it, so the same
            -- files are not picked up again on the next run.
            CREATE OR REPLACE TEMPORARY TABLE PARSED_DOCUMENTS AS
            SELECT
                RELATIVE_PATH AS FILENAME,
                FILE_URL,
                SIZE,
                LAST_MODIFIED,
                -- PARSE_DOCUMENT returns an object; only its "content" field
                -- holds the extracted text, so pull that out instead of
                -- stringifying the whole object as JSON.
                TO_VARCHAR(
                    SNOWFLAKE.CORTEX.PARSE_DOCUMENT(
                        '@HUD_DEMO.PUBLIC.docstage',
                        RELATIVE_PATH,
                        {'mode': 'layout'}
                    ):content
                ) AS PARSED_CONTENT
            FROM
                DIRECTORY(@HUD_DEMO.PUBLIC.docstage)
            WHERE RELATIVE_PATH IN (SELECT RELATIVE_PATH FROM new_doc_stream); -- for new documents only

            -- Step 2: split the parsed text into overlapping chunks, one row
            -- per chunk. The insert is positional, so DOC_CHUNKS_TABLE must
            -- have its columns in the order filename, size, file_url, chunk.
            INSERT INTO DOC_CHUNKS_TABLE
                SELECT
                    filename,
                    size,
                    file_url,
                    TO_VARCHAR(c.value) AS chunk
                FROM PARSED_DOCUMENTS,
                    LATERAL FLATTEN(
                        input => SNOWFLAKE.CORTEX.SPLIT_TEXT_RECURSIVE_CHARACTER(
                            PARSED_CONTENT,
                            'markdown',
                            1512, -- Chunk size
                            256   -- Overlap
                        )
                    ) c;

        END;

-- Tasks are created suspended; resume it so the schedule takes effect.
alter task process_new_files resume;

### Download the PDF from the HUD website here: https://www.huduser.gov/portal/publications/pdf/National-CHMA-24.pdf

### Then upload the PDF to the `docstage` stage in HUD_DEMO.PUBLIC
