In [None]:
# Cell 1: Import necessary libraries
import snowflake.snowpark.functions as F
import snowflake.snowpark.types as T

In [None]:
%%sql
-- Cell 2: No connection setup is needed in Snowflake notebooks;
-- the session is already available.
--
-- Cell 3: Create schema for harmonized data.
-- The %%sql cell magic has to be the first line of the cell, so the notes
-- above are written as SQL comments rather than Python comments.
-- IF NOT EXISTS keeps this cell safe to re-run.
CREATE SCHEMA IF NOT EXISTS CRYPTO_DB.HARMONIZED_CRYPTO;

In [None]:
%%sql
-- Target table for the harmonized crypto data: one row per
-- (crypto_symbol, timestamp). OR REPLACE means re-running this cell
-- discards any rows already stored in the table.
CREATE OR REPLACE TABLE CRYPTO_DB.HARMONIZED_CRYPTO.CRYPTO_HARMONIZED (
    -- Row identity
    CRYPTO_SYMBOL                VARCHAR(10),
    TIMESTAMP                    TIMESTAMP_NTZ,
    DATE_DAY                     DATE,

    -- Price / volume fields carried over from the raw tables
    OPEN                         FLOAT,
    HIGH                         FLOAT,
    LOW                          FLOAT,
    CLOSE                        FLOAT,
    VOLUME                       FLOAT,
    ADJ_CLOSE                    FLOAT,

    -- Derived during the transform step (close vs. open of the same row)
    PRICE_CHANGE_24H             FLOAT,
    PRICE_CHANGE_PERCENTAGE_24H  FLOAT,

    -- Filled in afterwards by UPDATE_VOLATILITY_METRICS
    VOLATILITY_7D                FLOAT,
    NORMALIZED_PRICE             FLOAT,

    PRIMARY KEY (CRYPTO_SYMBOL, TIMESTAMP)
);

In [None]:
# Cell 5: Transform and harmonize Yahoo Finance crypto data
def transform_yahoo_finance_data():
    """Build the harmonized crypto DataFrame from the per-coin raw tables.

    Reads CRYPTO_DB.PUBLIC.BTC_RAW, ETH_RAW and DOGE_RAW through the notebook's
    ``session``. Each raw table is expected to expose the columns
    ``timestamp``, ``open``, ``high``, ``low``, ``close``, ``volume`` and
    ``adjclose``; their order in the raw tables does not matter.

    Returns:
        A Snowpark DataFrame with the columns crypto_symbol, timestamp,
        date_day, open, high, low, close, volume, adj_close,
        price_change_24h and price_change_percentage_24h, holding at most one
        row per (crypto_symbol, timestamp).
    """
    # Create references to raw tables (assuming they follow this structure)
    btc = session.table("CRYPTO_DB.PUBLIC.BTC_RAW")
    eth = session.table("CRYPTO_DB.PUBLIC.ETH_RAW")
    doge = session.table("CRYPTO_DB.PUBLIC.DOGE_RAW")

    def standardize_crypto_df(df, symbol):
        """Project one raw table onto the harmonized column list, in order."""
        price_change = F.col("close") - F.col("open")
        # Snowflake raises on division by zero, so a zero open price yields a
        # NULL percentage instead of failing the whole query.
        price_change_pct = F.iff(
            F.col("open") == F.lit(0),
            F.lit(None),
            price_change / F.col("open") * 100,
        )
        # Selecting an explicit column list here (rather than appending
        # columns) guarantees every source has the same column order, which
        # the positional unionAll below relies on.
        return df.select(
            F.lit(symbol).alias("crypto_symbol"),
            F.col("timestamp"),
            F.to_date(F.col("timestamp")).alias("date_day"),
            F.col("open"),
            F.col("high"),
            F.col("low"),
            F.col("close"),
            F.col("volume"),
            F.col("adjclose").alias("adj_close"),
            price_change.alias("price_change_24h"),
            price_change_pct.alias("price_change_percentage_24h"),
        )

    # Standardize each dataframe
    btc_std = standardize_crypto_df(btc, "BTC")
    eth_std = standardize_crypto_df(eth, "ETH")
    doge_std = standardize_crypto_df(doge, "DOGE")

    # Union all data into a single DataFrame (columns are matched by position)
    all_crypto = btc_std.unionAll(eth_std).unionAll(doge_std)

    # Remove duplicates so (crypto_symbol, timestamp) identifies a single row
    harmonized = all_crypto.dropDuplicates(["crypto_symbol", "timestamp"])

    return harmonized

In [None]:
%%sql
-- Cell 6: Create SQL UDF for normalizing currency exchange rates.
-- (%%sql has to be the first line of the cell, so this note is a SQL comment.)
--
-- Converts `price` from `from_currency` to `to_currency` using fixed,
-- hard-coded rates:
--   * same currency on both sides -> price unchanged
--   * USD -> EUR                  -> price * 0.92
--   * USD -> JPY                  -> price * 110.5
--   * any other pair              -> NULL
-- Unsupported pairs return NULL rather than the unconverted price, so a
-- missing rate cannot be mistaken for a converted amount.
-- Currency codes are compared exactly as passed (case-sensitive).
CREATE OR REPLACE FUNCTION CRYPTO_DB.HARMONIZED_CRYPTO.NORMALIZE_CURRENCY(price FLOAT, from_currency VARCHAR, to_currency VARCHAR)
RETURNS FLOAT
AS
$$
    CASE
        WHEN from_currency = to_currency THEN price
        WHEN from_currency = 'USD' AND to_currency = 'EUR' THEN price * 0.92
        WHEN from_currency = 'USD' AND to_currency = 'JPY' THEN price * 110.5
        ELSE NULL
    END
$$;

In [None]:
%%sql
-- Cell 7: Create Python UDF for calculating volatility.
-- (%%sql has to be the first line of the cell, so this note is a SQL comment.)
--
-- Input : ARRAY of prices in chronological order.
-- Output: standard deviation of the period-over-period returns, scaled by
--         sqrt(252); NULL when fewer than two prices are supplied or when no
--         return can be computed.
-- PACKAGES is required because the handler imports numpy.
-- NOTE(review): RUNTIME_VERSION '3.8' is kept as-is; confirm it is still an
-- accepted runtime for new UDFs in this account.
CREATE OR REPLACE FUNCTION CRYPTO_DB.HARMONIZED_CRYPTO.CALCULATE_VOLATILITY(prices ARRAY)
RETURNS FLOAT
LANGUAGE PYTHON
RUNTIME_VERSION = '3.8'
PACKAGES = ('numpy')
HANDLER = 'calculate_volatility'
AS
$$
import numpy as np

def calculate_volatility(prices):
    """Return the annualized volatility of a chronological price list, or None."""
    if not prices or len(prices) < 2:
        return None

    # None entries become NaN under dtype=float and are filtered out below.
    prices_array = np.array(prices, dtype=float)
    previous = prices_array[:-1]
    current = prices_array[1:]

    # Keep only steps whose return is well defined: both prices finite and a
    # non-zero previous price (a zero would give an inf/NaN return).
    usable = np.isfinite(previous) & np.isfinite(current) & (previous != 0)
    if not usable.any():
        return None

    daily_returns = (current[usable] - previous[usable]) / previous[usable]

    # Calculate standard deviation of returns (volatility)
    # NOTE(review): 252 is the equity trading-day convention; confirm whether
    # 365 is intended for assets that trade every day.
    volatility = np.std(daily_returns) * np.sqrt(252)  # Annualized with 252 trading days

    return float(volatility)
$$;

In [None]:
%%sql
-- Cell 8: Create procedure to update volatility metrics.
-- (%%sql has to be the first line of the cell, so this note is a SQL comment.)
--
-- For every row of CRYPTO_HARMONIZED the procedure:
--   1. sets volatility_7d from the close prices of the same symbol whose
--      timestamp lies in [timestamp - 7 days, timestamp]; rows whose window
--      holds fewer than two rows are left untouched;
--   2. sets normalized_price to close.
-- Returns a fixed status string.
CREATE OR REPLACE PROCEDURE CRYPTO_DB.HARMONIZED_CRYPTO.UPDATE_VOLATILITY_METRICS()
RETURNS STRING
LANGUAGE SQL
AS
$$
BEGIN
    -- Calculate and update 7-day volatility using close prices.
    -- The windows are built once with a self-join + GROUP BY and applied via
    -- UPDATE ... FROM. This avoids a per-row correlated scalar subquery with
    -- a range predicate, a shape Snowflake can reject as an unsupported
    -- subquery type, and it computes each window only once.
    UPDATE CRYPTO_DB.HARMONIZED_CRYPTO.CRYPTO_HARMONIZED h
    SET volatility_7d = v.volatility_7d
    FROM (
        SELECT
            cur.crypto_symbol,
            cur.timestamp,
            CRYPTO_DB.HARMONIZED_CRYPTO.CALCULATE_VOLATILITY(
                ARRAY_AGG(win.close) WITHIN GROUP (ORDER BY win.timestamp)
            ) AS volatility_7d
        FROM (
            -- DISTINCT keeps duplicate keys from multiplying the window rows.
            SELECT DISTINCT crypto_symbol, timestamp
            FROM CRYPTO_DB.HARMONIZED_CRYPTO.CRYPTO_HARMONIZED
        ) cur
        JOIN CRYPTO_DB.HARMONIZED_CRYPTO.CRYPTO_HARMONIZED win
          ON win.crypto_symbol = cur.crypto_symbol
         AND win.timestamp BETWEEN DATEADD(day, -7, cur.timestamp) AND cur.timestamp
        GROUP BY cur.crypto_symbol, cur.timestamp
        -- Only windows with at least two rows produce an update.
        HAVING COUNT(*) >= 2
    ) v
    WHERE h.crypto_symbol = v.crypto_symbol
      AND h.timestamp = v.timestamp;

    -- Set normalized_price to equal the close price (since all data is already in USD)
    UPDATE CRYPTO_DB.HARMONIZED_CRYPTO.CRYPTO_HARMONIZED
    SET normalized_price = close;

    RETURN 'Volatility metrics and normalized prices updated successfully';
END;
$$;

In [None]:
# Cell 9: Example of executing the harmonization process
# Note: This would need real data in your raw tables to work
try:
    # Transform data
    print("Beginning data transformation...")
    harmonized_df = transform_yahoo_finance_data()

    # Save to harmonized table.
    # The table is emptied and refilled instead of written with
    # mode("overwrite"): overwrite would replace the table with one that only
    # has the DataFrame's columns, dropping volatility_7d / normalized_price,
    # and the UPDATE statements in UPDATE_VOLATILITY_METRICS would then fail.
    # column_order="name" lets the DataFrame carry fewer columns than the
    # table; the missing ones stay NULL until the procedure fills them.
    # Trade-off: truncate + append is not atomic, so a failed append leaves
    # the table empty until the cell is re-run.
    print("Saving to harmonized table...")
    session.sql(
        "TRUNCATE TABLE IF EXISTS CRYPTO_DB.HARMONIZED_CRYPTO.CRYPTO_HARMONIZED"
    ).collect()
    harmonized_df.write.mode("append").saveAsTable(
        "CRYPTO_DB.HARMONIZED_CRYPTO.CRYPTO_HARMONIZED",
        column_order="name",
    )

    # Update metrics
    print("Updating volatility metrics...")
    session.sql("CALL CRYPTO_DB.HARMONIZED_CRYPTO.UPDATE_VOLATILITY_METRICS()").collect()

    print("Data harmonization complete!")
except Exception as e:
    # Best-effort demo cell: report the failure and let the notebook continue.
    print(f"Error during harmonization: {str(e)}")
    print("Please ensure your raw tables exist and contain the expected columns.")


In [None]:
%%sql
-- Cell 10: Create a crypto correlation view.
-- (%%sql has to be the first line of the cell, so this note is a SQL comment.)
--
-- Produces a single row with the pairwise correlations of the BTC, ETH and
-- DOGE percentage returns, computed from consecutive close prices per symbol.
CREATE OR REPLACE VIEW CRYPTO_DB.HARMONIZED_CRYPTO.CRYPTO_CORRELATION AS
WITH closes AS (
    -- Previous close per symbol, computed once. timestamp is a tie-breaker so
    -- the ordering is deterministic if a day holds more than one row.
    SELECT
        crypto_symbol,
        date_day,
        close,
        LAG(close) OVER (PARTITION BY crypto_symbol ORDER BY date_day, timestamp) AS prev_close
    FROM CRYPTO_DB.HARMONIZED_CRYPTO.CRYPTO_HARMONIZED
),
daily_returns AS (
    -- NULLIF turns a zero previous close into NULL, so the division yields
    -- NULL instead of raising a division-by-zero error; such rows are
    -- filtered out below together with each symbol's first row.
    SELECT
        crypto_symbol,
        date_day,
        (close - prev_close) / NULLIF(prev_close, 0) * 100 AS daily_return
    FROM closes
),
pivoted_returns AS (
    -- One row per day with one return column per symbol.
    SELECT
        date_day,
        MAX(CASE WHEN crypto_symbol = 'BTC' THEN daily_return ELSE NULL END) as btc_return,
        MAX(CASE WHEN crypto_symbol = 'ETH' THEN daily_return ELSE NULL END) as eth_return,
        MAX(CASE WHEN crypto_symbol = 'DOGE' THEN daily_return ELSE NULL END) as doge_return
    FROM daily_returns
    WHERE daily_return IS NOT NULL
    GROUP BY date_day
)
SELECT
    CORR(btc_return, eth_return) as btc_eth_correlation,
    CORR(btc_return, doge_return) as btc_doge_correlation,
    CORR(eth_return, doge_return) as eth_doge_correlation
FROM pivoted_returns;