# Connection

> Helps manage the Snowflake (Snowpark) connection.

In [1]:
#| default_exp connection

In [2]:
#| hide
from nbdev.showdoc import *

In [3]:
#| export

import os
import logging
import yaml
import warnings

from typing import Optional, Dict
from snowflake.snowpark import Session
from snowflake.snowpark.context import get_active_session
from snowflake.snowpark.exceptions import SnowparkSessionException


# Quiet the Snowpark logger: only WARNING and above are emitted from 'snowflake.snowpark'.
logging.getLogger('snowflake.snowpark').setLevel(logging.WARNING)
 

In [4]:
#| export

class SnowparkConnection:
    """
    Manages Snowpark connection sessions, configuration, and lifecycle.

    On construction the instance resolves a connection configuration (an
    explicit dict if given, otherwise a YAML file with environment-variable
    fallbacks) and then binds to the currently active Snowpark session,
    creating a new one only when no session is active.
    """

    def __init__(self, connection_config: Optional[Dict[str, str]] = None, config_file: str = 'snowflake_config.yaml'):
        """
        Resolve the configuration and obtain a session.

        Args:
            connection_config: Explicit connection parameters. When omitted
                (or empty), the configuration is loaded from `config_file`
                and environment variables instead.
            config_file: Path of the YAML file consulted when no explicit
                configuration is given.
        """
        # If a connection config is provided, use it. Otherwise, load from a YAML file or environment variables.
        self.connection_config = connection_config or self.load_connection_config(config_file)
        self.session = self._get_active_or_new_session()

    @staticmethod
    def _extract_snowflake_section(parsed: object) -> Dict[str, str]:
        """Return a copy of the ``snowflake`` mapping of a parsed YAML document, or ``{}``.

        An empty YAML file parses to ``None`` and a document may lack the
        ``snowflake`` key or set it to a non-mapping value; all of those cases
        yield an empty dict so the environment-variable fallback can proceed.
        """
        if not isinstance(parsed, dict):
            return {}
        section = parsed.get('snowflake')
        return dict(section) if isinstance(section, dict) else {}

    def load_connection_config(self, yaml_file: str) -> Dict[str, str]:
        """
        Load the Snowflake connection configuration from a YAML file or environment variables.

        Values are read from the ``snowflake`` section of `yaml_file` when the
        file exists. Any of the standard keys that is missing or empty there
        is taken from the matching ``SNOWFLAKE_*`` environment variable; the
        role additionally defaults to ``ACCOUNTADMIN``. Keys that cannot be
        resolved are still present in the result with the value ``None``.

        Args:
            yaml_file (str): The path to the YAML file.

        Returns:
            Dict[str, str]: The Snowflake connection configuration.
        """
        config: Dict[str, str] = {}
        if os.path.isfile(yaml_file):
            try:
                with open(yaml_file, 'r') as file:
                    # safe_load returns None for an empty document, so never call .get on it directly.
                    config = self._extract_snowflake_section(yaml.safe_load(file))
            except FileNotFoundError:
                # The file can disappear between the isfile() check and open().
                logging.warning(f"Configuration file '{yaml_file}' not found. Falling back to environment variables.")

        # Fallback to environment variables if certain keys are missing
        config.update({
            'account': config.get('account') or os.getenv('SNOWFLAKE_ACCOUNT'),
            'user': config.get('user') or os.getenv('SNOWFLAKE_USER'),
            'password': config.get('password') or os.getenv('SNOWFLAKE_PASSWORD'),
            'role': config.get('role') or os.getenv('SNOWFLAKE_ROLE', 'ACCOUNTADMIN'),
            'warehouse': config.get('warehouse') or os.getenv('SNOWFLAKE_WAREHOUSE'),
            'database': config.get('database') or os.getenv('SNOWFLAKE_DATABASE'),
            'schema': config.get('schema') or os.getenv('SNOWFLAKE_SCHEMA')
        })
        return config

    def _get_active_or_new_session(self) -> "Session":
        """
        Get the active Snowpark session or create a new one if none exists.

        Note that when an active session is found it is returned as-is;
        `self.connection_config` is only used when a new session is created.

        Returns:
            Session: The Snowpark session.
        """
        try:
            session = get_active_session()
            logging.info("Using active Snowpark session.")
        except SnowparkSessionException:
            session = self.create_session()
        return session

    def create_session(self) -> "Session":
        """
        Create a new Snowpark session using the provided configuration.

        Returns:
            Session: The new Snowpark session.

        Raises:
            SnowparkSessionException: Logged and re-raised when the session
                cannot be created.
        """
        session_config = self.connection_config
        try:
            session = Session.builder.configs(session_config).create()
            logging.info("Snowpark session successfully created.")
            return session
        except SnowparkSessionException as e:
            logging.error(f"Error creating Snowpark session: {e}")
            # Bare raise keeps the original traceback intact.
            raise

    def get_session(self) -> "Session":
        """
        Return the Snowpark session.

        Returns:
            Session: The Snowpark session.
        """
        return self.session

    def close_session(self) -> None:
        """
        Close the Snowpark session.

        A `SnowparkSessionException` raised while closing is logged and
        suppressed rather than propagated.
        """
        try:
            self.session.close()
        except SnowparkSessionException as e:
            logging.error(f"Error closing Snowpark session: {e}")

In [5]:
#| skip
from snowflake.snowpark.version import VERSION
from snowflake.snowpark.functions import col


# Demo: open a connection and report what the session is bound to.
# Credentials are read from the environment; the target objects are hard-coded.
connection_params = {
    'user': os.getenv('SNOWFLAKE_USER'),
    'password': os.getenv('SNOWFLAKE_PASSWORD'),
    'account': os.getenv('SNOWFLAKE_ACCOUNT'),
    'database': 'CORTEX',
    'warehouse': 'CORTEX_WH',
    'schema': 'DEV',
    'role': 'CORTEX_USER_ROLE',  # Use the desired role
}
connection = SnowparkConnection(connection_config=connection_params)

# The session held by the connection wrapper, with the SQL simplifier switched on
session = connection.get_session()
session.sql_simplifier_enabled = True

# One-row query giving the current user and the Snowflake server version
snowflake_environment = session.sql('SELECT current_user(), current_version()').collect()
snowpark_version = VERSION

print('\nConnection Established with the following parameters:')
env_row = snowflake_environment[0]
print('User                        : {}'.format(env_row[0]))
print('Role                        : {}'.format(session.get_current_role()))
print('Database                    : {}'.format(session.get_current_database()))
print('Schema                      : {}'.format(session.get_current_schema()))
print('Warehouse                   : {}'.format(session.get_current_warehouse()))
print('Snowflake version           : {}'.format(env_row[1]))
# VERSION is indexed as (major, minor, patch)
print('Snowpark for Python version : {}.{}.{}'.format(*snowpark_version[:3]))



Connection Established with the following parameters:
User                        : JD_SERVICE_ACCOUNT_ADMIN
Role                        : "CORTEX_USER_ROLE"
Database                    : "CORTEX"
Schema                      : "DEV"
Warehouse                   : "CORTEX_WH"
Snowflake version           : 8.31.1
Snowpark for Python version : 1.19.0


In [7]:
#| hide
import nbdev

# Run nbdev's export step for the cells marked `#| export`.
nbdev.nbdev_export()

In [22]:
import re
import time
from bs4 import BeautifulSoup
import requests

# Matches an underscore that is neither at the very start nor at the very end of the string.
_INTERIOR_UNDERSCORE = re.compile(r'(?<!^)_(?!$)')


def normalize_package_name(name):
    """Return `name` with every interior underscore replaced by a hyphen.

    A leading or trailing underscore is left untouched, so
    ``_libgcc_mutex`` becomes ``_libgcc-mutex``.
    """
    return _INTERIOR_UNDERSCORE.sub('-', name)

def read_requirements(filename):
    """Return the set of normalized package names listed in a requirements file.

    A name is the run of letters, digits, ``.``, ``_`` and ``-`` at the start
    of a line; everything after it (version specifiers, markers, ...) is
    ignored. Names are lower-cased and passed through
    `normalize_package_name`.

    Lines that do not start with a letter, digit or underscore are skipped,
    so comments, pip option lines (``-r other.txt``, ``-e .``, ``--hash=...``)
    and relative paths (``./pkg``) are not mistaken for packages.

    Args:
        filename: Path of the requirements file to read.

    Returns:
        set[str]: The normalized package names.
    """
    with open(filename, 'r') as file:
        content = file.read()
    # The first character excludes "-" and "." so option lines and paths never match.
    names = re.findall(r'^([a-zA-Z0-9_][a-zA-Z0-9._-]*)', content, re.MULTILINE)
    return {normalize_package_name(name.lower()) for name in names}

def compare_requirements(file1, file2, show_all=False):
    """Report and return the packages listed in `file1` that are absent from `file2`.

    Both files are read with `read_requirements`, so the comparison works on
    normalized names. The header line is always printed; the individual
    package names are printed (sorted) only when `show_all` is true.

    Args:
        file1: Requirements file whose packages are being looked for.
        file2: Requirements file to compare against.
        show_all: When true, print every missing package under the header.

    Returns:
        set: Names present in `file1` but not in `file2`.
    """
    missing_from_second = read_requirements(file1).difference(read_requirements(file2))
    print(f"Packages in {file1} but not in {file2} (case-insensitive comparison, underscores converted to hyphens):")
    if not show_all:
        return missing_from_second
    for name in sorted(missing_from_second):
        print(f"- {name}")
    return missing_from_second

def scrape_anaconda_packages(url, timeout=30):
    """Scrape (package name, version) pairs from the first HTML table at `url`.

    The header row of the table is skipped. For every remaining row with at
    least two ``<td>`` cells, the first cell is taken as the package name and
    the second as the version; the version is cut at the first ``_`` or ``-``.
    (That the first two columns are name and version is an assumption about
    the page layout -- verify against the target page.)

    Args:
        url: Address of the page to scrape.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the call indefinitely.

    Returns:
        list[tuple[str, str]]: The scraped pairs, in table order; empty when
        the page contains no table.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status,
            instead of silently parsing an error page into an empty list.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    table = BeautifulSoup(response.text, "html.parser").find('table')
    if table is None:
        return []

    packages = []
    for row in table.find_all('tr')[1:]:  # [1:] skips the header row
        columns = row.find_all('td')
        if len(columns) < 2:
            continue
        package_name = columns[0].text.strip()
        # Keep only the part of the version before the first "_" or "-".
        version = re.split(r'[_-]', columns[1].text.strip())[0]
        packages.append((package_name, version))
    return packages

def create_requirements_file(packages, filename):
    """Write `packages` to `filename` as ``name==version`` lines and print a summary.

    Every character of a package name that is not a word character, ``.`` or
    ``-`` is removed before the line is written. An existing file is
    overwritten.

    Args:
        packages: Sized collection of ``(package_name, version)`` pairs.
        filename: Path of the requirements file to create.
    """
    def _pinned_lines():
        # One "name==version" line per pair, with the name sanitized.
        for raw_name, version in packages:
            safe_name = re.sub(r'[^\w.-]', '', raw_name)
            yield f"{safe_name}=={version}\n"

    with open(filename, "w") as out:
        out.writelines(_pinned_lines())
    print(f"Created {filename} with {len(packages)} packages.")

def check_pypi_availability(packages, delay=1, timeout=10):
    """Split `packages` into those PyPI knows about and those it does not.

    Each name is looked up at ``https://pypi.org/pypi/<name>/json``. A 200
    response counts as available; any other status code counts as not found.

    Args:
        packages: Iterable of package names to look up.
        delay: Seconds to pause between consecutive requests. No pause is
            made before the first request or after the last one.
        timeout: Seconds to wait for each response, so a stalled connection
            cannot hang the whole run.

    Returns:
        tuple[list, list]: ``(available_on_pypi, not_on_pypi)``, each in the
        iteration order of `packages`.
    """
    available_on_pypi = []
    not_on_pypi = []
    for index, package in enumerate(packages):
        # Throttle between requests only; sleeping after the final one is wasted time.
        if index and delay > 0:
            time.sleep(delay)
        url = f"https://pypi.org/pypi/{package}/json"
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            available_on_pypi.append(package)
        else:
            not_on_pypi.append(package)
    return available_on_pypi, not_on_pypi

# Driver: rebuild requirements.txt from the Snowflake Anaconda channel listing,
# then check which packages of a second requirements file are missing from it
# and whether those missing packages exist on PyPI.

# Scrape the channel page and write every (name, version) pair to requirements.txt
# (the file is opened in write mode, so an existing one is overwritten).
anaconda_url = "https://repo.anaconda.com/pkgs/snowflake/"
packages = scrape_anaconda_packages(anaconda_url)
create_requirements_file(packages, "requirements.txt")

# Packages listed in 'requirements copy.txt' but absent from the freshly written file;
# show_all is left at its default, so only the header line is printed here.
diff_packages = compare_requirements('requirements copy.txt', 'requirements.txt')
# One PyPI lookup per missing package.
available_on_pypi, not_on_pypi = check_pypi_availability(diff_packages)

# Per-package report, first the ones PyPI has ...
print(f"\nPackages available on PyPI ({len(available_on_pypi)}):")
for package in available_on_pypi:
    print(f"- {package}")

# ... then the ones it does not.
print(f"\nPackages not found on PyPI ({len(not_on_pypi)}):")
for package in not_on_pypi:
    print(f"- {package}")

# Summary counts
print(f"\nTotal packages checked: {len(diff_packages)}")
print(f"Available on PyPI: {len(available_on_pypi)}")
print(f"Not found on PyPI: {len(not_on_pypi)}")


Created requirements.txt with 2335 packages.
Packages in requirements copy.txt but not in requirements.txt (case-insensitive comparison, underscores converted to hyphens):

Packages available on PyPI (48):
- fastjsonschema
- sagemaker
- snowflake-core
- cli-ui
- pyupgrade
- blacken-docs
- duckdb
- nbqa
- boto3-stubs
- mypy-boto3-logs
- ansiwrap
- pandera
- pygam
- mypy-boto3-s3
- hydra-core
- lsprotocol
- mypy-boto3-sagemaker
- docker
- sphinxcontrib-apidoc
- types-python-dateutil
- esbonio
- daal
- mypy-boto3-ecs
- tokenize-rt
- autoflake
- duckdb-engine
- pygls
- mypy-boto3-ssm
- mypy-boto3-ec2
- sqlalchemy2-stubs
- botocore-stubs
- mypy-boto3-iam
- smdebug-rulesconfig
- omegaconf
- snowflake-sqlalchemy
- papermill
- tbump
- types-s3transfer
- mypy-boto3-sts
- sphinxcontrib-confluencebuilder
- pip-tools
- types-awscrt
- com2ann
- atlassian-python-api
- mypy-boto3-kms
- textwrap3
- pyspellchecker
- mypy-boto3-ecr

Packages not found on PyPI (0):

Total packages checked: 48
Available o