# SageMaker notebook: Git, Snowflake, and S3 setup

## Git

## S3

## Snowflake

In [None]:
## Uncomment and run the following pip install commands, then restart the notebook kernel

# !pip install snowflake --user
# !pip install snowflake-connector-python --user

In [None]:
import json
from abc import ABCMeta, abstractmethod

import boto3
import pandas as pd
import snowflake.connector

# Cap on the number of records returned by a query.
MAX_QUERY_RETURN_SIZE = 10 ** 6

class Credentials(metaclass=ABCMeta):
    """Abstract source of connection secrets.

    Connectors call ``get_keys()`` on the credentials object they receive,
    so every concrete subclass has to implement that method.
    """

    @abstractmethod
    def get_keys(self):
        """Return the raw secret payload for this credential source."""
        raise NotImplementedError
    
    
class SSMPSCredentials(Credentials):
    """Credentials backed by a secret stored in AWS Secrets Manager.

    Args:
        secretid: Identifier passed as ``SecretId`` to ``get_secret_value``.
        region_name: AWS region used to build the Secrets Manager client.
            Defaults to ``'us-east-1'``, the value that was previously
            hard-coded.
    """

    def __init__(self, secretid: str, region_name: str = 'us-east-1'):
        self._secretid = secretid
        self._region_name = region_name
        # Kept for backward compatibility; get_keys() does not populate it.
        self._secrets = {}

    def get_keys(self):
        """Fetch the secret from AWS Secrets Manager.

        Returns:
            The ``get_secret_value`` response, unmodified.
        """
        secrets_client = boto3.client(
            service_name='secretsmanager',
            region_name=self._region_name,
        )
        return secrets_client.get_secret_value(SecretId=self._secretid)
    
    
class BaseConnector(metaclass=ABCMeta):
    """Interface that every connector has to implement."""

    @abstractmethod
    def connect(self):
        """Open a connection; concrete subclasses must override this."""
        raise NotImplementedError()
        

class SnowflakeConnector(BaseConnector):
    """Opens Snowflake connections using secrets from a ``Credentials`` object.

    The credentials' ``get_keys()`` result is expected to carry a JSON
    document under ``'SecretString'`` with the keys listed in
    ``_REQUIRED_KEYS``.
    """

    # Keys that connect() reads from the decoded secret.
    _REQUIRED_KEYS = ('login_name', 'login_password', 'account', 'warehouse')

    def __init__(self, credentials: Credentials):
        """Fetch and decode the secret.

        Args:
            credentials: Object whose ``get_keys()`` returns a mapping that
                holds the JSON secret under ``'SecretString'``.
        """
        keys = credentials.get_keys()
        # A response without 'SecretString' decodes to an empty dict;
        # connect() then reports exactly which keys are missing.
        self._secrets = json.loads(keys.get('SecretString', "{}"))

    def connect(self, dbname: str, schema: str = 'DEFAULT'):
        """Open a Snowflake connection.

        Args:
            dbname: Database to connect to.
            schema: Schema to use within ``dbname``.

        Returns:
            The object returned by ``snowflake.connector.connect``.

        Raises:
            KeyError: If the decoded secret lacks any required key. All
                missing keys are named in the message.
        """
        missing = [key for key in self._REQUIRED_KEYS if key not in self._secrets]
        if missing:
            raise KeyError(
                'Secret is missing required key(s): {}'.format(', '.join(missing))
            )

        return snowflake.connector.connect(
            user=self._secrets['login_name'],
            password=self._secrets['login_password'],
            account=self._secrets['account'],
            warehouse=self._secrets['warehouse'],
            database=dbname,
            schema=schema,
        )
    

In [4]:
# Identifier of the secret handed to SSMPSCredentials.
SF_CREDS = 'datascience-max-dev-sagemaker-notebooks'

# Build the connector from the stored secret, open a connection on the
# target database/schema, and create a cursor for running statements.
conn = SnowflakeConnector(credentials=SSMPSCredentials(secretid=SF_CREDS))
ctx = conn.connect(dbname="MAX_PROD", schema="DATASCIENCE_STAGE")
cur = ctx.cursor()


In [5]:
# Execute a statement that will generate a result set.
querystr = '''
    select *
    from max_prod.content_intelligence.future_programming_schedule
    limit 2
'''
cur.execute(querystr)

# Take the column names straight from the cursor metadata. Joining them into
# one comma-separated string and splitting it again would corrupt any column
# whose name itself contains a comma.
column_names = [col[0] for col in cur.description]

# Fetch the result set from the cursor and deliver it as a pandas DataFrame.
df = pd.DataFrame(cur.fetchall(), columns=column_names)
display(df)

# Persist the result next to the notebook (the DataFrame index is written too).
df.to_csv('test.csv')

Unnamed: 0,RELEASE_MONTH,CATEGORY,UNCLEANED_TITLE,TITLE,SEASON,TIER,NUM_EPISODES_RELEASED,NUM_HOURS_RELEASED,NUM_PREMIERING_TITLES
0,2020-08-01,DOCUMENTARY FEATURES,Class Action Park,Class Action Park,0,3,1,2,1
1,2020-08-01,DOCUMENTARY FEATURES,On the Trail: Inside the 2020 Primaries,On the Trail: Inside the 2020 Primaries,0,3,1,2,1
