In [2]:
## Node.js setup on EC2 (Node.js is required by `jupyter lab build` below): https://docs.aws.amazon.com/sdk-for-javascript/v2/developer-guide/setting-up-node-on-ec2-instance.html

!pip install "jupyterlab>=3" "ipywidgets>=7.6"
!pip install jupyter-dash
!jupyter lab build


!pip install snowflake --user
!pip install snowflake-connector-python --user
!pip install category_encoders
!pip install xgboost
!pip install lightgbm --user
import os
import sys
path=!pwd
# sys.path.append(os.path.join(path[0], '..'))
# sys.path.append('/home/ec2-user/SageMaker/jupyter-notebooks/')
# from utils import *
import json
import pickle
import posixpath
from abc import ABCMeta, abstractmethod
from datetime import timedelta

import boto3
import pandas as pd
import snowflake.connector

import logging 

logger = logging.getLogger()
logging.basicConfig(level=logging.INFO)

from category_encoders import OneHotEncoder
import xgboost as xgb
import lightgbm as lgbm
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as MSE
from sklearn.metrics import mean_absolute_percentage_error as MAPE
from sklearn.metrics import mean_absolute_error as MAE
from sklearn.metrics import r2_score as r2_score
import sklearn.model_selection

from io import StringIO
class Utils():
    """Static helpers that write and read CSV and pickle objects on S3."""

    @staticmethod
    def _s3_key(key_path, filename):
        """Join a key prefix and a file name into an S3 object key.

        S3 keys use "/" as the separator, so the join goes through posixpath
        rather than the platform-dependent os.path.
        """
        return posixpath.join(key_path, filename)

    @staticmethod
    def to_csv_s3(content, bucket, key_path, filename):
        """Serialize `content` with its `to_csv` method and upload it to S3.

        Args:
            content: object exposing `to_csv(buffer)` (presumably a pandas
                DataFrame -- confirm against callers).
            bucket: name of the target S3 bucket.
            key_path: key prefix inside the bucket.
            filename: object name appended to `key_path`.
        """
        client = boto3.client('s3')
        key = Utils._s3_key(key_path, filename)
        csv_buffer = StringIO()
        content.to_csv(csv_buffer)
        client.put_object(Bucket=bucket, Key=key, Body=csv_buffer.getvalue())
        logger.info(f'Saved to {bucket}/{key}')

    @staticmethod
    def to_pkl_s3(content, bucket, key_path, filename):
        """Pickle `content` and upload the bytes to S3.

        Args:
            content: any picklable object.
            bucket: name of the target S3 bucket.
            key_path: key prefix inside the bucket.
            filename: object name appended to `key_path`.
        """
        client = boto3.client('s3')
        key = Utils._s3_key(key_path, filename)
        obj = pickle.dumps(content)
        client.put_object(Bucket=bucket, Key=key, Body=obj)
        # One message per upload, in the same format as to_csv_s3.
        logger.info(f'Saved to {bucket}/{key}')

    @staticmethod
    def read_csv_s3(bucket, key_path, filename):
        """Download a CSV object from S3 and parse it into a DataFrame.

        The literal string "\\N" is treated as a missing value.

        Returns:
            The DataFrame produced by `pd.read_csv`.
        """
        client = boto3.client('s3')
        key = Utils._s3_key(key_path, filename)
        logger.info(f'Reading from {bucket}/{key}')
        obj = client.get_object(Bucket=bucket, Key=key)
        df = pd.read_csv(obj['Body'], na_values="\\N")
        return df

    @staticmethod
    def read_pkl_s3(bucket, key_path, filename):
        """Download a pickled object from S3 and unpickle it.

        Returns:
            The object reconstructed by `pickle.loads`.
        """
        client = boto3.client('s3')
        key = Utils._s3_key(key_path, filename)
        logger.info(f'Reading from {bucket}/{key}')
        obj = client.get_object(Bucket=bucket, Key=key)
        body = obj['Body'].read()
        # SECURITY: pickle.loads can execute arbitrary code; only read objects
        # from buckets whose writers are trusted.
        model = pickle.loads(body)
        return model



class Credentials(metaclass=ABCMeta):
    """Base type for credential providers; it declares no members itself."""
    
    
class SSMPSCredentials(Credentials):
    """Credentials fetched by secret id from AWS Secrets Manager."""

    def __init__(self, secretid: str, region_name: str = 'us-east-1'):
        """Remember which secret to fetch and from which region.

        Args:
            secretid: value passed as `SecretId` to Secrets Manager.
            region_name: AWS region of the Secrets Manager endpoint. Defaults
                to 'us-east-1', the region this class previously hard-coded.
        """
        self._secretid = secretid
        self._region_name = region_name
        self._secrets = {}

    def get_keys(self):
        """Fetch the secret and return the raw `get_secret_value` response.

        A new Secrets Manager client is created on every call.
        """
        secrets_client = boto3.client(
            service_name='secretsmanager',
            region_name=self._region_name,
        )
        return secrets_client.get_secret_value(SecretId=self._secretid)
    
    
class BaseConnector(metaclass=ABCMeta):
    """Abstract connector; concrete subclasses must provide `connect`."""

    @abstractmethod
    def connect(self):
        """Open a connection. Raises NotImplementedError if called directly."""
        raise NotImplementedError()
        

class SnowflakeConnector(BaseConnector):
    """Connector that opens Snowflake connections from a stored secret."""

    def __init__(self, credentials: Credentials):
        """Fetch the secret once and keep its decoded JSON payload.

        The 'SecretString' entry of the `get_keys()` response is parsed as
        JSON; a missing entry is treated as an empty JSON object.
        """
        response = credentials.get_keys()
        secret_json = response.get('SecretString', "{}")
        self._secrets = json.loads(secret_json)

    def connect(self, dbname: str, schema: str = 'DEFAULT'):
        """Open and return a Snowflake connection for `dbname` and `schema`.

        User, password, account and warehouse are read from the secret.
        """
        secrets = self._secrets
        connect_args = {
            'user': secrets['login_name'],
            'password': secrets['login_password'],
            'account': secrets['account'],
            'warehouse': secrets['warehouse'],
            'database': dbname,
            'schema': schema,
        }
        return snowflake.connector.connect(**connect_args)


def run_query(query, dbname, schema):
    """Run one SQL statement on Snowflake and return the result as a DataFrame.

    A fresh connection is opened for every call and is closed again before
    returning, whether or not the statement succeeds.

    Args:
        query: SQL text to execute.
        dbname: database to connect to.
        schema: schema to connect to.

    Returns:
        DataFrame holding every fetched row, with column names lower-cased.
    """
    SF_CREDS = 'datascience-max-dev-sagemaker-notebooks'

    conn = SnowflakeConnector(SSMPSCredentials(SF_CREDS))
    ctx = conn.connect(dbname, schema)
    try:
        cursor = ctx.cursor()
        try:
            cursor.execute(query)
            column_names = [desc[0] for desc in cursor.description]
            df = pd.DataFrame(cursor.fetchall(), columns=column_names)
        finally:
            # Release the cursor even when execute/fetch raises.
            cursor.close()
    finally:
        # Each call owns its connection, so it must not be left open.
        ctx.close()
    df.columns = df.columns.str.lower()
    return df

## Credentials
# Secret id handed to SSMPSCredentials, which uses it as the Secrets Manager SecretId.
SF_CREDS = 'datascience-max-dev-sagemaker-notebooks'

## Snowflake connection
# Module-level connection and cursor on database MAX_PROD, schema DATASCIENCE_STAGE.
# run_query() does not use these; it opens its own connection on every call.
conn=SnowflakeConnector(SSMPSCredentials(SF_CREDS))
ctx=conn.connect("MAX_PROD","DATASCIENCE_STAGE")
cur = ctx.cursor()

def cvdf_to_snowflake(df, table_name,
                      staged_file='psi_first_views/dev/fv_pred_munged_2022-06-23_adhoc.csv'):
    """Upload `df` to S3, (re)create `table_name` and load it from a staged CSV.

    Steps:
      1. `df` is written as CSV (without its index) to the output bucket under
         the key `table_name`.
      2. `max_dev.workspace.{table_name}` is created or replaced with a fixed
         15-column schema.
      3. The first 15 columns of `staged_file` on the stage are inserted into
         the table.

    NOTE(review): the rows loaded in step 3 come from `staged_file`, not from
    the object uploaded in step 1. Confirm this is intended before relying on
    `df` to determine the table contents.

    Args:
        df: DataFrame to upload to S3.
        table_name: target table name; also used as the S3 object key.
        staged_file: path of the CSV below the stage root. Defaults to the
            file that was previously hard-coded in the query.
    """
    stage = '@HBO_OUTBOUND_DATASCIENCE_CONTENT_DEV'
    output_bucket = "hbo-outbound-datascience-content-dev"
    dbname, schema = 'MAX_DEV', 'WORKSPACE'

    # StringIO is imported at the top of the file, so the function no longer
    # depends on a later `import io` having run first.
    csv_buffer = StringIO()
    df.to_csv(csv_buffer, index=False)
    content = csv_buffer.getvalue()
    client = boto3.client('s3')
    client.put_object(Bucket=output_bucket, Key=table_name, Body=content)

    print ('Create Table: ' + table_name)

    run_query('''
    create or replace table {table_name} (
    title_name varchar,
    effective_start_date varchar,
    season_number int, 
    tier varchar,
    content_category  varchar,
    category varchar,
    prequel_count int,
    prequel_featured_count int,
    prequel_featured_count_s int,
    page_views int,
    page_views_s int,
    tier_adjusted int,
    first_views_pred float,
    model_pred_date varchar,
    schedule_label varchar
    )
    '''.format(table_name = table_name), dbname, schema)

    print ('Begin Uploading')
    # $1..$15 select the staged file's columns by position, in table order.
    # csv_v2 is referenced as a named file format; presumably it already
    # exists in Snowflake -- confirm.
    run_query('''
    insert into max_dev.workspace.{table_name}

    select 
              $1
            , $2
            , $3
            , $4
            , $5
            , $6
            , $7
            , $8
            , $9
            , $10
            , $11
            , $12
            , $13
            , $14
            , $15
    from {stage}/{staged_file}

     (FILE_FORMAT => csv_v2)

    '''.format(stage = stage, table_name = table_name,
              staged_file = staged_file)
            , dbname, schema)

    print ('Finish Uploading')   
    
    
import io

output_bucket = 'hbo-outbound-datascience-content-dev'
key_path = 'psi_first_views/dev'

# Keep the frame that was read: the result used to be discarded, which left
# df_pred_future_out dependent on some earlier cell having defined it.
df_pred_future_out = Utils.read_csv_s3(output_bucket, key_path, 'fv_pred_munged_2022-06-23_adhoc.csv')
df_pred_future_out = df_pred_future_out.reset_index()
cvdf_to_snowflake(df_pred_future_out, 'firstview_postgl_temp')

#  hbo-outbound-datascience-content-dev/psi_first_views/psi_monthly_xgb_forecast.csv

Collecting jupyter-dash
  Downloading jupyter_dash-0.4.2-py3-none-any.whl (23 kB)
Collecting ansi2html
  Downloading ansi2html-1.8.0-py3-none-any.whl (16 kB)
Collecting dash
  Downloading dash-2.8.1-py3-none-any.whl (9.9 MB)
     |████████████████████████████████| 9.9 MB 40.6 MB/s            
Collecting retrying
  Downloading retrying-1.3.4-py3-none-any.whl (11 kB)
Collecting dash-html-components==2.0.0
  Downloading dash_html_components-2.0.0-py3-none-any.whl (4.1 kB)
Collecting importlib-metadata
  Downloading importlib_metadata-4.8.3-py3-none-any.whl (17 kB)
Collecting dash-table==5.0.0
  Downloading dash_table-5.0.0-py3-none-any.whl (3.9 kB)
Collecting dash-core-components==2.0.0
  Downloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB)
Installing collected packages: importlib-metadata, dash-table, dash-html-components, dash-core-components, retrying, dash, ansi2html, jupyter-dash
  Attempting uninstall: importlib-metadata
    Found existing installation: importlib-metadat

ModuleNotFoundError: No module named 'snowflake'