# Refresh Moat Datasets for Q4 Campaigns

In [11]:
import sys
sys.path.append('../')

In [12]:
# essence/rtf imports
from rtf_utils.moat_utils import MoatTile,moat_schemas
from essence.analytics.platform import securedcredentials as secure_creds
from rtf_utils.gcp_utils import BigQuery,CloudStorage

# std lib imports
import json
import logging
import warnings
import os
import time

In [3]:
# Root logger: emit INFO and above.
logger = logging.getLogger()
logger.setLevel(level=logging.INFO)

# Silence the "end user credentials" warning.
warnings.filterwarnings(
    action="ignore",
    message="Your application has authenticated using end user credentials",
)

# Pin the project explicitly (gsutil otherwise forgets which project to use).
os.environ.update(GOOGLE_CLOUD_PROJECT='essence-analytics-dwh')

# Load Creds

In [13]:
# GCP analytics service account whose credentials are looked up in the vault.
service_account_email = '131786951123-compute@developer.gserviceaccount.com'
PROJECT_ID = 'essence-analytics-dwh'

# Credentials object handed to the CloudStorage helper further down.
credentialsFromVault = secure_creds.getCredentialsFromEssenceVault(
    service_account_email
)
print("Creds Loaded")

# Moat API token, forwarded to MoatTile.get_data by get_moat_data.
moat_token = secure_creds.getDataFromEssenceVault('Moat_Token_Google')
print("Token Loaded")

Creds Loaded
Token Loaded


## Q4 Moat Tiles - (Add new tiles here)
### This is the Moat "task" list: the tiles and filters we currently care about

In [14]:
## Define Moat Tasks for Stadia/Nest/Assistant
## Each entry is a (tile_id, level_filter, dimensions) tuple, unpacked in that
## order by the export loop.
q4_tiles = [
    ## Google Disp
    (2506, {'level1': 22783112}, ['date', 'level1', 'level3']),  # Stadia
    (2506, {'level1': 23221764}, ['date', 'level1', 'level3']),  # Nest 1
    (2506, {'level1': 23197607}, ['date', 'level1', 'level3']),  # Nest 2
    (2506, {'level1': 23278016}, ['date', 'level1', 'level3']),  # Assistant
    (2506, {'level1': 23219466}, ['date', 'level1', 'level3']),  # Pixel

    ## YT TrueView
    (13386, {'level1': 7492919005}, ['date', 'level1', 'level4']),  # Stadia
    (13386, {'level1': 7020493427}, ['date', 'level1', 'level4']),  # Pixel

    ## YT Reserve
    (6179366, {'level2': 2604566125}, ['date', 'level2', 'level4']),  # Stadia

    ## YT Reserve DBM
    (6178833, {'level1': 1533586}, ['date', 'level1', 'level4']),  # Assistant

    ## FB Display
    (6195503, {'level1': 10154328017481183}, ['date', 'level1', 'level4']),  # Pixel

    ## FB Video NA
    (6195505, {'level1': 10154328017481183}, ['date', 'level1', 'level4']),  # Pixel (need access)

    ## FB Video Master
    (8268, {'level1': 10154328017481183}, ['date', 'level1', 'level4']),

    ## Twitter Display
    (6195541, {'level1': 4503599660729481}, ['date', 'level1', 'level2']),  # Pixel
]

In [15]:
def get_moat_data(tile_id, start_date, end_date, level_filters=None, dimensions=None, **kwargs):
    """Request one Moat tile export and return what ``MoatTile.get_data`` returns.

    Builds a ``MoatTile`` from ``tile_id``, ``level_filters`` and ``dimensions``,
    then asks it for data between ``start_date`` and ``end_date`` using the
    notebook-level ``moat_token``.

    The caller treats the result as a local filename and treats a falsy result
    as "no data" (behaviour of ``get_data`` itself is not visible here).
    Extra keyword arguments are accepted and ignored.
    """
    return MoatTile(tile_id, level_filters, dimensions).get_data(
        start_date, end_date, moat_token
    )

def upload_local_file(local_filename, dest_bucket, folder=None):
    """Upload a local file to ``dest_bucket`` and return the upload result.

    The blob is named ``<folder>/<local_filename>`` when ``folder`` is truthy,
    otherwise just ``local_filename``. A ``CloudStorage`` helper is created
    from the notebook-level ``credentialsFromVault`` on every call.
    """
    storage = CloudStorage(credentialsFromVault)
    target_name = folder + "/" + local_filename if folder else local_filename
    return storage.upload_blob(dest_bucket, target_name, local_filename, mode='filename')

#### Run this to Get Data and Upload to GCS
- Each export is stored in a tile-specific directory under `brand_reporting`
    - **ex**: `gs://rtf_staging/brand_reporting/2506`
- `start_date` and `end_date` variables will control what date range the moat exports are pulled for

In [None]:
# Date range (written as YYYYMMDD) passed to every Moat export request.
start_date = '20191001'
end_date = '20191020'

files_in_gcs = [] ## tile_ids whose export was uploaded to GCS
for tile_id, level_filter, dimensions in q4_tiles:
    time.sleep(10) # for rate limiting
    # The placeholder used to be "{]}", which made str.format raise on the
    # very first iteration.
    print("Get data for {}".format(tile_id))

    filename = get_moat_data(tile_id, start_date, end_date, level_filter, dimensions)

    if not filename:
        print("No Data")
        continue
    print("Stored at {}".format(filename))

    try:
        print("Try GCS Upload")
        # upload_local_file's parameter is `dest_bucket`; the previous
        # `bucket=` keyword raised a TypeError that the except below swallowed,
        # so no file was ever uploaded.
        blob = upload_local_file(
            filename,
            dest_bucket="rtf_staging",
            folder="brand_reporting/{}".format(tile_id),
        )
    except Exception as e:
        # Best effort: report the failing tile and carry on with the rest.
        # The local export is left on disk so the upload can be retried.
        print("GCS upload failed for tile {}: {}".format(tile_id, e))
        continue

    files_in_gcs.append(tile_id)

    print("Clean Up File")
    try:
        os.remove(filename)
    except OSError as e:
        # The upload already succeeded; a leftover local file is not fatal.
        print("Could not remove {}: {}".format(filename, e))

## Upload to BigQuery Tools

In [4]:
from google.cloud import bigquery  # used by the load cell for bigquery.SchemaField

# `bigquery` is a module and cannot be called. The load cell calls
# `bq.load_from_gcs`, so `bq` must be the rtf_utils BigQuery helper imported at
# the top of the notebook.
# NOTE(review): assumes BigQuery is constructed from the vault credentials the
# same way CloudStorage is -- confirm against rtf_utils.gcp_utils.
# Run the "Load Creds" cell first; `credentialsFromVault` is defined there.
bq = BigQuery(credentialsFromVault)

NameError: name 'credentialsFromVault' is not defined

In [None]:
dest_dataset = 'RTF_DWH_Moat'

# The export/upload cell collects uploaded tile_ids in `files_in_gcs`;
# `tiles_in_gcs` was previously read before it was ever assigned (NameError).
# De-duplicate because one tile_id can appear several times in q4_tiles.
tiles_in_gcs = sorted(set(files_in_gcs))

for tile_id in tiles_in_gcs:
    print(tile_id)

    schema_repr = moat_schemas.get(tile_id) ## loads schema from moat_utils
    if not schema_repr:
        # Without this guard a tile with no schema entry crashes the whole loop.
        print("No schema in moat_schemas for tile {}, skipping".format(tile_id))
        continue
    bq_schema = [bigquery.SchemaField.from_api_repr(x) for x in schema_repr]

    # Table suffix follows the export date range instead of a hard-coded one.
    dest_table = "{}_{}_{}".format(tile_id, start_date, end_date)
    # NOTE(review): the "<tile_id>*" wildcard also matches any other object
    # whose name starts with the same digits -- confirm that is acceptable.
    source_uri = "gs://rtf_staging/brand_reporting/{}*".format(tile_id)

    load = bq.load_from_gcs(dest_dataset, source_uri, dest_table, bq_schema, extension='json')
    print(load.job_id)