# Refresh Moat Datasets for Q4 Campaigns

In [29]:
import sys
sys.path.append('../')
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [52]:
%autoreload 2

In [66]:
# essence/rtf imports
from rtf_utils.moat_utils import MoatTile,moat_schemas
from essence.analytics.platform import securedcredentials as secure_creds
from rtf_utils.gcp_utils import BigQuery,CloudStorage

# std lib imports
import json
import logging
import warnings
import os
import time

In [38]:
# Pin the GCP project explicitly: for some reason gsutil forgets the project otherwise.
os.environ['GOOGLE_CLOUD_PROJECT'] = 'essence-analytics-dwh'

# Silence the "end user credentials" warning so it does not drown the cell output.
warnings.filterwarnings(
    "ignore",
    message="Your application has authenticated using end user credentials",
)

# Root logger at INFO so that INFO-level records are shown in the notebook.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Load Creds

In [39]:
# Service account whose credentials are requested from the Essence vault below.
service_account_email='131786951123-compute@developer.gserviceaccount.com' ## gcp analytics service account
# NOTE(review): PROJECT_ID is not referenced anywhere else in the visible notebook; the
# same project name is hard-coded separately in os.environ['GOOGLE_CLOUD_PROJECT'].
PROJECT_ID='essence-analytics-dwh'

# Credentials object that is later handed to the CloudStorage and BigQuery helpers.
credentialsFromVault=secure_creds.getCredentialsFromEssenceVault(service_account_email)
print("Creds Loaded")
# Moat API token; get_moat_data() passes it to MoatTile.get_data().
moat_token = secure_creds.getDataFromEssenceVault('Moat_Token_Google')
print("Token Loaded")

Creds Loaded
Token Loaded


## Q4 Moat Tiles - (Add new tiles here)
### This is the Moat "task" list: the tiles and filters we currently care about

In [48]:
## Moat tasks for the Q4 Stadia / Nest / Assistant / Pixel / Store campaigns.
## Each entry is a tuple (tile_id, level_filter, dimensions); the download loop unpacks
## it in that order and forwards the three values to get_moat_data().
q4_tiles = [
## Google Display
(2506,{'level1':22783112},['date','level1','level3']), # Stadia
(2506,{'level1':23221764},['date','level1','level3']), # Nest 1
(2506,{'level1':23197607},['date','level1','level3']), # Nest 2
(2506,{'level1':23278016},['date','level1','level3']), # Assistant
(2506,{'level1':23219466},['date','level1','level3']), # Pixel

## Google Video
(2698,{'level1':23197607},['date','level1','level3']), # Nest
(2698,{'level1':23219466},['date','level1','level3']), # Pixel

## YT TrueView Master
(13386,{'level1':7492919005},['date','level1','level4']), # Stadia
(13386,{'level1':7020493427},['date','level1','level4']), # Pixel
(13386,{'level1':8071529839},['date','level1','level4']), # Nest NA
(13386,{'level1':1703715956},['date','level1','level4']), # Assistant


## YT TrueView NA (disabled until access is granted)
#(6196171,{'level1':7020493427},['date','level1','level4']), # Pixel    >>> need access

## YT Reserve (note: filtered on level2, not level1)
(6179366,{'level2':2604566125},['date','level2','level4']), # Stadia

## YT Reserve DBM
(6178833,{'level1':1533586},['date','level1','level4']), # Assistant

## FB Display
(6195503,{'level1':10154328017481183},['date','level1','level4']), # Pixel

## FB Video NA
(6195505,{'level1':10154328017481183},['date','level1','level4']), # Pixel (flagged "need access")
(6195505,{'level1':242547403246039},['date','level1','level4']), # Nest
(6195505,{'level1':1381439485460084},['date','level1','level4']), # Store

## FB Video Master
(8268,{'level1':10154328017481183},['date','level1','level4']), # same level1 id as the FB/IG Pixel entries

## IG Video
(6195510,{'level1':242547403246039},['date','level1','level4']), # Nest
(6195510,{'level1':1381439485460084},['date','level1','level4']), # Store
(6195510,{'level1':10154328017481183},['date','level1','level4']), # Pixel


## Twitter Display
(6195541,{'level1':4503599660729481},['date','level1','level2']), # Pixel

## Twitter Video
(6195543,{'level1':4503599660729481},['date','level1','level2']), # Pixel
(6195543,{'level1':168264219},['date','level1','level2']), # Nest
]



In [45]:
def get_moat_data(tile_id,start_date, end_date,level_filters=None,dimensions=None,**kwargs):
    """Request one Moat tile export and return whatever ``MoatTile.get_data`` returns.

    Args:
        tile_id: Moat tile identifier handed to ``MoatTile``.
        start_date: First day of the export range, forwarded unchanged.
        end_date: Last day of the export range, forwarded unchanged.
        level_filters: Optional filter mapping handed to ``MoatTile``.
        dimensions: Optional list of dimensions handed to ``MoatTile``.
        **kwargs: Accepted but ignored.

    Returns:
        The result of ``MoatTile.get_data``. The download loop treats it as the
        name of a local file, and a falsy value as "no data".

    Note:
        Authenticates with the notebook-level ``moat_token``.
    """
    moat_tile = MoatTile(tile_id, level_filters, dimensions)
    return moat_tile.get_data(start_date, end_date, moat_token)

def upload_local_file(local_filename,dest_bucket,folder=None):
    """Upload a local file to a bucket through the ``CloudStorage`` helper.

    Args:
        local_filename: Path of the file to upload; also used as the object's base name.
        dest_bucket: Name of the destination bucket.
        folder: Optional prefix. When truthy the object is named
            ``<folder>/<local_filename>``; otherwise just ``<local_filename>``.

    Returns:
        Whatever ``CloudStorage.upload_blob`` returns.

    Note:
        Builds the client from the notebook-level ``credentialsFromVault``.
    """
    storage_client = CloudStorage(credentialsFromVault)
    target_name = "/".join((folder, local_filename)) if folder else local_filename
    return storage_client.upload_blob(dest_bucket, target_name, local_filename, mode='filename')

#### Run this to Get Data and Upload to GCS
- Each export is stored in a tile-specific directory under `brand_reporting` in the `rtf_staging` bucket
    - **ex**: `gs://rtf_staging/brand_reporting/2506`
- The `start_date` and `end_date` variables control the date range the Moat exports are pulled for

In [49]:
# Date range of the Moat exports, as strings formatted like '20191001'.
start_date = '20191001'
end_date = '20191027'

# tile_ids for which an export was uploaded to GCS. A tile_id is appended once per
# uploaded file, so it can repeat; the BigQuery cell de-duplicates it.
files_in_gcs = []
for tile_id, level_filter, dimensions in q4_tiles:
    # `os` and `time` come from the imports cell at the top of the notebook.
    # The former os.system("clear") call was dropped: it spawned a shell per tile and
    # never cleared the notebook's own cell output.
    time.sleep(10)  # for rate limiting
    print("Get data for {}".format(tile_id))

    filename = get_moat_data(tile_id, start_date, end_date, level_filter, dimensions)

    if not filename:
        print("No Data")
        continue
    print("Stored at {}".format(filename))

    try:
        print("Try GCS Upload")
        blob = upload_local_file(
            filename,
            dest_bucket="rtf_staging",
            folder="brand_reporting/{}".format(tile_id),
        )
        print("Clean Up File")
        files_in_gcs.append(tile_id)
        # The local copy is removed only after a successful upload; on failure it is
        # left on disk.
        os.remove(filename)
    except Exception as e:
        # Best effort: report the problem and carry on with the next tile.
        print(e)

INFO:root:API Request Time


Get data for 6179366


INFO:root:6179366_2604566125.json Saved


Stored at 6179366_2604566125.json
Try GCS Upload


INFO:root:File uploaded as brand_reporting/6179366/6179366_2604566125.json.


Clean Up File


INFO:root:API Request Time


Get data for 6178833


INFO:root:6178833_1533586.json Saved


Stored at 6178833_1533586.json
Try GCS Upload


INFO:root:File uploaded as brand_reporting/6178833/6178833_1533586.json.


Clean Up File


INFO:root:API Request Time


Get data for 6195503


INFO:root:6195503_10154328017481183.json Saved


Stored at 6195503_10154328017481183.json
Try GCS Upload


INFO:root:File uploaded as brand_reporting/6195503/6195503_10154328017481183.json.


Clean Up File


INFO:root:API Request Time


Get data for 6195505


INFO:root:6195505_10154328017481183.json Saved


Stored at 6195505_10154328017481183.json
Try GCS Upload


INFO:root:File uploaded as brand_reporting/6195505/6195505_10154328017481183.json.


Clean Up File


INFO:root:API Request Time


Get data for 6195505


INFO:root:6195505_242547403246039.json Saved


Stored at 6195505_242547403246039.json
Try GCS Upload


INFO:root:File uploaded as brand_reporting/6195505/6195505_242547403246039.json.


Clean Up File


INFO:root:API Request Time


Get data for 6195505


INFO:root:6195505_1381439485460084.json Saved


Stored at 6195505_1381439485460084.json
Try GCS Upload


INFO:root:File uploaded as brand_reporting/6195505/6195505_1381439485460084.json.


Clean Up File


INFO:root:API Request Time


Get data for 8268


INFO:root:8268_10154328017481183.json Saved


Stored at 8268_10154328017481183.json
Try GCS Upload


INFO:root:File uploaded as brand_reporting/8268/8268_10154328017481183.json.


Clean Up File


INFO:root:API Request Time


Get data for 6195510


INFO:root:6195510_242547403246039.json Saved


Stored at 6195510_242547403246039.json
Try GCS Upload


INFO:root:File uploaded as brand_reporting/6195510/6195510_242547403246039.json.


Clean Up File


INFO:root:API Request Time


Get data for 6195510


INFO:root:6195510_1381439485460084.json Saved


Stored at 6195510_1381439485460084.json
Try GCS Upload


INFO:root:File uploaded as brand_reporting/6195510/6195510_1381439485460084.json.


Clean Up File


INFO:root:API Request Time


Get data for 6195510


INFO:root:6195510_10154328017481183.json Saved


Stored at 6195510_10154328017481183.json
Try GCS Upload


INFO:root:File uploaded as brand_reporting/6195510/6195510_10154328017481183.json.


Clean Up File


INFO:root:API Request Time


Get data for 6195541


INFO:root:6195541_4503599660729481.json Saved


Stored at 6195541_4503599660729481.json
Try GCS Upload


INFO:root:File uploaded as brand_reporting/6195541/6195541_4503599660729481.json.


Clean Up File


INFO:root:API Request Time


Get data for 6195543


INFO:root:6195543_4503599660729481.json Saved


Stored at 6195543_4503599660729481.json
Try GCS Upload


INFO:root:File uploaded as brand_reporting/6195543/6195543_4503599660729481.json.


Clean Up File


INFO:root:API Request Time


Get data for 6195543


INFO:root:6195543_168264219.json Saved


Stored at 6195543_168264219.json
Try GCS Upload


INFO:root:File uploaded as brand_reporting/6195543/6195543_168264219.json.


Clean Up File


## Upload to BigQuery Tools

In [11]:
from google.cloud import bigquery  # needed below for bigquery.SchemaField
# BigQuery helper (imported from rtf_utils.gcp_utils) built with the vault credentials.
bq = BigQuery(credentialsFromVault)

In [67]:
dest_dataset = 'RTF_DWH_Moat'

# Unique tile_ids recorded by the download loop, plus three tiles added by hand.
# NOTE(review): presumably 2506/2698/13386 were exported in an earlier run — confirm.
# De-duplicate AFTER merging: on a full run the download loop records these ids too,
# and a repeated id would submit a second load job for the same destination table.
tiles_in_gcs = list(dict.fromkeys(list(set(files_in_gcs)) + [2506,2698,13386]))

for tile_id in tiles_in_gcs:
    print(tile_id)
    try:
        # Schema definitions come from moat_utils and are converted to SchemaField objects.
        bq_schema = [bigquery.SchemaField.from_api_repr(field) for field in moat_schemas.get(tile_id)]
    except Exception as e:
        # Narrower than a bare `except:` so Ctrl-C still interrupts the cell, and the
        # cause is reported (e.g. no schema registered for this tile).
        print("Uhoh: could not build schema for tile {}: {!r}".format(tile_id, e))
        continue
    # Weekly variant, kept for reference:
    #load = bq.load_from_gcs(dest_dataset, "gs://rtf_staging/brand_reporting/{}*".format(tile_id), "004_{}_{}".format(tile_id,end_date),bq_schema,extension='json')
    # NOTE(review): "{}*" looks like a prefix match, so tile 2506 would also pick up
    # objects under any tile id that starts with 2506 — confirm whether "{}/*" is intended.
    load = bq.load_from_gcs(dest_dataset, "gs://rtf_staging/brand_reporting/{}*".format(tile_id), "003_{}".format(tile_id),bq_schema,extension='json')
    print(load.job_id)

6179366
d072b682-25d6-44d9-9ef5-badc5b960ae4
8268
eaf68b11-1bbb-466d-a6a6-d5f1dfd69c1d
6195503
c64f8a5e-ee14-4672-b0e4-db93a2e990b4
6178833
46d1284f-3991-4404-95b0-5b542bfd1d97
6195505
c7dd3c40-82cb-4669-90bb-b074f48fbb27
6195541
93ebfe1e-a6c5-457f-a046-4d399d5a1cc0
6195510
59df72e8-6759-4b02-99ec-049bde8d63b5
6195543
0aa45e66-dc9b-461c-9522-d42a0a96bdfe
2506
f794d43f-4c6a-4c26-b284-cf4aae6acaf8
2698
1ad75ddb-2e98-42d6-ac3e-5d1b3d9e7cce
13386
bf9aced7-b3bf-4773-a50b-620ae964bd75


In [55]:
# Show the tile_ids that the load loop iterated over.
tiles_in_gcs

[6179366,
 8268,
 6195503,
 6178833,
 6195505,
 6195541,
 6195510,
 6195543,
 2506,
 2698,
 13386]

In [58]:
# Pair every tile_id in tiles_in_gcs with its 'type' entry from MoatTile.tiles_meta.
[
    (tile_id, MoatTile.tiles_meta.get(tile_id).get('type'))
    for tile_id in tiles_in_gcs
]

[(6179366, 'video'),
 (8268, 'video'),
 (6195503, 'disp'),
 (6178833, 'video'),
 (6195505, 'video'),
 (6195541, 'disp'),
 (6195510, 'video'),
 (6195543, 'video'),
 (2506, 'disp'),
 (2698, 'video'),
 (13386, 'video')]

## To Do
Add ,,,