In [2]:
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
import seaborn as sns
import pandas_gbq
import pydata_google_auth
from concurrent.futures import ThreadPoolExecutor, as_completed
from google.cloud import bigquery
from tqdm.notebook import tqdm
from dateutil import tz
from BigQueryTools import QueryTool
import re

# OAuth scopes requested for the user credentials: Google Cloud Platform and
# Google Drive.
SCOPES = [
    'https://www.googleapis.com/auth/cloud-platform',
    'https://www.googleapis.com/auth/drive',
]
# NOTE(review): `credentials` is not passed to anything in the cells visible
# here (e.g. `bigquery.Client()` is created without arguments) - confirm
# whether it is still needed or should be handed to the clients explicitly.
credentials = pydata_google_auth.get_user_credentials(
    SCOPES,
    # Set auth_local_webserver to True to have a slightly more convenient
    # authorization flow. Note, this doesn't work if you're running from a
    # notebook on a remote server, such as over SSH or with Google Colab.
    auth_local_webserver=True,
)

# Load the BigQuery IPython extension; the %%bigquery cell magic used further
# down in this notebook comes from it.
%load_ext google.cloud.bigquery
# Set the GCLOUD_PROJECT environment variable for this session.
%env GCLOUD_PROJECT=nbcu-ds-sandbox-a-001

env: GCLOUD_PROJECT=nbcu-ds-sandbox-a-001


In [12]:
# Overall date range for the reports, as YYYY-MM-DD strings (the format the
# report-date helpers parse with strptime).
start_date, end_date = '2022-01-01', '2023-11-30'

In [13]:
def get_monthly_report_dates(start_date, end_date):
    """Build one full-calendar-month reporting window per month in a range.

    Every month from the one containing ``start_date`` through the one
    containing ``end_date`` gets an entry. Each window always spans the whole
    calendar month, regardless of the day-of-month of the inputs.

    Args:
        start_date: First date of interest, formatted ``YYYY-MM-DD``.
        end_date: Last date of interest, formatted ``YYYY-MM-DD``.

    Returns:
        dict: Keys are ``"%b_%Y"`` labels (e.g. ``"Jan_2022"``) in
        chronological order; values are
        ``{"report_start_date": ..., "report_end_date": ...}`` with
        ``YYYY-MM-DD`` strings. Empty when ``start_date`` is after
        ``end_date``.

    Raises:
        ValueError: If either argument does not match ``YYYY-MM-DD``.
    """
    cursor = datetime.strptime(start_date, "%Y-%m-%d")
    end = datetime.strptime(end_date, "%Y-%m-%d")
    monthly_reports = {}

    while cursor <= end:
        month_start = cursor.replace(day=1)
        # Day 28 exists in every month, and 28 + 4 days always lands inside
        # the following month; snapping back to day 1 gives its first day.
        next_month_start = (month_start.replace(day=28) + timedelta(days=4)).replace(day=1)
        month_end = next_month_start - timedelta(days=1)

        monthly_reports[month_start.strftime("%b_%Y")] = {
            "report_start_date": month_start.strftime("%Y-%m-%d"),
            "report_end_date": month_end.strftime("%Y-%m-%d"),
        }

        # Advance to the 1st (not the 1st-4th) of the next month so that an
        # end_date early in a month still gets that month's window.
        cursor = next_month_start

    return monthly_reports

def get_quarter(p_date) -> int:
    """Return the calendar quarter (1-4) for ``p_date``.

    Only ``p_date.month`` is read, so any object exposing a 1-12 ``month``
    attribute (``date``, ``datetime``, ...) works.
    """
    zero_based_quarter, _ = divmod(p_date.month - 1, 3)
    return zero_based_quarter + 1

def get_quarterly_report_dates(start_date_str, end_date_str):
    """Build one full-calendar-quarter reporting window per quarter in a range.

    Starting with the quarter containing ``start_date_str``, a window is
    emitted for each quarter whose first day is on or before
    ``end_date_str``. Windows always cover the whole quarter.

    Args:
        start_date_str: First date of interest, formatted ``YYYY-MM-DD``.
        end_date_str: Last date of interest, formatted ``YYYY-MM-DD``.

    Returns:
        dict: Keys are ``"Q<n>_<year>"`` labels (e.g. ``"Q4_2023"``); values
        are ``{'report_start_date': ..., 'report_end_date': ...}`` with
        ``YYYY-MM-DD`` strings. Empty when the start is after the end.
    """
    cursor = datetime.strptime(start_date_str, '%Y-%m-%d')
    last_day = datetime.strptime(end_date_str, '%Y-%m-%d')

    quarter_dates = {}

    while cursor <= last_day:
        label = 'Q{}_{}'.format(get_quarter(cursor), cursor.year)

        # First month of the quarter containing the cursor: 1, 4, 7 or 10.
        first_month = (cursor.month - 1) // 3 * 3 + 1
        quarter_start = datetime(cursor.year, first_month, 1)

        # First day of the following quarter; Q4 rolls over into January of
        # the next year.
        following_month = first_month + 3
        if following_month > 12:
            following_quarter = datetime(cursor.year + 1, following_month - 12, 1)
        else:
            following_quarter = datetime(cursor.year, following_month, 1)
        quarter_end = following_quarter - timedelta(days=1)

        quarter_dates[label] = {
            'report_start_date': quarter_start.strftime('%Y-%m-%d'),
            'report_end_date': quarter_end.strftime('%Y-%m-%d')
        }
        cursor = following_quarter

    return quarter_dates

def generate_report_variants(period_dict, suffix="report"):
    """Wrap each reporting period into a named variant entry.

    Args:
        period_dict: Mapping of period label (e.g. ``"Jan_2022"``) to that
            period's parameter dict.
        suffix: Text appended to the label, joined with ``_``, to form the
            variant name.

    Returns:
        dict: ``"<period>_<suffix>"`` mapped to
        ``{'string_format': {'report': <period>}, 'params': <params>}``.
        The ``params`` object is passed through as-is, not copied.
    """
    variants = {}
    for period, params in period_dict.items():
        variant_name = '_'.join([period, suffix])
        variants[variant_name] = {
            'string_format': {'report': period},
            'params': params,
        }
    return variants

In [14]:
# Reporting windows for the configured date range, keyed by period label.
months = get_monthly_report_dates(start_date=start_date, end_date=end_date)
quarters = get_quarterly_report_dates(start_date_str=start_date, end_date_str=end_date)

# Variant specs named "<period>_viewing"; these are what the viewing cell
# below hands to bq.add together with the SQL template.
monthly_viewing_tables = generate_report_variants(months, suffix='viewing')
quarterly_viewing_tables = generate_report_variants(quarters, suffix='viewing')

# The same periods with the default "report" suffix.
month_tables = generate_report_variants(months, suffix='report')
quarter_tables = generate_report_variants(quarters, suffix='report')

# Query runner that the following cells register jobs on.
bq = QueryTool()

# Viewing Tables

In [6]:
# Read the viewing SQL template once; the file is only needed for the read.
with open('CRM_Viewing.sql') as file:
    sql_file = file.read()

# Register the template once per monthly and per quarterly variant.
# NOTE(review): the saved output of this cell lists only Oct_2023, Nov_2023
# and Q4_2023, which does not match the 2022-01-01..2023-11-30 range set in
# the date cell - it was presumably last run with a narrower range. Re-run
# the notebook top to bottom to confirm what actually gets queued.
bq.add(sql_file, monthly_viewing_tables)
bq.add(sql_file, quarterly_viewing_tables)

Added: Oct_2023_viewing	 | 	Awaiting start
Added: Nov_2023_viewing	 | 	Awaiting start
Added: Q4_2023_viewing	 | 	Awaiting start


In [7]:
# Start the jobs queued on `bq`; per the saved output this prints one
# "start <job id>" line per job and then "All jobs in queue started".
bq.start()

start 4b4cccf9-9286-4811-a920-2a0eabfe1efastart 8b41f0fb-0280-4215-b469-deb1e71056ba

start 0c9f618a-7252-4362-9961-2684d35fab97
All jobs in queue started


In [11]:
# Print the current job status, split into "Active jobs" and "Finished jobs".
# Presumably meant to be re-run until nothing is left under "Active jobs"
# before moving on to the merge cell - confirm against QueryTool.
bq.update()

Active jobs: 


Finished jobs: 
Oct_2023_viewing	 | 	8b41f0fb-0280-4215-b469-deb1e71056ba	 | 	Created: Dec 05, 2023 11:28 AM	 | 	Last Checked: Dec 05, 2023 12:08 PM	 | 	DONE
Nov_2023_viewing	 | 	4b4cccf9-9286-4811-a920-2a0eabfe1efa	 | 	Created: Dec 05, 2023 11:28 AM	 | 	Last Checked: Dec 05, 2023 12:08 PM	 | 	DONE
Q4_2023_viewing	 | 	0c9f618a-7252-4362-9961-2684d35fab97	 | 	Created: Dec 05, 2023 11:28 AM	 | 	Last Checked: Dec 05, 2023 01:57 PM	 | 	DONE


In [15]:
# Stack the per-month Video_Viewing_<period> tables into one table with
# UNION ALL (rows are concatenated as-is; nothing is aggregated here).
# Every monthly table named by `months` has to exist for the query to succeed.
query = ''.join([
    "\n",
    "        CREATE or replace TABLE `nbcu-ds-sandbox-a-001.SLi_sandbox.SILVER_VIDEO_VIEWING_MONTHLY` AS \n",
    "        \n",
    "        SELECT *\n",
    "        FROM ( \n",
    "        ",
    ' UNION ALL '.join(
        f"(select * from `nbcu-ds-sandbox-a-001.SLi_sandbox.Video_Viewing_{report}`)"
        for report in months
    ),
    "\n",
    "        )\n",
    "        ",
])

# .result() waits for the query job to finish, so the table is in place
# before the next cell reads it.
with bigquery.Client() as client:
    client.query(query).result()

In [24]:
%%bigquery

-- Preview a handful of rows from the merged monthly viewing table.
SELECT *
FROM `nbcu-ds-sandbox-a-001.SLi_sandbox.SILVER_VIDEO_VIEWING_MONTHLY`
LIMIT 10;

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,Report_Month,adobe_tracking_id,Distinct_Content_Starts,Total_Content_Starts,Viewing_Time,Distinct_Viewing_Sessions,Repertoire_Pavo_Method
0,2022-09-01,Gy41X9ZjW7Bj8i6TdvAMRlmGjhsScJm5UamSbJFTdH4=,1,256,0.0,1,0
1,2023-06-01,Xc/AkZ8lyH9G+/8LhMLa40+p7+YUfKUXROT6T4RyBwk=,1,256,68.989167,61,1
2,2022-07-01,/SjNXNJm2Ob3H5UgYgi9BN4sCG18QJXVth3x0MRbS+M=,1,256,50.935,67,1
3,2023-04-01,9JmibFKdw5nmaA5PdlhdpxKINzIldwNxGVCslLbqZn0=,1,256,114.795,39,1
4,2023-03-01,eho7G7Uw3ul9wRa5NoYD8QvzYZOKi+Z+Y9zw0zB5oA4=,1,256,83.925278,32,2
5,2023-09-01,9mEtbY1pG3K7uJJ7HpEaLZGCIoMg/rvTF9UulhH/cmY=,1,256,10.385833,3,2
6,2023-07-01,Jym30NbY7oW0GnTfu68WRmR36tUerVvZYk62OK5/L2w=,1,256,176.461944,42,2
7,2023-01-01,RskRDrmxk/u+Ufkz7cSzkD8+dmJ2Wm+X9D6H0HIzK9s=,1,256,74.553333,20,2
8,2022-01-01,9OjLneXK498dfkR9RrwzzJXm9Os05AOLIjDAF3DDSLA=,1,256,115.343889,64,2
9,2023-08-01,NCtZ3f/d/vFTzcKuIwXqlLHwNveFBRm5UMhnuV9oNGs=,1,256,48.297778,103,2
