In [None]:
"""
https://cloud.google.com/bigquery/docs/reference/libraries#client-libraries-install-python

https://cloud.google.com/bigquery/docs/quickstarts/quickstart-client-libraries#bigquery_simple_app_query-python

BigQuery Query History: https://console.cloud.google.com/bigquery?project=manymoons-215635&page=queries

Remember to run: pip install --upgrade google-cloud-bigquery

"""
# Imports: the Google Cloud BigQuery client library, and pandas for
# returning results as DataFrames
from google.cloud import bigquery
import pandas as pd

# Project id handed to the BigQuery client below; kept in one named
# constant so it can be changed in a single place
PROJECT_ID = 'manymoons-215635'

# Instantiates the client shared by every helper in this notebook
bigquery_client = bigquery.Client(project=PROJECT_ID)

### Function to grab table schema

In [None]:
def schema_row_to_dict(schema_row):
    """Copy the descriptive attributes of one schema entry into a plain dict.

    Parameters
    ----------
    schema_row : object
        Any object exposing ``name``, ``field_type``, ``mode``,
        ``description`` and ``fields`` attributes (``describe_table`` passes
        the entries of a table's ``schema``).

    Returns
    -------
    dict
        Keys ``colname``, ``type``, ``mode``, ``description`` and ``fields``,
        in that order, holding the corresponding attribute values.
    """
    # (output key, attribute read from schema_row) -- order fixes the key order
    key_to_attribute = (
        ('colname', 'name'),
        ('type', 'field_type'),
        ('mode', 'mode'),
        ('description', 'description'),
        ('fields', 'fields'),
    )
    return {key: getattr(schema_row, attribute) for key, attribute in key_to_attribute}

In [None]:
def describe_table(table='bq_events_all', dataset='dataset_dev'):
    """Return the schema of a BigQuery table as a DataFrame.

    Parameters
    ----------
    table : str
        Id of the table inside ``dataset``.
    dataset : str
        Id of the dataset, resolved against the project of the shared
        ``bigquery_client``.

    Returns
    -------
    pandas.DataFrame
        One row per entry of the table's ``schema``, each converted with
        ``schema_row_to_dict``.
    """
    # Client.dataset() is deprecated in google-cloud-bigquery; constructing
    # the DatasetReference against the client's own project is the
    # documented replacement and resolves to the same reference.
    dataset_ref = bigquery.DatasetReference(bigquery_client.project, dataset)
    table_ref = dataset_ref.table(table)
    fetched_table = bigquery_client.get_table(table_ref)
    return pd.DataFrame([schema_row_to_dict(field) for field in fetched_table.schema])

### Main BQ Query Function

In [None]:
def query_BQ(query):
    """
    Main Query Function: run a SQL query and return its rows as a DataFrame.

    Available datasets: 'dataset_dev'

    Available tables: 'bq_events_all'

    Example Query: "SELECT DISTINCT(event_type), COUNT(event_type) event_count FROM dataset_dev.bq_events_all GROUP BY event_type"

    Parameters
    ----------
    query : str
        SQL text, passed unchanged to ``bigquery_client.query``.

    Returns
    -------
    pandas.DataFrame
        One row per result row, with columns in result-schema order. When
        the query matches no rows the frame is empty but still carries the
        result's column names.
    """
    query_job = bigquery_client.query(query)
    results = query_job.result()
    # Take the column names from the result schema so that a zero-row result
    # still produces a frame with the expected columns. Fall back to pandas'
    # own inference (None) if the result exposes no schema fields.
    column_names = [field.name for field in results.schema] or None
    return pd.DataFrame([dict(row) for row in results], columns=column_names)

### Get Query-able Fields

In [None]:
# Fetch the schema of dataset_dev.bq_events_all as a DataFrame
# (one row per schema field); these are the fields available to query.
out_df = describe_table(table='bq_events_all', dataset='dataset_dev')

In [None]:
# Display the whole frame currently bound to out_df
out_df

### Query Events within a random session (with a non-anonymous user)

In [None]:
# Events (up to 100, oldest first) of one randomly chosen session.
# The random session is drawn only from events whose user_properties do not
# contain 'anonymousId'; previously it was drawn from all events and the
# filter applied afterwards, so the result was empty whenever an anonymous
# session happened to be picked.
query = """
    SELECT client_event_time, session_id, event_type, user_properties FROM `dataset_dev.bq_events_all`
    WHERE session_id = (
        SELECT session_id FROM `dataset_dev.bq_events_all`
        WHERE user_properties NOT LIKE '%anonymousId%'
        ORDER BY RAND() LIMIT 1
    )
    AND user_properties NOT LIKE '%anonymousId%' ORDER BY client_event_time LIMIT 100;
"""
out_df = query_BQ(query)

In [None]:
# Preview the first rows of the frame currently bound to out_df
out_df.head()

### Query All event types and # of occurrences

In [None]:
# Count occurrences of each event_type. GROUP BY event_type already yields
# one row per event type, so the DISTINCT in the query adds nothing further.
out_df = query_BQ("""
    SELECT DISTINCT(event_type), COUNT(event_type) event_count FROM dataset_dev.bq_events_all GROUP BY event_type
""")

In [None]:
# Preview the first rows of the frame currently bound to out_df
out_df.head()