In [1]:
!pip install --user google-cloud-bigquery-storage==2.16.0



In [None]:
# Uncomment and run this cell if you are using Google Colab.

# from google.colab import auth
# auth.authenticate_user()

In [2]:
from google.cloud.bigquery_storage import BigQueryReadClient
from google.cloud.bigquery_storage import types
from google.cloud import bigquery_storage
import pandas
import os

In [3]:
# Project used as the `parent` of the read session (presumably the project
# that is billed for the read). Set the GOOGLE_CLOUD_PROJECT environment
# variable before starting the kernel to use your own project; the original
# demo project remains the fallback. `or` (instead of a .get() default) also
# covers a variable that is set but empty.
BILLING_PROJECT_ID = os.environ.get('GOOGLE_CLOUD_PROJECT') or 'michaelabel-demo'

# Location of the table to read.
PROJECT_ID = 'bigquery-public-data'
DATASET_ID = 'google_trends'
TABLE_ID = 'international_top_rising_terms'

# Resource paths handed to the read session: the table to read and the parent project.
table_spec = f'projects/{PROJECT_ID}/datasets/{DATASET_ID}/tables/{TABLE_ID}'
parent = f'projects/{BILLING_PROJECT_ID}'

In [4]:
# Options for the read session: return only the four listed columns, and only
# the rows that satisfy the percent_gain filter.
read_options = types.ReadSession.TableReadOptions(
    row_restriction='percent_gain > 500',
    selected_fields=['term', 'country_name', 'region_name', 'percent_gain'],
)

In [5]:
# Client for the BigQuery Storage Read API.
bqstorageclient = bigquery_storage.BigQueryReadClient()

# Describe what to read: the table, Arrow as the data format, and the
# column/row options defined in read_options.
requested_session = types.ReadSession(
    table=table_spec,
    data_format=types.DataFormat.ARROW,
    read_options=read_options,
)

# max_stream_count is an upper bound: the service may hand back fewer streams
# than requested (or none at all when there is nothing to read).
read_session = bqstorageclient.create_read_session(
    parent=parent,
    read_session=requested_session,
    max_stream_count=4,
)

if not read_session.streams:
    raise RuntimeError('The read session returned no streams; there is no data to read.')

# Read a single stream: index 3 (as before) when four streams come back,
# otherwise the last stream that exists, instead of failing with IndexError.
stream = read_session.streams[min(3, len(read_session.streams) - 1)]
reader = bqstorageclient.read_rows(stream.name)



In [6]:
# Convert every page of the selected stream into its own DataFrame.
frames = [page.to_dataframe() for page in reader.rows(read_session).pages]

# pandas.concat raises ValueError on an empty list, which is what we would get
# if the stream yields no pages; fall back to an empty frame in that case.
dataframe = pandas.concat(frames) if frames else pandas.DataFrame()
print(dataframe.head())
print('Number of streams: ', len(read_session.streams))

  country_name       term region_name  percent_gain
0     Malaysia  exit poll    Selangor          1050
1     Malaysia  exit poll    Selangor          1050
2     Malaysia  exit poll    Selangor          1050
3     Malaysia  exit poll    Selangor          1050
4     Malaysia  exit poll    Selangor          1050
Number of streams:  4


In [7]:
def read_data(table_spec, read_options, no_streams, stream_index=3):
    """Create a read session on a table and load rows from it into a DataFrame.

    The session is created under the module-level ``parent`` project path and
    requests Arrow as the data format.

    Args:
        table_spec: Table path passed as the read session's ``table``.
        read_options: Options passed as the read session's ``read_options``.
        no_streams: Passed as ``max_stream_count``. This is an upper bound;
            the service may return fewer streams, or none.
        stream_index: Non-negative index of the single stream to read. The
            default of 3 keeps the original behaviour. An index beyond the
            last returned stream is clamped to the last stream rather than
            raising IndexError. Pass ``None`` to read every stream, one after
            another, and return all of their rows.

    Returns:
        A DataFrame built by concatenating the pages that were read, or an
        empty DataFrame when the session has no streams or the selected
        stream(s) yield no pages.
    """
    bqstorageclient = bigquery_storage.BigQueryReadClient()

    requested_session = types.ReadSession(
        table=table_spec,
        data_format=types.DataFormat.ARROW,
        read_options=read_options,
    )

    read_session = bqstorageclient.create_read_session(
        parent=parent,
        read_session=requested_session,
        max_stream_count=no_streams,
    )

    # Pick which streams to read. The service is free to return fewer streams
    # than requested, so never index past the end of the list.
    streams = list(read_session.streams)
    if stream_index is None:
        selected = streams
    elif streams:
        selected = [streams[min(stream_index, len(streams) - 1)]]
    else:
        selected = []

    frames = []
    for stream in selected:
        reader = bqstorageclient.read_rows(stream.name)
        for message in reader.rows(read_session).pages:
            frames.append(message.to_dataframe())

    # pandas.concat raises ValueError on an empty list, so guard the no-data case.
    dataframe = pandas.concat(frames) if frames else pandas.DataFrame()
    print(dataframe.head())
    print('Number of streams: ', len(read_session.streams))

    return dataframe

In [8]:
%%time 
# Time read_data with max_stream_count=4; the returned frame is discarded.
# NOTE(review): read_data reads only one stream of the session, so this times
# that single stream, not a read of the full result — confirm that is intended.
_ = read_data(table_spec, read_options, 4)

  country_name       term region_name  percent_gain
0     Malaysia  exit poll    Selangor          1050
1     Malaysia  exit poll    Selangor          1050
2     Malaysia  exit poll    Selangor          1050
3     Malaysia  exit poll    Selangor          1050
4     Malaysia  exit poll    Selangor          1050
Number of streams:  4
CPU times: user 42.2 s, sys: 14.6 s, total: 56.8 s
Wall time: 44.4 s


In [9]:
%%time 
# Time read_data with max_stream_count=16; the returned frame is discarded.
# NOTE(review): read_data reads only one stream of the session, so this times
# that single stream, not a read of the full result — confirm that is intended.
_ = read_data(table_spec, read_options, 16)

  country_name                  term           region_name  percent_gain
0      Denmark  australia vs england  North Denmark Region          2000
1      Denmark  australia vs england  North Denmark Region          2000
2      Denmark  australia vs england  North Denmark Region          2000
3      Denmark  australia vs england  North Denmark Region          2000
4      Denmark  australia vs england  North Denmark Region          2000
Number of streams:  16
CPU times: user 7.99 s, sys: 2.56 s, total: 10.6 s
Wall time: 9.83 s


In [16]:
%%time 
# Time read_data with max_stream_count=64; the returned frame is discarded.
# NOTE(review): read_data reads only one stream of the session, so this times
# that single stream, not a read of the full result — confirm that is intended.
_ = read_data(table_spec, read_options, 64)

   country_name                  term      region_name  percent_gain
0  Saudi Arabia  pakistan vs zimbabwe  Riyadh Province          3750
1  Saudi Arabia  pakistan vs zimbabwe  Riyadh Province          3750
2  Saudi Arabia  pakistan vs zimbabwe  Riyadh Province          3750
3  Saudi Arabia  pakistan vs zimbabwe  Riyadh Province          3750
4  Saudi Arabia  pakistan vs zimbabwe  Riyadh Province          3750
Number of streams:  64
CPU times: user 2.66 s, sys: 941 ms, total: 3.6 s
Wall time: 4.44 s
