In [None]:
!pip install google-api-core[grpc]==2.7.0

In [None]:
!pip install google-cloud-bigquery

In [2]:
import os
from google.cloud import bigquery

# Export the service-account key file location through the
# GOOGLE_APPLICATION_CREDENTIALS environment variable, then build the BigQuery
# client that every later cell reuses.
os.environ.update(GOOGLE_APPLICATION_CREDENTIALS=r'ServiceKey_GoogleCloud.json')
client = bigquery.Client()

### Query

In [3]:
# SQL text: at most 100 values of the `name` column for rows whose state is "TX"
# in the public usa_names table.
QUERY = (
    'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` '
    'WHERE state = "TX" '
    'LIMIT 100'
)

# Submit the query, fetch its result rows, and print one name per line.
query_job = client.query(QUERY)
rows = query_job.result()
for row in rows:
    print(row.name)

Frances
Alice
Beatrice
Ella
Gertrude
Josephine
Lula
Blanche
Marjorie
Christine
Margarita
Alta
Sara
Naomi
Sofia
Andrea
Delfina
Dominga
Elnora
Adele
Rafaela
Dixie
Luisa
Bess
Ernestine
Lorine
Patsy
Cecelia
Enriqueta
Lucinda
Alyce
Oneta
Apolonia
Gloria
Rhoda
Buna
Maye
Pansy
Gladys
Maria
Helen
Velma
Clara
Anna
Evelyn
Rosa
Mae
Bernice
Stella
Vivian
Mable
Maggie
Betty
Guadalupe
Lorene
Susie
Sadie
Erma
Verna
Amelia
Estelle
Nell
Margie
Celia
Lessie
Eloise
Millie
Olga
Letha
Rachel
Luz
Gussie
Sylvia
Aline
Barbara
Iris
Leila
Mozelle
Simona
Catalina
Hester
Willia
Allene
Avis
George
Isabella
Polly
Syble
Teodora
Lennie
Ricarda
Valerie
Frances
Gertrude
Sarah
Rosa
Lucy
Nora
Nettie
Odessa


### Create Table

In [8]:
# Fully-qualified destination in "project.dataset.table" form. It is defined in
# this cell so the cell also runs on a fresh kernel (previously `table_id` was
# only assigned by a later cell). Replace the placeholder with a real table ID.
table_id = "your-project.your_dataset.your_table_name"

# Two REQUIRED columns: a string `full_name` and an integer `age`.
schema = [
    bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
]

# Describe the table locally, then ask BigQuery to create it.
table = bigquery.Table(table_id, schema=schema)
table = client.create_table(table)  # Make an API request.
print(
    "Created table {}.{}.{}".format(table.project, table.dataset_id, table.table_id)
)

Created table dogwood-reserve-352205.demo_bq.test


### Create a Partitioned Table

In [11]:
# Destination: the client's own project, dataset "demo_bq", table "my_partitioned_table".
project = client.project
dataset_ref = bigquery.DatasetReference(project, 'demo_bq')  # dataset name
table_ref = dataset_ref.table("my_partitioned_table")  # table name

# Columns expressed as (name, type) pairs and expanded into SchemaField objects.
schema = [
    bigquery.SchemaField(column, kind)
    for column, kind in (("name", "STRING"), ("post_abbr", "STRING"), ("date", "DATE"))
]

table = bigquery.Table(table_ref, schema=schema)

# Daily partitions keyed on the "date" column; the expiration is 90 days
# written out in milliseconds (= 7776000000).
table.time_partitioning = bigquery.TimePartitioning(
    type_=bigquery.TimePartitioningType.DAY,
    field="date",
    expiration_ms=90 * 24 * 60 * 60 * 1000,
)

table = client.create_table(table)

print(
    "Created table {}, partitioned on column {}".format(
        table.table_id,
        table.time_partitioning.field,
    )
)

Created table my_partitioned_table, partitioned on column date


### Create a Clustered Table

In [12]:
# Fully-qualified destination in "project.dataset.table" form; replace the
# placeholder before running.
table_id = "your-project.your_dataset.your_table_name"

# Columns expressed as (name, type) pairs and expanded into SchemaField objects.
schema = [
    bigquery.SchemaField(column, kind)
    for column, kind in (("full_name", "STRING"), ("city", "STRING"), ("zipcode", "INTEGER"))
]

# Describe the table, declare its clustering columns (city, then zipcode),
# and ask BigQuery to create it.
table = bigquery.Table(table_id, schema=schema)
table.clustering_fields = ["city", "zipcode"]
table = client.create_table(table)  # Make an API request.

print(
    "Created clustered table {}.{}.{}".format(
        table.project,
        table.dataset_id,
        table.table_id,
    )
)

Created clustered table dogwood-reserve-352205.demo_bq.test_cluster


### Delete a Table

In [15]:
# Fully-qualified ID of the table to remove; replace the placeholder before running.
table_id = "your-project.your_dataset.your_table_name"

# not_found_ok=True is passed with the request; per the client library docs this
# tolerates a table that does not exist, so the message below is printed either way.
client.delete_table(table_id, not_found_ok=True)  # Make an API request.

print("Deleted table '{}'.".format(table_id))

Deleted table 'dogwood-reserve-352205.demo_bq.shopee'.


### Bulk Insert

* https://cloud.google.com/bigquery/docs/batch-loading-data

In [14]:
# Destination table (placeholder) and the local CSV file to upload.
table_id = "your-project.your_dataset.your_table_name"
file_path = "titanic.csv"

# Load-job settings: CSV input, skip the first row, and let BigQuery detect the schema.
job_config = bigquery.LoadJobConfig()
job_config.source_format = bigquery.SourceFormat.CSV
job_config.skip_leading_rows = 1
job_config.autodetect = True

# The file is opened in binary mode and handed to the client to start the load job.
with open(file_path, "rb") as source_file:
    job = client.load_table_from_file(source_file, table_id, job_config=job_config)

job.result()  # Waits for the job to complete.

# Re-read the table metadata to report how many rows and columns it now has.
table = client.get_table(table_id)  # Make an API request.
print(
    "Loaded {} rows and {} columns to {}".format(
        table.num_rows,
        len(table.schema),
        table_id,
    )
)

Loaded 891 rows and 12 columns to dogwood-reserve-352205.demo_bq.titanic
