In [18]:
import yaml
from google.cloud import bigquery


In [19]:
config = yaml.load(open("credentials.yml"), yaml.Loader)
client = bigquery.Client.from_service_account_json(config["credentials_path"])


In [20]:
# Define a reference to a dataset we are working with.
dataset_ref = bigquery.DatasetReference("bigquery-public-data", "hacker_news")
# List all the tables in the "hacker_news" dataset
print([table.table_id for table in client.list_tables(dataset_ref)])


['full']


In [13]:
# Construct a reference to the "full" table
table_ref = bigquery.TableReference(dataset_ref, "full")
# API request - fetch the table
table = client.get_table(table_ref)


In [14]:
# Print information on all the columns in the "full" table in the "hacker_news" dataset
table.schema


[SchemaField('title', 'STRING', 'NULLABLE', None, 'Story title', (), None),
 SchemaField('url', 'STRING', 'NULLABLE', None, 'Story url', (), None),
 SchemaField('text', 'STRING', 'NULLABLE', None, 'Story or comment text', (), None),
 SchemaField('dead', 'BOOLEAN', 'NULLABLE', None, 'Is dead?', (), None),
 SchemaField('by', 'STRING', 'NULLABLE', None, "The username of the item's author.", (), None),
 SchemaField('score', 'INTEGER', 'NULLABLE', None, 'Story score', (), None),
 SchemaField('time', 'INTEGER', 'NULLABLE', None, 'Unix time', (), None),
 SchemaField('timestamp', 'TIMESTAMP', 'NULLABLE', None, 'Timestamp for the unix time', (), None),
 SchemaField('type', 'STRING', 'NULLABLE', None, 'Type of details (comment, comment_ranking, poll, story, job, pollopt)', (), None),
 SchemaField('id', 'INTEGER', 'NULLABLE', None, "The item's unique id.", (), None),
 SchemaField('parent', 'INTEGER', 'NULLABLE', None, 'Parent comment ID', (), None),
 SchemaField('descendants', 'INTEGER', 'NULLABL

In [15]:
# Preview the first five lines of the "full" table
client.list_rows(table, max_results=5).to_dataframe()


Unnamed: 0,title,url,text,dead,by,score,time,timestamp,type,id,parent,descendants,ranking,deleted
0,,,"I would rather just have wired earbuds, period...",,zeveb,,1591717736,2020-06-09 15:48:56+00:00,comment,23467666,23456782,,,
1,,,DNS?,,nly,,1572810465,2019-11-03 19:47:45+00:00,comment,21436112,21435130,,,
2,,,These benchmarks seem pretty good. Filterable...,,mrkeen,,1591717727,2020-06-09 15:48:47+00:00,comment,23467665,23467426,,,
3,,,Oh really?<p>* Excel alone uses 86.1MB of priv...,,oceanswave,,1462987532,2016-05-11 17:25:32+00:00,comment,11677248,11676886,,,
4,,,These systems are useless. Of the many flaws:...,,nyxxie,,1572810473,2019-11-03 19:47:53+00:00,comment,21436113,21435025,,,


In [16]:
# Preview the first five entries in the "by" column of the "full" table
client.list_rows(table, selected_fields=table.schema[:1], max_results=5).to_dataframe()


Unnamed: 0,title
0,
1,
2,
3,
4,
