In [7]:
# Point GOOGLE_APPLICATION_CREDENTIALS at the local copy of the
# Google Cloud service-account JSON key file.
import os

os.environ.update({'GOOGLE_APPLICATION_CREDENTIALS': 'key.json'})

In [8]:
from google.cloud import bigquery

In [9]:
# Create the BigQuery 'Client' object that the following cells use for API requests.
# It is constructed with no arguments; presumably it authenticates through the
# GOOGLE_APPLICATION_CREDENTIALS variable set earlier — confirm.
client = bigquery.Client()

In [10]:
# In BigQuery, each dataset is contained in a corresponding project.
# In this case, the hacker_news dataset is contained in the bigquery-public-data project.

# Construct a reference to the 'hacker_news' dataset.
# The reference is built with DatasetReference(project, dataset_id) because
# Client.dataset() is deprecated in google-cloud-bigquery.
dataset_ref = bigquery.DatasetReference('bigquery-public-data', 'hacker_news')

# API request - fetch the dataset
dataset = client.get_dataset(dataset_ref)

In [11]:
# List all the tables in the "hacker_news" dataset
tables = list(client.list_tables(dataset))

# The loop variable is deliberately not named `table`: a later cell binds
# `table` to the fetched table object, and re-running this cell would
# otherwise overwrite it with a list item.
for table_item in tables:
    print(table_item.table_id)

comments
full
full_201510
stories


In [12]:
# Construct a reference to the "full" table inside the hacker_news dataset.
# Building the reference is a separate step from the fetch below.
table_ref = dataset_ref.table('full')

# API request - fetch the table; later cells read its schema and preview its rows
table = client.get_table(table_ref)

![title](./img/bigquery.png)

In [13]:
# Show information on all the columns in the "full" table in the "hacker_news" dataset.
# Each entry of the schema is a SchemaField, as shown in the cell output.

table.schema

[SchemaField('title', 'STRING', 'NULLABLE', 'Story title', (), None),
 SchemaField('url', 'STRING', 'NULLABLE', 'Story url', (), None),
 SchemaField('text', 'STRING', 'NULLABLE', 'Story or comment text', (), None),
 SchemaField('dead', 'BOOLEAN', 'NULLABLE', 'Is dead?', (), None),
 SchemaField('by', 'STRING', 'NULLABLE', "The username of the item's author.", (), None),
 SchemaField('score', 'INTEGER', 'NULLABLE', 'Story score', (), None),
 SchemaField('time', 'INTEGER', 'NULLABLE', 'Unix time', (), None),
 SchemaField('timestamp', 'TIMESTAMP', 'NULLABLE', 'Timestamp for the unix time', (), None),
 SchemaField('type', 'STRING', 'NULLABLE', 'Type of details (comment, comment_ranking, poll, story, job, pollopt)', (), None),
 SchemaField('id', 'INTEGER', 'NULLABLE', "The item's unique id.", (), None),
 SchemaField('parent', 'INTEGER', 'NULLABLE', 'Parent comment ID', (), None),
 SchemaField('descendants', 'INTEGER', 'NULLABLE', 'Number of story or poll descendants', (), None),
 SchemaField

In [14]:
# Preview the "full" table: request only its first five rows, then render
# them as a DataFrame.

preview_rows = client.list_rows(table, max_results=5)
preview_rows.to_dataframe()

Unnamed: 0,title,url,text,dead,by,score,time,timestamp,type,id,parent,descendants,ranking,deleted
0,,,The trouble with computer security &quot;defen...,,Animats,,1452382620,2016-01-09 23:37:00+00:00,comment,10873338,10871222,,,
1,,,That doesn&#x27;t help much when you&#x27;re i...,,Grishnakh,,1452382603,2016-01-09 23:36:43+00:00,comment,10873336,10871800,,,
2,,,"Are device drivers <i>ever</i> updated, much l...",,cbd1984,,1452382579,2016-01-09 23:36:19+00:00,comment,10873334,10873317,,,
3,,,To be specific about my claim: throwing away t...,,JackC,,1452382571,2016-01-09 23:36:11+00:00,comment,10873333,10865548,,,
4,,,"Meanwhile, San Francisco can&#x27;t muster the...",,capkutay,,1452382557,2016-01-09 23:35:57+00:00,comment,10873332,10873211,,,


In [15]:
# Preview the first five entries in the "by" column of the "full" table.
# The column is picked by name rather than by its position in the schema,
# so the preview does not silently switch columns if the schema order changes.

by_field = [field for field in table.schema if field.name == 'by']

client.list_rows(table, selected_fields=by_field, max_results=5).to_dataframe()

Unnamed: 0,by
0,Animats
1,Grishnakh
2,cbd1984
3,JackC
4,capkutay


### **EXERCISE**

In [16]:
# Fetch the dataset

from google.cloud import bigquery

# Create a 'Client' object dedicated to the exercise
exclient = bigquery.Client()

# Construct a reference to the 'chicago_crime' dataset.
# The reference is built with DatasetReference(project, dataset_id) because
# Client.dataset() is deprecated in google-cloud-bigquery.
exdataset_ref = bigquery.DatasetReference('bigquery-public-data', 'chicago_crime')

# API request - fetch the dataset through the exercise client created above,
# so this section does not depend on the `client` from the earlier cells
exdataset = exclient.get_dataset(exdataset_ref)

**1] Count tables in the dataset**

In [17]:
# List the tables in the dataset
tables = list(exclient.list_tables(exdataset))

# The loop variable is deliberately not named `table`, so the `table`
# object fetched in an earlier cell is not overwritten with a list item.
for table_item in tables:
    print(table_item.table_id)

crime


In [18]:
# Count the number of tables in the dataset.
# `tables` is the list built by the table-listing cell, so its length is the table count.

num_tables = len(tables)
print(num_tables)

1


**2] Explore the table schema**

In [19]:
# Construct a reference to the "crime" table
table_ref = exdataset_ref.table('crime')

# API request - fetch the table
table = exclient.get_table(table_ref)

# Preview the first five rows, using the same exercise client that fetched the table
exclient.list_rows(table, max_results=5).to_dataframe()

Unnamed: 0,unique_key,case_number,date,block,iucr,primary_type,description,location_description,arrest,domestic,...,ward,community_area,fbi_code,x_coordinate,y_coordinate,year,updated_on,latitude,longitude,location
0,10246123,HY434120,2015-09-19 00:01:00+00:00,107XX S AVENUE E,1152,DECEPTIVE PRACTICE,ILLEGAL USE CASH CARD,OTHER,False,False,...,10,52,11,1203836.0,1834299.0,2015,2018-02-10 15:50:01+00:00,41.700006,-87.529244,"(41.700006196, -87.529244454)"
1,10249320,HY436757,2015-09-23 22:30:00+00:00,023XX N OAK PARK AVE,1360,CRIMINAL TRESPASS,TO VEHICLE,VEHICLE NON-COMMERCIAL,False,False,...,36,18,26,1130692.0,1914937.0,2015,2018-02-10 15:50:01+00:00,41.922853,-87.795223,"(41.922853085, -87.795223066)"
2,10251049,HY438471,2015-09-24 00:01:00+00:00,052XX N PITTSBURGH AVE,1360,CRIMINAL TRESPASS,TO VEHICLE,STREET,False,False,...,41,10,26,1120336.0,1933652.0,2015,2018-02-10 15:50:01+00:00,41.974381,-87.832874,"(41.974381465, -87.832873852)"
3,10289907,HY477895,2015-10-26 15:00:00+00:00,052XX W 63RD PL,1185,DECEPTIVE PRACTICE,DECEPTIVE COLLECTION PRACTICES,RESIDENCE,False,False,...,13,64,11,1142388.0,1862000.0,2015,2018-02-10 15:50:01+00:00,41.777377,-87.753561,"(41.777377147, -87.753560657)"
4,10298702,HY487110,2015-11-02 21:00:00+00:00,043XX S KEELER AVE,920,MOTOR VEHICLE THEFT,ATT: AUTOMOBILE,STREET,False,False,...,14,57,7,1149068.0,1875589.0,2015,2018-02-10 15:50:01+00:00,41.814541,-87.728721,"(41.814541249, -87.728721044)"


In [20]:
# Show the schema (column definitions) of the table currently bound to `table`
table.schema

[SchemaField('unique_key', 'INTEGER', 'REQUIRED', 'Unique identifier for the record.', (), None),
 SchemaField('case_number', 'STRING', 'NULLABLE', 'The Chicago Police Department RD Number (Records Division Number), which is unique to the incident.', (), None),
 SchemaField('date', 'TIMESTAMP', 'NULLABLE', 'Date when the incident occurred. this is sometimes a best estimate.', (), None),
 SchemaField('block', 'STRING', 'NULLABLE', 'The partially redacted address where the incident occurred, placing it on the same block as the actual address.', (), None),
 SchemaField('iucr', 'STRING', 'NULLABLE', 'The Illinois Unifrom Crime Reporting code. This is directly linked to the Primary Type and Description. See the list of IUCR codes at https://data.cityofchicago.org/d/c7ck-438e.', (), None),
 SchemaField('primary_type', 'STRING', 'NULLABLE', 'The primary description of the IUCR code.', (), None),
 SchemaField('description', 'STRING', 'NULLABLE', 'The secondary description of the IUCR code, a s

**How many columns in the crime table have TIMESTAMP data?**

In [21]:
# Count the TIMESTAMP columns directly from the table schema instead of
# hard-coding the answer, so the number stays correct if the schema changes.
num_timestamp_fields = sum(field.field_type == 'TIMESTAMP' for field in table.schema)

print(num_timestamp_fields)

2


**3] Create a crime map**

In [22]:
# What are the names of the two fields you likely need to pull out of the crime table to plot the crimes on a map?

# Select the candidate columns by name rather than by their position in the
# schema, and query through the exercise client that fetched the table.
coordinate_fields = [
    field for field in table.schema
    if field.name in ('x_coordinate', 'y_coordinate')
]

exclient.list_rows(table, selected_fields=coordinate_fields, max_results=5).to_dataframe()

Unnamed: 0,x_coordinate,y_coordinate
0,1203836.0,1834299.0
1,1130692.0,1914937.0
2,1120336.0,1933652.0
3,1142388.0,1862000.0
4,1149068.0,1875589.0


In [23]:
# Record the two column names chosen for plotting crimes on a map.
# NOTE(review): the table preview also shows `latitude` and `longitude`
# columns — confirm which coordinate pair the map needs.
fields_for_plotting = [
    'x_coordinate',
    'y_coordinate',
]

print(fields_for_plotting)

['x_coordinate', 'y_coordinate']
