In [1]:
from google.cloud import bigquery
from google.oauth2 import service_account

# `client` is a custom helper function defined in bq_sa_auth.py; calling it returns an authorized BigQuery client object with the right credentials.
# Because `client` is a function rather than a client object, we must call it with an extra pair of parentheses, `client()`, each time we need the client.

from bq_sa_auth import client

## *Let's focus on the crime data in the City of Chicago following the Kaggle tutorial:*

### [Intro to SQL](https://www.kaggle.com/learn/intro-to-sql)

----------------------------------------------------------------------------------------

In [2]:
# Build a reference to the "chicago_crime" dataset in the public-data project.
# DatasetReference is constructed directly because Client.dataset() is deprecated;
# building the reference is purely local and makes no API request.
ChicCrime_ref = bigquery.DatasetReference("bigquery-public-data", "chicago_crime")

# Fetch the dataset itself -- this is the call that makes the API request.
ChicCrime_DS = client().get_dataset(ChicCrime_ref)

### **Exercises:**

### 1) Count tables in the dataset

How many tables are in the Chicago Crime dataset?

In [4]:
# Materialize the dataset's table listing so it can be both counted and iterated
tables = list(client().list_tables(ChicCrime_DS))

# Report how many tables the dataset holds
print(f'The number of tables in the Chicago dataset are: {len(tables)}')
print("\n")

# Report the identifier of every table, one per line
print(f'The name(s) of the tables in the Chicago dataset are:')
print("\n")
for table_id in (tab.table_id for tab in tables):
    print(table_id)

The number of tables in the Chicago dataset are: 1


The name(s) of the tables in the Chicago dataset are:


crime


### 2) Explore the table schema

How many columns in the `crime` table have `TIMESTAMP` data?

In [5]:
# Inspect the schema of the `crime` table:
#   1. derive a table reference from the dataset reference,
#   2. fetch the table object through the client,
#   3. display its schema as the cell output.
crime_tab_ref = ChicCrime_ref.table("crime")
crime_table = client().get_table(crime_tab_ref)
crime_table.schema

[SchemaField('unique_key', 'INTEGER', 'REQUIRED', None, None, (), None),
 SchemaField('case_number', 'STRING', 'NULLABLE', None, None, (), None),
 SchemaField('date', 'TIMESTAMP', 'NULLABLE', None, None, (), None),
 SchemaField('block', 'STRING', 'NULLABLE', None, None, (), None),
 SchemaField('iucr', 'STRING', 'NULLABLE', None, None, (), None),
 SchemaField('primary_type', 'STRING', 'NULLABLE', None, None, (), None),
 SchemaField('description', 'STRING', 'NULLABLE', None, None, (), None),
 SchemaField('location_description', 'STRING', 'NULLABLE', None, None, (), None),
 SchemaField('arrest', 'BOOLEAN', 'NULLABLE', None, None, (), None),
 SchemaField('domestic', 'BOOLEAN', 'NULLABLE', None, None, (), None),
 SchemaField('beat', 'INTEGER', 'NULLABLE', None, None, (), None),
 SchemaField('district', 'INTEGER', 'NULLABLE', None, None, (), None),
 SchemaField('ward', 'INTEGER', 'NULLABLE', None, None, (), None),
 SchemaField('community_area', 'INTEGER', 'NULLABLE', None, None, (), None),
 

### There are two columns with the `TIMESTAMP` data type: `date` and `updated_on`.

### 3) Create a crime map

If you wanted to create a map with a dot at the location of each crime, what are the names of the two fields you likely need to pull out of the `crime` table to plot the crimes on a map?

In [6]:
# Preview the first 5 rows of the crime table as a DataFrame
preview_rows = client().list_rows(crime_table, max_results=5)
preview_rows.to_dataframe()

Unnamed: 0,unique_key,case_number,date,block,iucr,primary_type,description,location_description,arrest,domestic,...,ward,community_area,fbi_code,x_coordinate,y_coordinate,year,updated_on,latitude,longitude,location
0,11511120,JB521056,2018-11-18 07:55:00+00:00,0000X E LAKE ST,0281,CRIM SEXUAL ASSAULT,NON-AGGRAVATED,STREET,False,False,...,42,32,2,1176916.0,1901744.0,2018,2018-11-25 04:09:36+00:00,41.885729,-87.625781,"(41.885729338, -87.625780935)"
1,10042597,HY230989,2015-04-21 08:19:00+00:00,001XX N MICHIGAN AVE,0312,ROBBERY,ARMED:KNIFE/CUTTING INSTRUMENT,CONVENIENCE STORE,False,False,...,42,32,3,1177271.0,1901522.0,2015,2018-02-10 03:50:01+00:00,41.885112,-87.624484,"(41.885112119, -87.624484053)"
2,11469374,JB453674,2018-09-27 08:11:00+00:00,001XX N MICHIGAN AVE,0313,ROBBERY,ARMED: OTHER DANGEROUS WEAPON,RESTAURANT,False,False,...,42,32,3,1177270.0,1901551.0,2018,2018-10-11 03:57:17+00:00,41.885192,-87.624487,"(41.885191719, -87.624486846)"
3,11547591,JB568310,2018-12-26 06:45:00+00:00,0000X W RANDOLPH ST,0313,ROBBERY,ARMED: OTHER DANGEROUS WEAPON,RESTAURANT,False,False,...,42,32,3,1176035.0,1901283.0,2018,2019-02-12 04:00:01+00:00,41.884484,-87.62903,"(41.884484216, -87.629030004)"
4,13300080,JG531536,2023-12-06 11:00:00+00:00,0000X E LAKE ST,031A,ROBBERY,ARMED - HANDGUN,CONVENIENCE STORE,False,False,...,42,32,3,1176905.0,1901744.0,2023,2023-12-14 03:41:01+00:00,41.88573,-87.625821,"(41.885729587, -87.625821329)"


### We can use the `longitude` and `latitude` columns to locate the crimes on a map of Chicago. The last 3 columns (`latitude`, `longitude` and `location`) are related to each other and contain the geographic data. The `x_coordinate` and `y_coordinate` columns are also related to the location of the crime, possibly via a Cartesian 2D map.

In [7]:
# Pick the geographic columns by name rather than by their position at the end
# of the schema, so the preview stays correct if the table gains or reorders columns.
geo_column_names = ("latitude", "longitude", "location")
geo_fields = [field for field in crime_table.schema if field.name in geo_column_names]

# Preview the first 5 rows restricted to the geographic columns
client().list_rows(crime_table, selected_fields=geo_fields, max_results=5).to_dataframe()

Unnamed: 0,latitude,longitude,location
0,41.885729,-87.625781,"(41.885729338, -87.625780935)"
1,41.885112,-87.624484,"(41.885112119, -87.624484053)"
2,41.885192,-87.624487,"(41.885191719, -87.624486846)"
3,41.884484,-87.62903,"(41.884484216, -87.629030004)"
4,41.88573,-87.625821,"(41.885729587, -87.625821329)"
