# Import necessary libraries

In [5]:
from google.cloud import bigquery
import os
from google.oauth2 import service_account
import pandas as pd
import db_dtypes
import warnings
warnings.filterwarnings("ignore")


# Get Connection Details and Establish Connection

In [6]:
# Set up credentials
credentials = service_account.Credentials.from_service_account_file('/Users/kiranbele/Downloads/bigq-data-engineering.json')

project_id = "bigq-data-engineering-444816"
# Initialize the BigQuery client
client = bigquery.Client(credentials=credentials, project=project_id)
# client = bigquery.Client()
print(client)

# # Verify the connection: List datasets in your project
#   # Replace with your Project ID
datasets = list(client.list_datasets(project=project_id))
if datasets:
    print("Datasets in the project:")
    for dataset in datasets:
        print(f" - {dataset.dataset_id}")
else:
    print(f"No datasets found in project '{project_id}'.")

<google.cloud.bigquery.client.Client object at 0x122fe2210>
Datasets in the project:
 - KBELE_LONDON


In [7]:
# Execute a query
query = f"""
SELECT *
FROM {project_id}.KBELE_LONDON.JOURNEYS
LIMIT 10
"""
query_job = client.query(query)

df = query_job.to_dataframe()

print(df.head())

   month  year  days report_date       journey_type  journeys_millions
0      2  2020    29  2020-06-30  Underground & DLR           8.041099
1      2  2020    29  2020-06-30                Bus          32.467243
2      2  2020    29  2020-06-30               Tram           0.470811
3      2  2020    29  2020-06-30         Overground           1.491890
4      2  2020    29  2020-06-30   Emirates Airline           0.004978


# Queries to answer some business questions

In [8]:
# Query 1: Most popular transport types
most_popular_types_query = f"""
SELECT journey_type,
    SUM(journeys_millions)/1000000 as total_journeys_millions
FROM {project_id}.KBELE_LONDON.JOURNEYS
GROUP BY journey_type
ORDER BY total_journeys_millions DESC;
"""

# Query 2: Emirates Airline popularity by month and year
emirates_query = f"""
SELECT month, 
	year, 
	ROUND(journeys_millions, 2) AS rounded_journeys_millions
FROM {project_id}.KBELE_LONDON.JOURNEYS
WHERE journey_type = 'Emirates Airline' AND journeys_millions IS NOT NULL
ORDER BY rounded_journeys_millions DESC
LIMIT 5;
"""

# Query 3: Least popular years for Underground & DLR
underground_query = f"""
SELECT year,
	journey_type,
	SUM(journeys_millions) as total_journeys_millions
FROM {project_id}.KBELE_LONDON.JOURNEYS
WHERE journey_type LIKE '%Underground%'
GROUP BY year, journey_type
ORDER BY total_journeys_millions
LIMIT 5;
"""

# Execute queries and fetch results
try:
    # Query 1
    print("Executing Query 1: Most popular transport types")
    query_job = client.query(most_popular_types_query)
    df1 = query_job.to_dataframe()
    print("Query 1 Results:")
    print(df1.head())

    # Query 2
    print("\nExecuting Query 2: Emirates Airline popularity by month and year")
    query_job = client.query(emirates_query)
    df2 = query_job.to_dataframe()
    print("Query 2 Results:")
    print(df2.head())

    # Query 3
    print("\nExecuting Query 3: Least popular years for Underground & DLR")
    query_job = client.query(underground_query)
    df3 = query_job.to_dataframe()
    print("Query 3 Results:")
    print(df3.head())

except Exception as e:
    print(f"An error occurred: {e}")

Executing Query 1: Most popular transport types
Query 1 Results:
        journey_type  total_journeys_millions
0                Bus                 0.024905
1  Underground & DLR                 0.015020
2         Overground                 0.001667
3           TfL Rail                 0.000411
4               Tram                 0.000315

Executing Query 2: Emirates Airline popularity by month and year
Query 2 Results:
   month  year  rounded_journeys_millions
0      5  2012                       0.53
1      6  2012                       0.38
2      4  2012                       0.24
3      5  2022                       0.19
4      5  2021                       0.19

Executing Query 3: Least popular years for Underground & DLR
Query 3 Results:
   year       journey_type  total_journeys_millions
0  2020  Underground & DLR               310.179316
1  2021  Underground & DLR               748.452544
2  2022  Underground & DLR              1064.859009
3  2010  Underground & DLR           