# GCP Services

## GCP Storage

In [None]:
from google.cloud import storage

# Names of the bucket and of the blob (file) this cell works with
bucket_name = "your-bucket-name"
blob_name = "your-file.txt"

# Create the Cloud Storage client and get a handle on the bucket
client = storage.Client()
bucket = client.bucket(bucket_name)

# Print the name of every blob (file) listed for the bucket
blobs = bucket.list_blobs()
for blob in blobs:
    print(blob.name)

# Point at one specific blob and save its contents to a local file
blob = bucket.blob(blob_name)
blob.download_to_filename("local-file.txt")

# Send the local file back up to that same blob
blob.upload_from_filename("local-file.txt")


## BQ Queries

In [None]:
from google.cloud import bigquery

# SQL to run: name and age of every row whose age is above 30
query = """
    SELECT name, age
    FROM `your-project-id.your-dataset.your-table`
    WHERE age > 30
"""

# Create the BigQuery client and submit the query as a job
client = bigquery.Client()
query_job = client.query(query)

# Fetch the result rows and print one line per row
results = query_job.result()
for row in results:
    print(f"Name: {row['name']}, Age: {row['age']}")


## Load Data in BQ

In [None]:
from google.cloud import bigquery

# Destination table for the load
table_id = "your-project-id.your-dataset.your-table"

# Load settings: CSV input, first row skipped, schema auto-detection enabled
job_config = bigquery.LoadJobConfig(
    autodetect=True,
    skip_leading_rows=1,
    source_format=bigquery.SourceFormat.CSV,
)

# Create the BigQuery client
client = bigquery.Client()

# Start a load job fed from the local CSV file (opened in binary mode)
with open("local-file.csv", "rb") as source_file:
    load_job = client.load_table_from_file(
        source_file,
        table_id,
        job_config=job_config,
    )

# Block until the load job has finished
load_job.result()

# Look the table up and report its row count
table = client.get_table(table_id)
print(f"Loaded {table.num_rows} rows into {table_id}.")


## 3rd Party APIs

In [None]:
import requests

# API URL and authentication details
api_url = "https://api.thirdparty.com/data"
headers = {"Authorization": "Bearer YOUR_ACCESS_TOKEN"}

# Fetch data from the API.
# requests applies no timeout by default, so without one a stalled server would
# block this call indefinitely. With a timeout set, requests raises
# requests.exceptions.Timeout instead of hanging.
response = requests.get(api_url, headers=headers, timeout=10)

# Process the response: print the decoded JSON body on HTTP 200,
# otherwise report the status code
if response.status_code == 200:
    data = response.json()
    print(data)
else:
    print(f"Error: {response.status_code}")


## Access DBs

In [None]:
import pymysql

# Database connection details
connection = pymysql.connect(
    host="your-database-host",
    user="your-username",
    password="your-password",
    database="your-database-name",
)

# Execute a query.
# The try/finally guarantees the connection is closed even if creating the
# cursor, executing the query, or fetching the rows raises.
try:
    with connection.cursor() as cursor:
        query = "SELECT * FROM your_table WHERE age > %s"
        cursor.execute(query, (30,))  # Passing parameters securely
        results = cursor.fetchall()
        for row in results:
            print(row)
finally:
    # Close the connection
    connection.close()


## Stream Data

In [None]:
from google.cloud import pubsub_v1
from google.cloud import bigquery

# Subscription to read from and BigQuery table to write to
subscription_path = "projects/your-project-id/subscriptions/your-subscription-name"
table_id = "your-project-id.your-dataset.your-table"

# Clients: Pub/Sub subscriber for incoming messages, BigQuery for the inserts
subscriber = pubsub_v1.SubscriberClient()
bq_client = bigquery.Client()

# Callback function for Pub/Sub messages
def callback(message):
    """Handle one Pub/Sub message by streaming a row into BigQuery.

    The message is acknowledged only when the insert reports no errors. If
    the insert reports errors, the message is nack'ed so Pub/Sub can
    redeliver it instead of the data being silently dropped.

    Args:
        message: The received Pub/Sub message. Its ``data`` is printed, and
            ``ack()`` or ``nack()`` is called on it depending on the outcome.
    """
    print(f"Received message: {message.data}")

    # Convert message to BigQuery format
    # NOTE: placeholder row -- it does not use message.data yet; replace it
    # with a real mapping from the message payload to the table's columns.
    row = {"column1": "value1", "column2": "value2"}
    errors = bq_client.insert_rows_json(table_id, [row])
    if errors:
        print(f"Errors: {errors}")
        # Request redelivery rather than acking a message whose row was not
        # written. If rows can fail permanently, configure a dead-letter topic
        # on the subscription so such messages are not redelivered forever.
        message.nack()
        return

    print("Data successfully written to BigQuery")
    message.ack()

# Subscribe to the topic
future = subscriber.subscribe(subscription_path, callback=callback)
print(f"Listening for messages on {subscription_path}...")

# Keep the subscriber active until interrupted.
# Using the client as a context manager closes it once streaming has stopped,
# so its underlying resources are released.
with subscriber:
    try:
        future.result()
    except KeyboardInterrupt:
        future.cancel()  # Trigger the shutdown.
        future.result()  # Block until the shutdown is complete.


## Dataflow (ETL)

In [None]:
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions

# Define the pipeline: read text lines, split each line on commas, write the results
options = PipelineOptions()
with beam.Pipeline(options=options) as pipeline:
    lines = pipeline | "Read from GCS" >> beam.io.ReadFromText("gs://your-bucket/input.csv")
    fields = lines | "Parse CSV" >> beam.Map(lambda line: line.split(","))
    _ = fields | "Write to GCS" >> beam.io.WriteToText("gs://your-bucket/output")
