# Reference

### [How to set up the OS environment and GOOGLE_APPLICATION_CREDENTIALS](#os_env)  

### [How to set up the API in BigQuery](#bigquery)  
  1 [Google Cloud BigQuery client](#bigqueryclient)  
  2 [Google Cloud BigQuery magics](#bigquerymagics)
  
### [How to set up the API in Google Cloud Storage](#CloudStorage)


<a id='os_env'></a>
### How to set up the OS environment and GOOGLE_APPLICATION_CREDENTIALS

In [None]:
# Key file location: C:\Users\peaceful-parity-336514-f361713c806a.json
# In PowerShell, point GOOGLE_APPLICATION_CREDENTIALS at the key file with:
# $env:GOOGLE_APPLICATION_CREDENTIALS="C:\Users\peaceful-parity-336514-f361713c806a.json"

<a id='bigquery'></a>
### How to set up the API in BigQuery

In [12]:
# Make sure the BigQuery API is enabled for your GCP project (check in the Cloud Console).

In [2]:
from google.cloud import bigquery
from google.oauth2 import service_account

import os

# Path to the service-account key file downloaded from GCP.
# Prefer the GOOGLE_APPLICATION_CREDENTIALS environment variable so the notebook
# is not tied to one machine's directory layout; fall back to the original local
# path when the variable is unset or empty.
key_path = (
    os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
    or "C:\\Users\\peaceful-parity-336514-f361713c806a.json"
)
print(key_path)

C:\Users\peaceful-parity-336514-f361713c806a.json


In [3]:
# Load service-account credentials from the key file, requesting the
# cloud-platform OAuth scope.
credentials = service_account.Credentials.from_service_account_file(
    key_path,
    scopes=["https://www.googleapis.com/auth/cloud-platform"],
)

In [4]:
# Show the project id carried by the credentials, then the credentials object
# itself, one per line.
print(credentials.project_id, credentials, sep="\n")

peaceful-parity-336514
<google.oauth2.service_account.Credentials object at 0x0000023148C11BB0>


In [5]:
# Create the BigQuery client. Both the credentials and the project id come from
# the service-account credentials object built from the key file.
client = bigquery.Client(
    project=credentials.project_id,
    credentials=credentials,
)

<a id='bigqueryclient'></a>
### Google cloud bigquery client

In [6]:
# Smoke test: run a small query against a public table to confirm the client works.
query = """
    SELECT name, SUM(number) as total_people
    FROM `bigquery-public-data.usa_names.usa_1910_2013`
    WHERE state = 'TX'
    GROUP BY name, state
    ORDER BY total_people DESC
    LIMIT 20
"""
query_job = client.query(query)  # Make an API request.
print(query_job, "\n")
print("The query data:")
for record in query_job.result():
    # A row value can be read either by position or by field name.
    person_name = record[0]
    total_people = record["total_people"]
    print("name={}, count={}".format(person_name, total_people))

QueryJob<project=peaceful-parity-336514, location=US, id=02c48fda-21ec-4b30-9fc2-7dfcb084e2a9> 

The query data:
name=James, count=272793
name=John, count=235139
name=Michael, count=225320
name=Robert, count=220399
name=David, count=219028
name=Mary, count=209893
name=William, count=173092
name=Jose, count=157362
name=Christopher, count=144196
name=Maria, count=131056
name=Charles, count=126509
name=Daniel, count=117470
name=Richard, count=109888
name=Juan, count=109808
name=Jennifer, count=98696
name=Joshua, count=90679
name=Elizabeth, count=90465
name=Joseph, count=89097
name=Matthew, count=88464
name=Joe, count=87977


<a id='bigquerymagics'></a>
### Google cloud bigquery magics

In [7]:
#To be Added

<a id='Create Dataset'></a>
### Google cloud bigquery Create Dataset

In [None]:
# TODO(developer): Set dataset_name to the name of the dataset to create.
dataset_name = "Your_name"
dataset_id = "{}.{}".format(client.project, dataset_name)

# Construct a full Dataset object to send to the API.
dataset = bigquery.Dataset(dataset_id)

# TODO(developer): Specify the geographic location where the dataset should reside.
dataset.location = "US"

# Send the dataset to the API for creation, with an explicit timeout.
# exists_ok=True keeps this cell re-runnable: if the dataset already exists,
# the existing dataset is returned instead of raising
# google.api_core.exceptions.Conflict.
dataset = client.create_dataset(dataset, exists_ok=True, timeout=30)  # Make an API request.
print("Dataset {}.{} is ready".format(client.project, dataset.dataset_id))

<a id='Copy Dataset'></a>
### Google cloud bigquery Copy Dataset

In [17]:
from google.cloud import bigquery_datatransfer
# Authenticate the Data Transfer client with the same service-account
# credentials as the other clients in this notebook, rather than relying on
# whatever default credentials happen to be configured in the environment.
transfer_client = bigquery_datatransfer.DataTransferServiceClient(
    credentials=credentials
)

# TODO(developer): Replace these placeholders with real project / dataset ids.
destination_project_id = "my-destination-project"
destination_dataset_id = "my_destination_dataset"
source_project_id = "my-source-project"
source_dataset_id = "my_source_dataset"

# Describe a recurring dataset copy from the source dataset into the destination.
transfer_config = bigquery_datatransfer.TransferConfig(
    destination_dataset_id=destination_dataset_id,
    display_name="Your Dataset Copy Name",
    data_source_id="cross_region_copy",
    params={
        "source_project_id": source_project_id,
        "source_dataset_id": source_dataset_id,
    },
    schedule="every 24 hours",
)

# Register the transfer config under the destination project (API request).
transfer_config = transfer_client.create_transfer_config(
    parent=transfer_client.common_project_path(destination_project_id),
    transfer_config=transfer_config,
)
print(f"Created transfer config: {transfer_config.name}")

<a id='Copy Table'></a>
### Google cloud bigquery Copy Table

In [None]:
# Source is a public table; the destination table name is a placeholder.
source_table_path = "bigquery-public-data.ml_datasets.credit_card_default"
table_name = "Your_table_name"

# Fully-qualified destination id: <project>.<dataset>.<table>.
# NOTE: `dataset` must already be defined by the "Create Dataset" cell.
table_id = ".".join([client.project, dataset.dataset_id, table_name])
sql = "SELECT * FROM `{}`;".format(source_table_path)

# Run the SELECT with the destination table set on the job config, so the
# query results are written into that table.
job_config = bigquery.QueryJobConfig(destination=table_id)
query_job = client.query(sql, job_config=job_config)  # Make an API request.
query_job.result()  # Wait for the job to complete.

print("Query results loaded to the table {}".format(table_id))

<a id='Delete Dataset'></a>
### Google cloud bigquery Delete Dataset

In [None]:
# Build the fully-qualified id (<project>.<dataset>) of the dataset to remove.
dataset_name = "Your_name"
dataset_id = ".".join((client.project, dataset_name))

# delete_contents=True is passed so tables inside the dataset go with it;
# not_found_ok=True is passed so a missing dataset does not raise.
client.delete_dataset(dataset_id, delete_contents=True, not_found_ok=True)  # Make an API request.

print("Deleted dataset '{}'.".format(dataset_id))

<a id='CloudStorage'></a>
### How to set up the API in Google Cloud Storage

In [8]:
# pip install --upgrade google-cloud-storage

In [21]:
from google.cloud import storage
from google.oauth2 import service_account


import os

# Path to the service-account key file downloaded from GCP.
# Prefer the GOOGLE_APPLICATION_CREDENTIALS environment variable so the notebook
# is not tied to one machine's directory layout; fall back to the original local
# path when the variable is unset or empty.
key_path = (
    os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
    or "C:\\Users\\peaceful-parity-336514-f361713c806a.json"
)
print(key_path)

# Load service-account credentials from the key file, requesting the
# cloud-platform OAuth scope.
credentials = service_account.Credentials.from_service_account_file(
    key_path,
    scopes=["https://www.googleapis.com/auth/cloud-platform"],
)
# Show the project id carried by the credentials, then the object itself.
print(credentials.project_id, credentials, sep="\n")

C:\Users\peaceful-parity-336514-f361713c806a.json
peaceful-parity-336514
<google.oauth2.service_account.Credentials object at 0x0000023148BDD550>


In [10]:
# Cloud Storage client using the service-account credentials and their project id.
storage_client = storage.Client(
    project=credentials.project_id,
    credentials=credentials,
)

In [11]:
# Bucket name for the storage examples (a later cell reassigns bucket_name).
bucket_name = 'dataproc_lab_bucket'

In [12]:
# Make sure the bucket "ml_model_lab" exists, creating it when it is missing.
bucket_name = "ml_model_lab"

# lookup_bucket() returns None when the bucket is not found. Any other failure
# (bad credentials, missing permission, network error) propagates instead of
# being mistaken for "bucket doesn't exist", which a bare `except:` would do.
if storage_client.lookup_bucket(bucket_name) is not None:
    print("Bucket:", bucket_name, "exists")
else:
    print("Bucket:", bucket_name, "doesn't exist")
    print("Creating the bucket:", bucket_name)
    bucket = storage_client.bucket(bucket_name)
    bucket.storage_class = "STANDARD"
    # The "us" location is a multi-region, so no separate location-type
    # setting is needed.
    new_bucket = storage_client.create_bucket(bucket, location="us")

Bucket: ml_model_lab exists


In [13]:
# Delete the bucket. delete() returns None, so there is no result worth binding
# to a variable.
storage_client.get_bucket(bucket_name).delete()

In [14]:
# List every bucket visible to the client.
storage_client = storage.Client(credentials=credentials, project=credentials.project_id)
buckets = storage_client.list_buckets()
for listed_bucket in buckets:
    print(listed_bucket)

# Fetch one bucket and dump its raw properties: first the whole dict, then one
# "key : value" line per property.
bucket_info = storage_client.get_bucket("dataproc_lab_bucket")
print(bucket_info._properties)
for prop_name, prop_value in bucket_info._properties.items():
    print(prop_name, ":", prop_value)

<Bucket: dataproc_lab_bucket>
<Bucket: gcf-sources-981390829592-us-central1>
<Bucket: peaceful-parity-336514.appspot.com>
<Bucket: staging.peaceful-parity-336514.appspot.com>
<Bucket: us.artifacts.peaceful-parity-336514.appspot.com>
{'kind': 'storage#bucket', 'selfLink': 'https://www.googleapis.com/storage/v1/b/dataproc_lab_bucket', 'id': 'dataproc_lab_bucket', 'name': 'dataproc_lab_bucket', 'projectNumber': '981390829592', 'metageneration': '6', 'location': 'US', 'storageClass': 'STANDARD', 'etag': 'CAY=', 'timeCreated': '2022-02-05T05:07:27.989Z', 'updated': '2022-02-06T15:39:26.319Z', 'iamConfiguration': {'bucketPolicyOnly': {'enabled': False}, 'uniformBucketLevelAccess': {'enabled': False}, 'publicAccessPrevention': 'inherited'}, 'locationType': 'multi-region', 'rpo': 'DEFAULT'}
kind : storage#bucket
selfLink : https://www.googleapis.com/storage/v1/b/dataproc_lab_bucket
id : dataproc_lab_bucket
name : dataproc_lab_bucket
projectNumber : 981390829592
metageneration : 6
location : US