# Using Cloud Storage and BigQuery in Python

In [1]:
import pandas as pd
from google.cloud import storage
from google.colab import auth
# Run Colab's user authentication step before any Google Cloud calls.
# NOTE(review): the storage / BigQuery cells below presumably rely on the
# credentials this sets up — confirm when running outside Colab.
auth.authenticate_user()

### Download data from Cloud Storage

In [2]:
def download_blob(project_id, bucket_name, source_blob_name, destination_file_name):
    """Fetch one Cloud Storage object and save it to a local file.

    Args:
        project_id: Passed to ``storage.Client`` as its first argument,
            e.g. "your-project-id".
        bucket_name: Bucket that holds the object, e.g. "your-bucket-name".
        source_blob_name: Name of the object inside the bucket,
            e.g. "storage-object-name".
        destination_file_name: Local path the object is written to,
            e.g. "local/path/to/file".

    Prints a confirmation message after the download call returns.
    """
    client = storage.Client(project_id)
    remote_object = client.bucket(bucket_name).blob(source_blob_name)
    remote_object.download_to_filename(destination_file_name)

    message = "Blob {} downloaded to {}.".format(source_blob_name, destination_file_name)
    print(message)

In [3]:
# Copy the athlete events CSV from the 'ba-780' bucket to a local file.
download_blob(
    source_blob_name='data/athlete_events.csv',
    destination_file_name='athlete_events.csv',
    bucket_name='ba-780',
    project_id='ba-780',
)

Blob data/athlete_events.csv downloaded to athlete_events.csv.


### Loading into pandas and processing

In [4]:
# Parse the downloaded CSV into a DataFrame, then preview its first three rows.
athlete_events = pd.read_csv(filepath_or_buffer='athlete_events.csv')
athlete_events.head(n=3)

Unnamed: 0,ID,Name,Sex,Age,Height,Weight,Team,NOC,Games,Year,Season,City,Sport,Event,Medal
0,1,A Dijiang,M,24.0,180.0,80.0,China,CHN,1992 Summer,1992,Summer,Barcelona,Basketball,Basketball Men's Basketball,
1,2,A Lamusi,M,23.0,170.0,60.0,China,CHN,2012 Summer,2012,Summer,London,Judo,Judo Men's Extra-Lightweight,
2,3,Gunnar Nielsen Aaby,M,24.0,,,Denmark,DEN,1920 Summer,1920,Summer,Antwerpen,Football,Football Men's Football,


In [5]:
# Keep only the rows whose Games column equals '2012 Summer', then show the
# (rows, columns) shape of the resulting subset.
athlete_events_summer_2012 = athlete_events.loc[
    athlete_events['Games'] == '2012 Summer'
]
athlete_events_summer_2012.shape

(12920, 15)

In [6]:
# Write the 2012 Summer subset to a local CSV file.
# index=False keeps the DataFrame's row labels out of the file; without it
# pandas writes them as an extra, unnamed first column, so the uploaded CSV
# would carry one more column than the DataFrame itself.
athlete_events_summer_2012.to_csv('athlete_events_summer_2012.csv', index=False)

### Uploading to Cloud Storage bucket

In [7]:
def upload_blob(project_id, bucket_name, source_file_name, destination_blob_name):
    """Send a local file to a Cloud Storage bucket under the given object name.

    Args:
        project_id: Passed to ``storage.Client`` as its first argument,
            e.g. "your-project-id".
        bucket_name: Bucket that receives the object, e.g. "your-bucket-name".
        source_file_name: Local path of the file to send,
            e.g. "local/path/to/file".
        destination_blob_name: Name the object gets inside the bucket,
            e.g. "storage-object-name".

    Prints a confirmation message after the upload call returns.
    """
    target_object = (
        storage.Client(project_id)
        .bucket(bucket_name)
        .blob(destination_blob_name)
    )
    target_object.upload_from_filename(source_file_name)

    confirmation = "File {} uploaded to {}.".format(source_file_name, destination_blob_name)
    print(confirmation)

In [8]:
# Send the filtered 2012 Summer CSV to the 'ba-780' bucket under data/.
upload_blob(
    source_file_name='athlete_events_summer_2012.csv',
    destination_blob_name='data/athlete_events_summer_2012.csv',
    bucket_name='ba-780',
    project_id='ba-780',
)

File athlete_events_summer_2012.csv uploaded to data/athlete_events_summer_2012.csv.


### Download from and upload to BigQuery with pandas

In [10]:
import pandas_gbq

# Run a query through pandas-gbq and bind the result to customerChurn.
# The query selects every column of the `ba-780.examples.customerChurn` table.
customerChurn = pandas_gbq.read_gbq(
    """
    SELECT * FROM `ba-780.examples.customerChurn`
    """,
    # Project passed to pandas-gbq for this request.
    project_id="ba-780"
)

Downloading: 100%|██████████|


In [11]:
# Preview the first five rows (five is also head()'s default row count).
customerChurn.head(n=5)

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,8883-GRDWQ,Male,1,False,False,20,True,No,DSL,Yes,...,No,Yes,No,No,One year,False,Mailed check,54.0,1055.9,False
1,2282-YGNOR,Female,0,False,False,29,True,No,DSL,Yes,...,Yes,Yes,No,No,One year,False,Credit card (automatic),58.0,1734.5,False
2,4003-OCTMP,Female,0,True,False,31,True,No,DSL,Yes,...,No,Yes,No,Yes,One year,True,Electronic check,64.0,1910.75,False
3,7654-YWJUF,Male,0,True,False,43,True,No,Fiber optic,Yes,...,Yes,Yes,No,No,One year,True,Bank transfer (automatic),84.25,3539.25,False
4,5777-KJIRB,Female,0,False,False,40,True,No,DSL,No,...,Yes,No,No,No,One year,True,Mailed check,50.25,2023.55,False


In [12]:
# Average MonthlyCharges per PaymentMethod, reshaped into a two-column
# DataFrame (PaymentMethod, MonthlyCharges) with a fresh integer index.
customer_churn_monthly_vs_payMethod = (
    customerChurn
    .groupby('PaymentMethod')['MonthlyCharges']
    .mean()
    .to_frame()
    .reset_index()
)
customer_churn_monthly_vs_payMethod

Unnamed: 0,PaymentMethod,MonthlyCharges
0,Bank transfer (automatic),67.192649
1,Credit card (automatic),66.512385
2,Electronic check,76.255814
3,Mailed check,43.91706


In [14]:
# Push the per-payment-method averages to BigQuery via pandas-gbq.
pandas_gbq.to_gbq(
    customer_churn_monthly_vs_payMethod,
    project_id='ba-780',
    destination_table='temp.customer_churn_monthly_vs_payMethod',
    if_exists='replace',  # requests replacement if the table already exists
)

100%|██████████| 1/1 [00:00<00:00, 4510.00it/s]
