**Moving Kaggle datasets to a specific Google Cloud Storage bucket**

Reference notebook: click [here](http://www.kaggle.com/code/paultimothymooney/how-to-move-data-from-kaggle-to-gcs-and-back)


Import Libraries

In [5]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

Connect to the Google Cloud Storage (GCS) client

In [6]:
from google.cloud import storage
# Module-level client used by the helper functions in this notebook; it is
# constructed for the 'kagglehomecredit' project.
storage_client = storage.Client(project='kagglehomecredit')

Set dataset path

In [7]:
# Root directory that the os.walk loops in this notebook traverse.
inputdatapath = '/kaggle/input/home-credit-credit-risk-model-stability/csv_files/'

In [8]:
# Recursively print the full path of every file found under inputdatapath.
for dirname, _, filenames in os.walk(inputdatapath):
    for filename in filenames:
        full_path = os.path.join(dirname, filename)
        print(full_path)

/kaggle/input/home-credit-credit-risk-model-stability/csv_files/test/test_credit_bureau_a_2_6.csv
/kaggle/input/home-credit-credit-risk-model-stability/csv_files/test/test_credit_bureau_a_2_11.csv
/kaggle/input/home-credit-credit-risk-model-stability/csv_files/test/test_credit_bureau_a_2_0.csv
/kaggle/input/home-credit-credit-risk-model-stability/csv_files/test/test_credit_bureau_a_2_9.csv
/kaggle/input/home-credit-credit-risk-model-stability/csv_files/test/test_base.csv
/kaggle/input/home-credit-credit-risk-model-stability/csv_files/test/test_credit_bureau_a_1_1.csv
/kaggle/input/home-credit-credit-risk-model-stability/csv_files/test/test_credit_bureau_b_2.csv
/kaggle/input/home-credit-credit-risk-model-stability/csv_files/test/test_credit_bureau_a_2_10.csv
/kaggle/input/home-credit-credit-risk-model-stability/csv_files/test/test_credit_bureau_a_2_4.csv
/kaggle/input/home-credit-credit-risk-model-stability/csv_files/test/test_static_0_0.csv
/kaggle/input/home-credit-credit-risk-model-

Helper functions
1. Create Bucket
2. Upload Files
3. List Files
4. Download to kaggle notebook

In [9]:
def create_bucket(dataset_name):
    """Create a bucket named ``dataset_name`` and print a confirmation.

    The bucket is created through the module-level ``storage_client``.
    See https://cloud.google.com/storage/docs/
    """
    new_bucket = storage_client.create_bucket(dataset_name)
    confirmation = 'Bucket {} created'.format(new_bucket.name)
    print(confirmation)

def upload_blob(bucket_name, source_file_name, destination_blob_name):
    """Upload one local file into a bucket and print a confirmation.

    Args:
        bucket_name: Name of the bucket, fetched via ``storage_client``.
        source_file_name: Path of the local file to upload.
        destination_blob_name: Name the blob gets inside the bucket.

    See https://cloud.google.com/storage/docs/
    """
    target_blob = storage_client.get_bucket(bucket_name).blob(destination_blob_name)
    target_blob.upload_from_filename(source_file_name)
    summary = 'File {} uploaded to {}.'.format(source_file_name, destination_blob_name)
    print(summary)
    
def list_blobs(bucket_name):
    """Print the name of every blob in ``bucket_name``, one per line.

    See https://cloud.google.com/storage/docs/
    """
    for entry in storage_client.list_blobs(bucket_name):
        print(entry.name)
        
def download_to_kaggle(bucket_name,destination_directory,file_name):
    """Download the blob named ``file_name`` from a GCS bucket to local disk.

    The file is written to ``os.path.join(destination_directory, file_name)``.
    Only the requested blob is fetched: looping over every blob in the bucket
    and writing each one to that same path would leave the file holding
    whichever blob happened to be listed last.

    Args:
        bucket_name: Name of the bucket to read from (via ``storage_client``).
        destination_directory: Local directory to place the file in; created
            if it does not exist.
        file_name: Name of the blob in the bucket, also used as the local
            file name relative to ``destination_directory``.

    Raises:
        Whatever the storage client raises when the bucket or blob cannot be
        read (e.g. a not-found error); nothing is caught here.
    """
    full_file_path = os.path.join(destination_directory, file_name)
    # Blob names may contain "/" (e.g. "folder/data.csv"), so create the
    # parent of the final path rather than only destination_directory.
    os.makedirs(os.path.dirname(full_file_path) or '.', exist_ok=True)
    bucket = storage_client.get_bucket(bucket_name)
    bucket.blob(file_name).download_to_filename(full_file_path)

Create the GCS bucket if it does not already exist

In [10]:
# Imported in this cell so it stays runnable on its own.
from google.cloud.exceptions import Conflict

bucket_name = 'baglitech-kaggle-homecredit'
try:
    create_bucket(bucket_name)
except Conflict:
    # HTTP 409: a bucket with this name already exists, so there is nothing
    # to create. Any other failure (credentials, permissions, invalid name,
    # network) is deliberately left to propagate instead of being hidden.
    print('Bucket {} already exists'.format(bucket_name))

Walk the dataset path and upload every file found there into the GCS bucket

In [11]:
# Upload every file found under inputdatapath. Each blob is named after the
# bare file name, so the local sub-directory layout is not reproduced in the
# bucket.
for dirname, _, filenames in os.walk(inputdatapath):
    for filename in filenames:
        filepath = os.path.join(dirname, filename)
        upload_blob(
            bucket_name,
            source_file_name=filepath,
            destination_blob_name=filename,
        )

File /kaggle/input/home-credit-credit-risk-model-stability/csv_files/test/test_credit_bureau_a_2_6.csv uploaded to test_credit_bureau_a_2_6.csv.
File /kaggle/input/home-credit-credit-risk-model-stability/csv_files/test/test_credit_bureau_a_2_11.csv uploaded to test_credit_bureau_a_2_11.csv.
File /kaggle/input/home-credit-credit-risk-model-stability/csv_files/test/test_credit_bureau_a_2_0.csv uploaded to test_credit_bureau_a_2_0.csv.
File /kaggle/input/home-credit-credit-risk-model-stability/csv_files/test/test_credit_bureau_a_2_9.csv uploaded to test_credit_bureau_a_2_9.csv.
File /kaggle/input/home-credit-credit-risk-model-stability/csv_files/test/test_base.csv uploaded to test_base.csv.
File /kaggle/input/home-credit-credit-risk-model-stability/csv_files/test/test_credit_bureau_a_1_1.csv uploaded to test_credit_bureau_a_1_1.csv.
File /kaggle/input/home-credit-credit-risk-model-stability/csv_files/test/test_credit_bureau_b_2.csv uploaded to test_credit_bureau_b_2.csv.
File /kaggle/inpu

In [12]:
# Print a header line, then the name of every blob currently in the bucket.
print('Data inside of',bucket_name,':')
list_blobs(bucket_name)

Data inside of baglitech-kaggle-homecredit :
FileName_ColumnName.xlsx
Home Credit File Schema.drawio
hc-rawdata/
test_applprev_1_0.csv
test_applprev_1_1.csv
test_applprev_1_2.csv
test_applprev_2.csv
test_base.csv
test_credit_bureau_a_1_0.csv
test_credit_bureau_a_1_1.csv
test_credit_bureau_a_1_2.csv
test_credit_bureau_a_1_3.csv
test_credit_bureau_a_1_4.csv
test_credit_bureau_a_2_0.csv
test_credit_bureau_a_2_1.csv
test_credit_bureau_a_2_10.csv
test_credit_bureau_a_2_11.csv
test_credit_bureau_a_2_2.csv
test_credit_bureau_a_2_3.csv
test_credit_bureau_a_2_4.csv
test_credit_bureau_a_2_5.csv
test_credit_bureau_a_2_6.csv
test_credit_bureau_a_2_7.csv
test_credit_bureau_a_2_8.csv
test_credit_bureau_a_2_9.csv
test_credit_bureau_b_1.csv
test_credit_bureau_b_2.csv
test_debitcard_1.csv
test_deposit_1.csv
test_other_1.csv
test_person_1.csv
test_person_2.csv
test_static_0_0.csv
test_static_0_1.csv
test_static_0_2.csv
test_static_cb_0.csv
test_tax_registry_a_1.csv
test_tax_registry_b_1.csv
test_tax_reg