In [4]:
from google.cloud import storage
import requests

In [11]:
def upload_url_to_gcs(bucket_name, source_url, destination_blob_name,
                      project="sound-oasis-309708", timeout=(10, 300),
                      content_type='application/octet-stream'):
    """
    Download the content at a URL and store it as a blob in a GCS bucket.

    The whole response body is read into memory before it is uploaded, so
    this is intended for files that comfortably fit in RAM.

    Parameters:
    - bucket_name: Name of the GCS bucket.
    - source_url: URL of the file to download and upload.
    - destination_blob_name: GCS blob name including the path.
    - project: GCP project id passed to ``storage.Client``. Defaults to the
      project this notebook was originally written against.
    - timeout: Value forwarded to ``requests.get(timeout=...)``; either a
      single number of seconds or a ``(connect, read)`` tuple. Without a
      timeout a stalled server would block the notebook forever.
    - content_type: Content type recorded on the uploaded blob.

    Raises:
    - requests.HTTPError: if the server answers with a 4xx/5xx status.
    - requests.RequestException: on connection problems or timeouts.
    - Whatever the google-cloud-storage client raises on a failed upload.
    """
    # Initialize GCS client, bucket and target blob
    storage_client = storage.Client(project=project)
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    # stream=True defers the body download until the status has been checked.
    # The ``with`` block guarantees the connection is released back to the
    # pool even when raise_for_status() raises before the body is consumed.
    with requests.get(source_url, stream=True, timeout=timeout) as response:
        response.raise_for_status()  # Ensure the request was successful
        payload = response.content

    # Upload content to GCS
    blob.upload_from_string(payload, content_type=content_type)

    print(f"File from {source_url} uploaded to {destination_blob_name}.")



In [13]:
# Copy every monthly 2022 green-taxi parquet file into the bucket.
bucket_name = 'green_big_warehouse'
base_url = 'https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2022-'

# Zero-padded month numbers, '01' through '12'.
months = [f"{month_number:02d}" for month_number in range(1, 13)]

for month_suffix in months:
    # Source file is "<base_url><MM>.parquet"; it lands under a per-year prefix.
    source_url = base_url + month_suffix + ".parquet"
    destination_blob_name = "green_tripdata_2022/" + month_suffix + ".parquet"
    upload_url_to_gcs(
        bucket_name=bucket_name,
        source_url=source_url,
        destination_blob_name=destination_blob_name,
    )


File from https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2022-01.parquet uploaded to green_tripdata_2022/01.parquet.
File from https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2022-02.parquet uploaded to green_tripdata_2022/02.parquet.
File from https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2022-03.parquet uploaded to green_tripdata_2022/03.parquet.
File from https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2022-04.parquet uploaded to green_tripdata_2022/04.parquet.
File from https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2022-05.parquet uploaded to green_tripdata_2022/05.parquet.
File from https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2022-06.parquet uploaded to green_tripdata_2022/06.parquet.
File from https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripdata_2022-07.parquet uploaded to green_tripdata_2022/07.parquet.
File from https://d37ci6vzurychx.cloudfront.net/trip-data/green_tripd