In [0]:
%pip install --upgrade google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client pandas

In [0]:
import pandas as pd
from google.oauth2 import service_account
from googleapiclient.discovery import build

In [0]:
def get_google_sheets_service(service_account_file: str, scopes: list = None):
    """Build a Google Sheets API v4 client and return its ``spreadsheets()`` resource.

    Args:
        service_account_file: Path to the service-account key file used to
            create the credentials.
        scopes: OAuth scopes requested for the credentials. When ``None``,
            the read-only spreadsheets scope is used.

    Returns:
        The object returned by ``spreadsheets()`` on the built service.
    """
    # Only ``None`` triggers the default; an explicit (even empty) list is kept.
    effective_scopes = (
        ["https://www.googleapis.com/auth/spreadsheets.readonly"]
        if scopes is None
        else scopes
    )

    creds = service_account.Credentials.from_service_account_file(
        service_account_file, scopes=effective_scopes
    )

    return build('sheets', 'v4', credentials=creds).spreadsheets()

In [0]:
def fetch_sheet_values(sheet_service, spreadsheet_id: str, range_name: str):
    """Fetch the cell values of one range of a Google Sheets spreadsheet.

    Args:
        sheet_service: Object whose ``values().get(...)`` builds the request
            (the resource returned by ``get_google_sheets_service``).
        spreadsheet_id: Passed to the request as ``spreadsheetId``.
        range_name: Passed to the request as ``range``.

    Returns:
        The ``'values'`` entry of the executed response, or an empty list
        when the response has no such entry.
    """
    request = sheet_service.values().get(
        spreadsheetId=spreadsheet_id, range=range_name
    )
    response = request.execute()

    # A response without a 'values' key is treated as "no rows".
    return response.get('values', [])

In [0]:
def google_sheets_to_spark_df(
    spark,
    service_account_file: str,
    spreadsheet_id: str,
    range_name: str
):
    """Load a Google Sheets range into a Spark DataFrame.

    The first row of the range supplies the column names and every
    following row becomes one record.

    Args:
        spark: Object exposing ``createDataFrame(data, schema=...)``
            (normally the active SparkSession).
        service_account_file: Path to the service-account key file, passed
            to ``get_google_sheets_service``.
        spreadsheet_id: ID of the spreadsheet to read.
        range_name: Range to read, forwarded unchanged to
            ``fetch_sheet_values`` (e.g. ``"Sheet1!A1:D"``).

    Returns:
        A DataFrame with one column per header cell. When the range has no
        data rows (it is empty or holds only the header row), a DataFrame
        with no columns and no rows is returned.
    """
    sheet_service = get_google_sheets_service(service_account_file)
    values = fetch_sheet_values(sheet_service, spreadsheet_id, range_name)

    if not values or len(values) < 2:
        # A list of column names (even an empty one) makes PySpark infer the
        # schema from the data, which fails on an empty dataset. Passing the
        # empty struct as a datatype string gives an explicit schema instead.
        return spark.createDataFrame([], schema="struct<>")

    headers = values[0]
    width = len(headers)

    # The Sheets API omits trailing empty cells, so rows can be shorter than
    # the header row; Spark rejects rows whose length differs from the schema.
    # Pad with "" — the same value the API returns for interior empty cells.
    # Rows that are already wide enough are left unchanged.
    data = [list(row) + [""] * (width - len(row)) for row in values[1:]]

    return spark.createDataFrame(data, schema=headers)