In [10]:
import datetime
import os
import shutil

import duckdb
from dotenv import load_dotenv
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

In [11]:
class GDriveFolder:
    """Download the files of one Google Drive folder into a local directory.

    Instantiating the class authenticates against the Drive v3 API and then
    immediately downloads every listed file whose name is not already present
    in the raw or the processed local directory.

    Args:
        credentials: Directory holding the OAuth files. The cached user token
            (``token.json``) is read from and written to this directory.
        raw_folder_items: Local directory that receives the downloaded files.
        processed_folder_items: Local directory that is also checked for an
            existing file of the same name before downloading.
        folder_id: ID of the Drive folder whose children are listed.
    """

    # Cached user token, created/refreshed by ``_authentication``.
    TOKEN_FILE = 'token.json'
    # NOTE(review): assumes the OAuth client-secrets file uses the name from
    # Google's quickstart and lives next to the token -- confirm.
    CLIENT_SECRETS_FILE = 'credentials.json'

    def __init__(self, credentials, raw_folder_items, processed_folder_items, folder_id):
        self.credentials = credentials
        self.raw_folder_items = raw_folder_items
        self.processed_folder_items = processed_folder_items
        self.folder_id = folder_id
        self.SCOPES = ['https://www.googleapis.com/auth/drive']
        self.service = self._authentication()
        self._download_files_gdrive()

    def _authentication(self):
        """Build an authenticated Drive v3 service.

        Returns:
            The service object returned by ``build``, or ``None`` when any
            step fails (the error is printed, not raised).
        """
        try:
            creds = None
            token_path = os.path.join(self.credentials, self.TOKEN_FILE)

            if os.path.exists(token_path):
                creds = Credentials.from_authorized_user_file(token_path)

            if not creds or not creds.valid:
                if creds and creds.expired and creds.refresh_token:
                    creds.refresh(Request())
                else:
                    # The interactive flow needs the client-secrets file; the
                    # token file is the *output* of this flow, not its input.
                    client_secrets_path = os.path.join(
                        self.credentials, self.CLIENT_SECRETS_FILE)
                    flow = InstalledAppFlow.from_client_secrets_file(
                        client_secrets_path, self.SCOPES)
                    creds = flow.run_local_server(port=0)

                # Persist the new/refreshed token for the next run.
                with open(token_path, 'w') as token:
                    token.write(creds.to_json())

            return build('drive', 'v3', credentials=creds)

        except Exception as e:
            print('Error authenticating: ', e)
            return None

    def _list_folder_files(self):
        """Return the ``id``/``name`` entries of every child of the folder.

        Follows ``nextPageToken`` until the listing is exhausted, so folders
        larger than one result page are returned completely.
        """
        # NOTE(review): the query does not filter on ``trashed``; confirm
        # whether trashed files should be excluded.
        items = []
        page_token = None
        while True:
            response = self.service.files().list(
                q=f"'{self.folder_id}' in parents",
                fields="nextPageToken, files(id, name)",
                pageToken=page_token,
            ).execute()
            items.extend(response.get('files', []))
            page_token = response.get('nextPageToken')
            if not page_token:
                return items

    @staticmethod
    def _report_download_error(error, file_name=None):
        """Print ``error`` with the message matching its category."""
        if isinstance(error, HttpError):
            message = "An HTTP error occurred during the download:"
        elif isinstance(error, OSError):
            message = "A system error occurred during the download:"
        else:
            message = "An error occurred during the download:"

        if file_name is None:
            print(message, error)
        else:
            print(message, f"[{file_name}]", error)

    def _download_files_gdrive(self):
        """Download every listed file that is not yet present locally.

        A file is skipped when a file of the same name exists in the raw or
        the processed directory. Errors are printed, never raised; a failure
        on one file does not stop the remaining downloads.
        """
        if not self.service:
            print('Error authenticating.')
            return

        try:
            items = self._list_folder_files()
        except Exception as e:
            self._report_download_error(e)
            return

        if not items:
            print('No files found.')
            return

        for item in items:
            file_id = item['id']
            file_name = item['name']
            raw_path = os.path.join(self.raw_folder_items, file_name)
            processed_path = os.path.join(self.processed_folder_items, file_name)

            if os.path.exists(raw_path) or os.path.exists(processed_path):
                continue

            try:
                # Fetch first, open second: a failed request must not leave an
                # empty file behind, because the existence check above would
                # then skip this file on every later run.
                content = self.service.files().get_media(fileId=file_id).execute()
                with open(raw_path, 'wb') as f:
                    f.write(content)
            except Exception as e:
                self._report_download_error(e, file_name)

In [13]:
def main(project_root='C://Tecnology//Projects//dpp-duckdb-processing-persistence'):
    """Download the configured Drive folder into the project's raw data directory.

    Loads ``config/.env`` under ``project_root``, reads ``FOLDER_ID`` from the
    environment and instantiates ``GDriveFolder``, which authenticates and
    downloads on construction.

    Args:
        project_root: Root directory of the project. The credentials
            directory, the ``.env`` file and the raw/processed data
            directories are all derived from it. Defaults to the location
            that was previously hard-coded.
    """
    # GDriveFolder configs
    credentials = os.path.join(project_root, 'config', 'credentials')
    raw_folder_items = os.path.join(project_root, 'data', 'raw')
    processed_folder_items = os.path.join(project_root, 'data', 'processed')
    dotenv_path = os.path.join(project_root, 'config', '.env')

    load_dotenv(dotenv_path)
    folder_id = os.getenv("FOLDER_ID")
    if not folder_id:
        # Without this guard the Drive query would be built for the literal
        # folder "None" and fail with an unrelated-looking API error.
        print(f'FOLDER_ID is not set (looked in the environment and in {dotenv_path}).')
        return

    # GDriveFolder class: authentication and download run in the constructor.
    GDriveFolder(credentials, raw_folder_items, processed_folder_items, folder_id)

In [14]:
# Entry point: start the download only when this code runs as the main
# program (not when the module is imported).
if __name__ == "__main__":
    main()