# Imports & Auth

In [11]:
from google.oauth2 import service_account
from googleapiclient.discovery import build

# Service-account JSON key file. The name is relative, so it is looked up in
# the current working directory when the credentials are loaded.
SERVICE_ACCOUNT_FILE = 'turing-delivery-g-ga-e36eb2300714.json'

# Combine scopes for both Drive and Sheets
# (both are the full-access variants of their respective scopes).
SCOPES = [
    "https://www.googleapis.com/auth/drive",
    "https://www.googleapis.com/auth/spreadsheets",
]

def authenticate_with_service_account():
    """Build service-account credentials from the configured key file.

    Loads the JSON key named by ``SERVICE_ACCOUNT_FILE`` and requests the
    scopes listed in ``SCOPES``.

    Returns:
        The object produced by
        ``service_account.Credentials.from_service_account_file``.
    """
    load_credentials = service_account.Credentials.from_service_account_file
    return load_credentials(SERVICE_ACCOUNT_FILE, scopes=SCOPES)

# Get the shared credentials object
credentials = authenticate_with_service_account()
# Drive v3 client built from those credentials; download_apis() reads this
# module-level name instead of taking the client as a parameter.
drive_service = build("drive", "v3", credentials=credentials)

# Download APIs Code

In [12]:
def download_apis(VERSION="0.1.0", download_datasets=False, save_directory='clean_workspace'):
    """Download a versioned APIs bundle from Google Drive into a fresh workspace.

    Steps performed:
      1. Recreate ``<save_directory>/<VERSION>`` from scratch.
      2. Find ``APIs_V<VERSION>.zip`` (case-insensitive name match) in the
         configured Drive folder and download it.
      3. Extract only the ``APIs/``, ``DBs/`` and ``Scripts/`` entries.
      4. Append the extracted ``APIs`` directory and the workspace directory
         to ``sys.path``.
      5. Import ``Scripts.FCSpec.generate_package_schema`` from the extracted
         bundle and run it for every package directory under ``APIs``,
         writing into ``<workspace>/Schemas``.
      6. Optionally download the dataset folder and the WS dataset archive.

    Uses the module-level ``drive_service`` client; it must exist before the
    call.

    Args:
        VERSION: Version string of the bundle to fetch.
        download_datasets: When true, also download the dataset folder into
            ``<workspace>/workspace/Datasets`` and download/extract
            ``WS_DATA.zip`` into the workspace.
        save_directory: Parent directory under which the per-version
            workspace directory is created.

    Raises:
        SystemExit: If the zip cannot be found in the Drive folder, is not a
            valid zip file, or extraction fails.
    """
    import io
    import os
    import sys
    import zipfile
    import shutil
    from googleapiclient.http import MediaIoBaseDownload

    # Define paths
    CONTENT_DIR = os.path.join(save_directory, VERSION)
    APIS_DIR = os.path.join(CONTENT_DIR, 'APIs')
    DBS_DIR = os.path.join(CONTENT_DIR, 'DBs')
    SCRIPTS_DIR = os.path.join(CONTENT_DIR, 'Scripts')
    FC_DIR = os.path.join(CONTENT_DIR, 'Schemas')
    ZIP_PATH = os.path.join(CONTENT_DIR, f'APIs_V{VERSION}.zip')

    # Google Drive Folder ID where versioned APIs zip files are stored
    APIS_FOLDER_ID = '1QpkAZxXhVFzIbm8qPGPRP1YqXEvJ4uD4'

    # Zip entries to extract; a tuple so it can be passed to str.startswith.
    ITEMS_TO_EXTRACT = ('APIs/', 'DBs/', 'Scripts/')

    # Start from an empty workspace. os.remove() cannot delete a directory,
    # so a left-over workspace from an earlier run needs shutil.rmtree();
    # a stray file or symlink at that path is unlinked instead.
    if os.path.isdir(CONTENT_DIR) and not os.path.islink(CONTENT_DIR):
        shutil.rmtree(CONTENT_DIR)
    elif os.path.lexists(CONTENT_DIR):
        os.remove(CONTENT_DIR)
    os.makedirs(CONTENT_DIR, exist_ok=True)

    def download_drive_file(service, file_id, output_path, file_name=None, show_progress=True):
        """Download one Drive file to ``output_path``, chunk by chunk.

        ``file_name`` is accepted for call-site readability but is not used.
        """
        request = service.files().get_media(fileId=file_id)
        with io.FileIO(output_path, 'wb') as fh:
            downloader = MediaIoBaseDownload(fh, request)
            done = False
            while not done:
                status, done = downloader.next_chunk()
                if show_progress:
                    print(f"Download progress: {int(status.progress() * 100)}%")

    # 1. Locate the zip in the APIs folder
    print(f"Searching for APIs zip file with version {VERSION} in folder: {APIS_FOLDER_ID}...")
    apis_file_id = None
    wanted_name = f'apis_v{VERSION.lower()}.zip'

    try:
        query = f"'{APIS_FOLDER_ID}' in parents and trashed=false"
        page_token = None
        # files.list is paginated; follow nextPageToken so the match is not
        # missed when the folder holds more files than fit on one page.
        while apis_file_id is None:
            results = drive_service.files().list(
                q=query,
                fields="nextPageToken, files(id, name)",
                pageToken=page_token,
            ).execute()
            for entry in results.get('files', []):
                entry_name = entry.get('name', '')
                if entry_name.lower() == wanted_name:
                    apis_file_id = entry.get('id')
                    print(f"Found matching file: {entry_name} (ID: {apis_file_id})")
                    break
            page_token = results.get('nextPageToken')
            if not page_token:
                break
    except Exception as e:
        # Best effort: report the listing failure, then fall through to the
        # "not found" exit below.
        print(f"An error occurred while listing files in Google Drive: {e}")

    if not apis_file_id:
        print(f"Error: Could not find APIs zip file with version {VERSION} in the specified folder.")
        sys.exit("Required APIs zip file not found.")

    # 2. Download the found APIs zip file
    print(f"Downloading APIs zip file with ID: {apis_file_id}...")
    download_drive_file(drive_service, apis_file_id, ZIP_PATH, file_name=f'APIs_V{VERSION}.zip')

    # 3. Extract only the wanted top-level items into the workspace
    print(f"Extracting specific items from {ZIP_PATH} to {CONTENT_DIR}...")
    try:
        with zipfile.ZipFile(ZIP_PATH, 'r') as zip_ref:
            for member in zip_ref.namelist():
                if member.startswith(ITEMS_TO_EXTRACT):
                    zip_ref.extract(member, CONTENT_DIR)
    except zipfile.BadZipFile:
        print(f"Error: The downloaded file at {ZIP_PATH} is not a valid zip file.")
        sys.exit("Invalid zip file downloaded.")
    except Exception as e:
        print(f"An error occurred during extraction: {e}")
        sys.exit("Extraction failed.")

    # 4. Clean up the downloaded archive
    if os.path.exists(ZIP_PATH):
        os.remove(ZIP_PATH)

    # 5. Add APIs to path (skip if already there so repeated calls do not
    #    pile up duplicate sys.path entries)
    if os.path.exists(APIS_DIR):
        if APIS_DIR not in sys.path:
            sys.path.append(APIS_DIR)
    else:
        print(f"Error: APIS directory not found at {APIS_DIR} after extraction. Cannot add to path.")

    # 6. Quick verification of the extracted items
    all_present = True
    print("\nVerifying extracted items:")
    for path in (APIS_DIR, DBS_DIR, SCRIPTS_DIR):
        if os.path.exists(path):
            print(f"✅ {path} is present.")
        else:
            print(f"❌ {path} is MISSING!")
            all_present = False

    if all_present:
        print(f"\n✅ Setup complete! Required items extracted to {CONTENT_DIR}.")
    else:
        print("\n❌ Setup failed! Not all required items were extracted.")

    # 7. Generate Schemas

    # The workspace root goes on sys.path so the extracted `Scripts` package
    # is importable.
    if os.path.exists(CONTENT_DIR):
        if CONTENT_DIR not in sys.path:
            sys.path.append(CONTENT_DIR)
    else:
        print(f"Error: CONTENT_DIR directory not found at {CONTENT_DIR} after extraction. Cannot add to path.")

    from Scripts.FCSpec import generate_package_schema

    print("\nGenerating FC Schemas")
    os.makedirs(FC_DIR, exist_ok=True)

    # One schema-generation call per package directory under APIs; plain
    # files at that level are skipped.
    for package_name in os.listdir(APIS_DIR):
        package_path = os.path.join(APIS_DIR, package_name)
        if os.path.isdir(package_path):
            generate_package_schema(package_path, output_folder_path=FC_DIR)
    print(f"✅ Successfully generated {len(os.listdir(FC_DIR))} FC Schemas to {FC_DIR}")

    if not download_datasets:
        return

    def download_drive_folder(service, folder_id, destination_path):
        """Recursively download a Drive folder into ``destination_path``.

        Sub-folders are recreated locally; every other item is fetched with
        ``download_drive_file``.
        """
        os.makedirs(destination_path, exist_ok=True)

        query = f"'{folder_id}' in parents and trashed=false"
        page_token = None

        while True:
            results = service.files().list(
                q=query,
                spaces='drive',
                fields='nextPageToken, files(id, name, mimeType)',
                pageToken=page_token
            ).execute()

            for item in results.get('files', []):
                file_id = item['id']
                file_name = item['name']
                target_path = os.path.join(destination_path, file_name)

                if item['mimeType'] == 'application/vnd.google-apps.folder':
                    # Recursively download subfolders
                    print(f"Creating subfolder and downloading: {target_path}")
                    download_drive_folder(service, file_id, target_path)
                else:
                    print(f"Downloading file: {file_name} to {target_path}")
                    download_drive_file(service, file_id, target_path, file_name=file_name, show_progress=False)

            page_token = results.get('nextPageToken', None)
            if not page_token:
                break

    # --- Configuration for Dataset Download ---
    # This FOLDER_ID should contain the 'Quotewk.csv' file.
    FOLDER_ID = "1tZqZB1vAxp4TTxbPm6O2YjfkZD4FM-ml"
    DATASET_FOLDER = os.path.join(CONTENT_DIR, 'workspace/Datasets')

    print(f"Starting download of folder {FOLDER_ID} to {DATASET_FOLDER}...")
    download_drive_folder(drive_service, FOLDER_ID, DATASET_FOLDER)
    print("Dataset download complete.")

    # --- Configuration for WS Dataset Download ---
    # This file ID should point at the 'WS Multihop Datasets' archive.
    WS_DATA_ID = "1kmXZ1oarBPlE0OQL52eGoc1xPbupJ1n9"
    WS_DATA_ZIP_PATH = os.path.join(CONTENT_DIR, 'WS_DATA.zip')

    print(f"Downloading WS Dataset zip file with ID: {WS_DATA_ID}...")
    download_drive_file(drive_service, WS_DATA_ID, WS_DATA_ZIP_PATH, file_name='WS_DATA.zip')
    print("Dataset download complete.")

    # Extract the Datasets
    with zipfile.ZipFile(WS_DATA_ZIP_PATH, 'r') as zip_ref:
        zip_ref.extractall(CONTENT_DIR)
    print(f"Extracted to {CONTENT_DIR}")

    # Move 'file_dataset_pb2.py' from the extracted WS_DATA folder up to the
    # workspace root.
    src_path = os.path.join(CONTENT_DIR, 'WS_DATA', 'file_dataset_pb2.py')
    dst_path = os.path.join(CONTENT_DIR, 'file_dataset_pb2.py')

    if os.path.exists(src_path):
        shutil.move(src_path, dst_path)
        print(f"Moved {src_path} to {dst_path}")
    else:
        print(f"Source file not found: {src_path}")

    # Clean up
    if os.path.exists(WS_DATA_ZIP_PATH):
        os.remove(WS_DATA_ZIP_PATH)

In [13]:
# Fetch the default API version (0.1.0) plus the datasets into ./clean_workspace.
download_apis(download_datasets=True, save_directory='clean_workspace')

Searching for APIs zip file with version 0.1.0 in folder: 1QpkAZxXhVFzIbm8qPGPRP1YqXEvJ4uD4...
Found matching file: APIs_V0.1.0.zip (ID: 1hLV2slrHhH0RquKU-8oWRJRs_nHh5CT_)
Downloading APIs zip file with ID: 1hLV2slrHhH0RquKU-8oWRJRs_nHh5CT_...
Download progress: 100%
Extracting specific items from clean_workspace/0.1.0/APIs_V0.1.0.zip to clean_workspace/0.1.0...

Verifying extracted items:
✅ clean_workspace/0.1.0/APIs is present.
✅ clean_workspace/0.1.0/DBs is present.
✅ clean_workspace/0.1.0/Scripts is present.

✅ Setup complete! Required items extracted to clean_workspace/0.1.0.

Generating FC Schemas
✅ notes_and_lists Schema generation complete: clean_workspace/0.1.0/Schemas/notes_and_lists.json


Processing mutation notes_and_lists.mutations.m01...
✅ notes_and_lists.mutations.m01 Schema generation complete: clean_workspace/0.1.0/MutationSchemas/m01/notes_and_lists.json

✅ google_maps Schema generation complete: clean_workspace/0.1.0/Schemas/google_maps.json


Processing mutation go

  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """
  """


✅ workday.mutations.m01 Schema generation complete: clean_workspace/0.1.0/MutationSchemas/m01/workday.json

✅ azure Schema generation complete: clean_workspace/0.1.0/Schemas/azure.json


Processing mutation azure.mutations.m01...
✅ azure.mutations.m01 Schema generation complete: clean_workspace/0.1.0/MutationSchemas/m01/azure.json

✅ media_control Schema generation complete: clean_workspace/0.1.0/Schemas/media_control.json


Processing mutation media_control.mutations.m01...
✅ media_control.mutations.m01 Schema generation complete: clean_workspace/0.1.0/MutationSchemas/m01/media_control.json

✅ google_meet Schema generation complete: clean_workspace/0.1.0/Schemas/google_meet.json


Processing mutation google_meet.mutations.m01...
✅ google_meet.mutations.m01 Schema generation complete: clean_workspace/0.1.0/MutationSchemas/m01/google_meet.json

✅ google_maps_live Schema generation complete: clean_workspace/0.1.0/Schemas/google_maps_live.json


Processing mutation google_maps_live.mutati

  """
  """
  """
  """
  """
  """


✅ github Schema generation complete: clean_workspace/0.1.0/Schemas/github.json


Processing mutation github.mutations.m01...
✅ github.mutations.m01 Schema generation complete: clean_workspace/0.1.0/MutationSchemas/m01/github.json

✅ figma Schema generation complete: clean_workspace/0.1.0/Schemas/figma.json


Processing mutation figma.mutations.m01...
✅ figma.mutations.m01 Schema generation complete: clean_workspace/0.1.0/MutationSchemas/m01/figma.json

✅ github_actions Schema generation complete: clean_workspace/0.1.0/Schemas/github_actions.json


Processing mutation github_actions.mutations.m01...
✅ github_actions.mutations.m01 Schema generation complete: clean_workspace/0.1.0/MutationSchemas/m01/github_actions.json

✅ zendesk Schema generation complete: clean_workspace/0.1.0/Schemas/zendesk.json


Processing mutation zendesk.mutations.m01...
✅ zendesk.mutations.m01 Schema generation complete: clean_workspace/0.1.0/MutationSchemas/m01/zendesk.json

✅ google_people Schema generation co