# Imports & Auth

In [23]:
import os
# import io
import re
# import sys
import json
import math
# import copy
# import time
# import shlex
# import shutil
# import random
# import pathlib
# import subprocess
# import traceback
# import concurrent.futures

# import docker
# import nbformat
# import gspread

from pathlib import Path
# from functools import partial
from datetime import datetime
# from tqdm.notebook import tqdm
# from concurrent.futures import ThreadPoolExecutor, as_completed
# from typing import Callable, Dict, List, Sequence, Iterable, Union

import pandas as pd
# from rclone_python import rclone
# from nbclient import NotebookClient
# from rclone_python.remote_types import RemoteTypes

# from google.auth import default
from google.oauth2 import service_account
from googleapiclient.discovery import build, Resource
# from googleapiclient.http import BatchHttpRequest, MediaIoBaseDownload, MediaIoBaseUpload
from googleapiclient.errors import HttpError

## Utils

In [24]:
def extract_file_id(url):
    """Extract a Google Drive / Sheets / Colab file or folder id from a URL.

    The URL is matched against a list of known URL shapes and the id captured
    by the first pattern that matches is returned.

    Args:
        url: The URL string to inspect.

    Returns:
        The captured id with surrounding whitespace stripped, or ``None`` when
        ``url`` is not a string or none of the patterns match.
    """
    if not isinstance(url, str):
        return None

    # Order matters. "/folders/" has to be tried before the generic "/drive/"
    # pattern, otherwise ".../drive/folders/<id>" yields the literal "folders".
    # Every capture stops at "/", "?" or "#" so query strings and fragments
    # never end up inside the returned id.
    patterns = [
        r"/spreadsheets/d/([^/?#]+)",  # Matches /spreadsheets/d/{file_id}
        r"/file/d/([^/?#]+)",          # Matches /file/d/{file_id}
        r"[?&]id=([^&#]+)",            # Matches ?id={file_id} or &id={file_id}
        r"/folders/([^/?#]+)",         # Matches /folders/{folder_id}
        r"/drive/([^/?#]+)",           # Matches /drive/{file_id}
    ]

    for pattern in patterns:
        match_ = re.search(pattern, url)
        if match_:
            return match_.group(1).strip()

    return None

# Parse Proto
Please ensure you have copied your protobuf file into the root of this project.

In [7]:
from proto_reader import parse_proto_file

# Protobuf file to parse (see the note above about where it must live).
proto_file_name = 'tool_use_metadata_set_v2.pb'
samples = parse_proto_file(proto_file_name)

# Dump the parsed samples under a single 'result' key so the batching
# section can reload them from JSON.
parsed_payload = {'result': samples}
with Path('pb_jsons.json').open('w') as f:
    json.dump(parsed_payload, f, indent=4)

# Create Batches and Configuration Files for Docker Runs

In [25]:
# Reload the samples written by the proto-parsing section.
proto_json = 'pb_jsons.json'
with open(proto_json, 'r') as f:
    all_samples = json.load(f)['result']

# A sample carries its notebook link under either 'colab_url' or 'colab_link'.
colab_urls = [sample['colab_url'] if 'colab_url' in sample else sample['colab_link'] for sample in all_samples]
colab_ids = [extract_file_id(colab_url) for colab_url in colab_urls]

total_samples = len(colab_ids)
max_container = 700
# Spread the samples over at most `max_container` batches. Clamp to 1 so an
# empty sample list does not divide by zero below.
max_batch_size = max(1, math.ceil(total_samples / max_container))
print(f'Max Batches: {math.ceil(total_samples/max_batch_size)}\nMax Samples Per Batch: {max_batch_size}')

api_version = '0.1.0'
notebooks = [{'path': notebook, 'api_version': api_version} for notebook in colab_ids]
# Explicit columns keep the frame well-formed even when there are no samples.
notebooks_df = pd.DataFrame(notebooks, columns=['path', 'api_version'])

# Assign consecutive notebooks of each API version to batches of
# `max_batch_size`; a batch id has the form "<api_version>_<batch number>".
for version in notebooks_df['api_version'].unique():
    version_mask = notebooks_df['api_version'] == version
    count_notebooks = int(version_mask.sum())
    batch_ids = [f"{version}_{position // max_batch_size}" for position in range(count_notebooks)]
    notebooks_df.loc[version_mask, 'batch_id'] = batch_ids

notebooks_df.to_csv('execution_configs.csv', index=False)

Max Batches: 608
Max Samples Per Batch: 5


# Docker Orchestration

## For Local Run (where you have root access)

In [26]:
# Reload the batch assignments written to disk by the batching section.
exec_config = pd.read_csv("execution_configs.csv")

# Distinct batch ids; going through a set means the list order is arbitrary.
unique_batch_ids = set(exec_config['batch_id'])
run_identifiers = list(unique_batch_ids)

In [None]:
import sanity_orchestrator_with_download as orchestrator

# Docker image the orchestrator should launch for every batch.
orchestrator.DOCKER_IMAGE = 'sanity-runner-proto-colab'
try:
    start_time = datetime.now()
    # Timestamped run name; the results cell below reads `results/<run_name>`.
    run_name = f'sanity_check_{start_time.strftime("%Y%m%d_%H%M%S")}'
    # NOTE(review): only these five batch ids are run; `run_identifiers`
    # computed in the previous cell is not used here - confirm this is intended.
    orchestrator.run_orchestration(run_name, ['0.1.0_0', '0.1.0_1', '0.1.0_2', '0.1.0_3', '0.1.0_4'], "Proto")
    # total_seconds() covers the whole duration; timedelta.seconds is only the
    # seconds component and drops whole days on runs longer than 24 hours.
    elapsed_seconds = int((datetime.now() - start_time).total_seconds())
    print(f"Finished Docker Run. Time Taken: {elapsed_seconds} Seconds")
except (FileNotFoundError, FileExistsError, ConnectionError) as e:
    print(f"\n❌ A critical error occurred: {e}")

--- Step 1: Validating Host Environment ---
✅ Docker client connected.

--- Step 2: Preparing Host Directories ---
✅ Created log directory for this run at: /Users/nabeel/PycharmProjects/e2e_sanity_checks/execution_logs/sanity_check_20250821_195548
✅ Created result directory for this run at: /Users/nabeel/PycharmProjects/e2e_sanity_checks/results/sanity_check_20250821_195548
✅ Created result directory for this run at: /Users/nabeel/PycharmProjects/e2e_sanity_checks/executed_notebooks/sanity_check_20250821_195548

--- Step 4: Launching Containers in Parallel ---
  -> Launching container 'sanity_check_20250821_195548-0' for batch 0...
  -> Launching container 'sanity_check_20250821_195548-1' for batch 1...
  -> Launching container 'sanity_check_20250821_195548-2' for batch 2...
  -> Launching container 'sanity_check_20250821_195548-3' for batch 3...
  -> Launching container 'sanity_check_20250821_195548-4' for batch 4...

--- Step 5: Waiting for All Containers to Finish ---
  -> ✅ SUCCESS

## For environments where you need sudo to run Docker

In [1]:
# Shell escape: run runner.py with the project virtualenv's interpreter under sudo.
# NOTE(review): the captured output below shows "sudo: a password is required",
# i.e. this cell could not supply a password - presumably it has to be run from
# a terminal or with passwordless sudo configured; confirm.
!sudo .venv/bin/python runner.py

Password:sudo: a password is required


# Process Results

In [28]:
# Every file in this run's results directory is read as JSON holding a
# 'result' list; the lists are concatenated in directory-listing order.
output_dir = f'results/{run_name}'
output_files = os.listdir(output_dir)
complete_data = []
for file in output_files:
    full_path = Path(output_dir).joinpath(file)
    with open(full_path, 'r') as f:
        batch_payload = json.load(f)
    complete_data.extend(batch_payload['result'])

# Flatten the nested records into one row per notebook.
sanity_df = pd.json_normalize(complete_data)
sanity_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 11 columns):
 #   Column                                                Non-Null Count  Dtype 
---  ------                                                --------------  ----- 
 0   notebook                                              25 non-null     object
 1   contains_golden_answer                                25 non-null     object
 2   contains_final_assert                                 25 non-null     bool  
 3   script_passed                                         25 non-null     bool  
 4   script_failure_msg                                    25 non-null     object
 5   Set Up - Install Dependencies and Clone Repositories  25 non-null     object
 6   Set Up - Import APIs and initiate DBs                 25 non-null     object
 7   Final Assertion_NO_ACTION                             25 non-null     object
 8   Initial Assertion                                     25 non-null     ob

In [29]:
# Labels describing the outcome of a single executed notebook step.
FA_FAILED_ASSERTION = 'FA Failed - Assertion Error'
IA_FAILED_ASSERTION = 'IA Failed - Assertion Error'
NON_ASSERTION_ERROR = 'Non Assertion Error'
ASSERTION_ERROR = "Assertion Error"
NO_ERROR_FOUND = 'No Error Found'
UNDEFINED_ERROR = 'Undefined Error Type'

# Labels describing the overall Auto QC verdict for a notebook.
NEEDS_FIXES = 'Needs Fixes'
GOOD_TO_GO = 'Good To Go'
NEEDS_MANUAL_REVIEW = 'Needs Manual Review'
CHECK_NOT_EXECUTED = 'Check Not Executed'

def add_error_type(error_message):
    """Classify a captured step error message into an execution-status label.

    The error type is taken from the first line of the message, as the text
    after the last ':' (e.g. "ErrorType: AssertionError" -> "AssertionError").

    Args:
        error_message: The captured error text for one step; an empty string
            means the step produced no error.

    Returns:
        NO_ERROR_FOUND for an empty string, ASSERTION_ERROR when the parsed
        type is exactly "AssertionError", NON_ASSERTION_ERROR for any other
        string, and UNDEFINED_ERROR when the value is not a string (e.g.
        None or NaN for a missing column value) and cannot be classified.
    """
    # Missing values show up as None/NaN rather than "" and have no .split().
    if not isinstance(error_message, str):
        return UNDEFINED_ERROR
    if error_message == "":
        return NO_ERROR_FOUND

    first_line = error_message.split('\n')[0]
    error_type = first_line.split(':')[-1].strip()

    if error_type == 'AssertionError':
        return ASSERTION_ERROR
    return NON_ASSERTION_ERROR

def get_auto_qc_status(row):
    """Derive the overall Auto QC status and message for one notebook row.

    Rules, evaluated in order:
      1. The QC script itself failed                      -> NEEDS_FIXES
      2. Any considered step has a non-assertion error    -> NEEDS_FIXES
      3. Initial Assertion raised an assertion error      -> NEEDS_FIXES
      4. Final Assertion (after Action) raised one        -> NEEDS_FIXES
      5. Final Assertion without Action raised one        -> GOOD_TO_GO
      6. No step reported any error                       -> NEEDS_FIXES if a
         final assert exists (it should fail without the action), else
         GOOD_TO_GO
      7. Anything else (e.g. an assertion error only in Initialisation or
         Action, or an unclassified status)               -> NEEDS_MANUAL_REVIEW

    Args:
        row: A row (mapping / pandas Series) providing the 'Execution Status *'
            columns read below plus 'contains_final_assert' and 'script_passed'.

    Returns:
        pd.Series with two elements: (status, message).
    """
    init_status = row['Execution Status Initialisation']
    status_fa_no_action = row['Execution Status FA w/o Action']
    status_ia = row['Execution Status IA']
    status_action = row['Execution Status Action']
    status_fa = row['Execution Status FA']
    contains_final_assert = row['contains_final_assert']
    script_success = row['script_passed']

    step_statuses = [init_status, status_fa_no_action, status_ia, status_action, status_fa]

    if not script_success:
        status = NEEDS_FIXES
        message = "Failed: Script to run Auto QC failed"
        return pd.Series((status, message))

    if NON_ASSERTION_ERROR in step_statuses:
        status = NEEDS_FIXES
        message = "Failed: One of the code block contains Non Assertion Error(s)"

    elif status_ia == ASSERTION_ERROR:
        status = NEEDS_FIXES
        message = "Failed: Assertion Failure in Initial Assertion."

    elif status_fa == ASSERTION_ERROR:
        status = NEEDS_FIXES
        message = "Failed: Final Assertion Failure even when Action is executed. Either Final Assertion or Action needs to be fixed."

    elif status_fa_no_action == ASSERTION_ERROR:
        status = GOOD_TO_GO
        message = "Passes: All Steps executed successfully and FA failed w/o action."

    elif all(step_status == NO_ERROR_FOUND for step_status in step_statuses):
        if contains_final_assert:
            status = NEEDS_FIXES
            message = "Failed: If FA is present, it must fail in absense of the action"
        else:
            status = GOOD_TO_GO
            message = "Passed: No FA block found so FA without action is expected to pass."

    else:
        # Previously this case returned an empty status and message, so such
        # rows silently dropped out of every status bucket.
        status = NEEDS_MANUAL_REVIEW
        message = "Needs Review: Step statuses did not match any known Auto QC rule."

    return pd.Series((status, message))


In [30]:
# New status column -> raw step-output column it is derived from.
# Insertion order is kept so the columns are appended in the same order.
status_column_sources = {
    'Execution Status Install Dependencies and Clone Repositories': 'Set Up - Install Dependencies and Clone Repositories',
    'Execution Status Initialisation': 'Set Up - Import APIs and initiate DBs',
    'Execution Status FA w/o Action': 'Final Assertion_NO_ACTION',
    'Execution Status IA': 'Initial Assertion',
    'Execution Status Action': 'Action',
    'Execution Status FA': 'Final Assertion',
}

for status_column, source_column in status_column_sources.items():
    sanity_df[status_column] = sanity_df[source_column].apply(add_error_type)

In [31]:
# The 'notebook' column is renamed to 'colab_id'.
column_renames = {'notebook': 'colab_id'}
sanity_df = sanity_df.rename(columns=column_renames)

In [32]:
# get_auto_qc_status returns a (status, message) pair per row; unpack the
# pair into two new columns, then show how many notebooks got each status.
qc_columns = ['Auto QC Status', 'Auto QC Message']
sanity_df[qc_columns] = sanity_df.apply(get_auto_qc_status, axis=1)
sanity_df['Auto QC Status'].value_counts()

Auto QC Status
Good To Go     15
Needs Fixes    10
Name: count, dtype: int64

In [33]:
# Display only the notebooks flagged as needing fixes.
needs_fixes_mask = sanity_df['Auto QC Status'] == 'Needs Fixes'
sanity_df[needs_fixes_mask]

Unnamed: 0,colab_id,contains_golden_answer,contains_final_assert,script_passed,script_failure_msg,Set Up - Install Dependencies and Clone Repositories,Set Up - Import APIs and initiate DBs,Final Assertion_NO_ACTION,Initial Assertion,Action,Final Assertion,Execution Status Install Dependencies and Clone Repositories,Execution Status Initialisation,Execution Status FA w/o Action,Execution Status IA,Execution Status Action,Execution Status FA,Auto QC Status,Auto QC Message
10,1QVw4qn9Ezd6yWcoARJ8YzSW78lMtw6Al,TODO,True,True,,,,ErrorType: AssertionError\nError Description: ...,,ErrorType: NameError\nError Description: name ...,ErrorType: AssertionError\nError Description: ...,No Error Found,No Error Found,Assertion Error,No Error Found,Non Assertion Error,Assertion Error,Needs Fixes,Failed: One of the code block contains Non Ass...
11,1Eb6bQv2vmlAa-6Nnm0egPzNmZAa0bQbG,TODO,True,True,,,,ErrorType: AssertionError\nError Description: ...,,ErrorType: NameError\nError Description: name ...,ErrorType: AssertionError\nError Description: ...,No Error Found,No Error Found,Assertion Error,No Error Found,Non Assertion Error,Assertion Error,Needs Fixes,Failed: One of the code block contains Non Ass...
12,12O95HAUa38nFBuQ7LJ9DQQidPa0gdpiT,TODO,True,True,,,,ErrorType: AssertionError\nError Description: ...,,ErrorType: NameError\nError Description: name ...,ErrorType: AssertionError\nError Description: ...,No Error Found,No Error Found,Assertion Error,No Error Found,Non Assertion Error,Assertion Error,Needs Fixes,Failed: One of the code block contains Non Ass...
13,1E_yAoDYmnmHlxArXoOfV4BzifUVdyRwl,TODO,True,True,,,,ErrorType: AssertionError\nError Description: ...,,ErrorType: NameError\nError Description: name ...,ErrorType: AssertionError\nError Description: ...,No Error Found,No Error Found,Assertion Error,No Error Found,Non Assertion Error,Assertion Error,Needs Fixes,Failed: One of the code block contains Non Ass...
14,1zbrxQcvdlxVsdMZKyDXxzA1nqa5FD0Kr,TODO,True,True,,,,ErrorType: AssertionError\nError Description: ...,,ErrorType: NameError\nError Description: name ...,ErrorType: AssertionError\nError Description: ...,No Error Found,No Error Found,Assertion Error,No Error Found,Non Assertion Error,Assertion Error,Needs Fixes,Failed: One of the code block contains Non Ass...
18,12yG_x4RMg7R_7lIehM3nGOEFjfSiUlc4,TODO,True,True,,,,ErrorType: AssertionError\nError Description: ...,ErrorType: FileNotFoundError\nError Descriptio...,ErrorType: InvalidInputError\nError Descriptio...,ErrorType: AssertionError\nError Description: ...,No Error Found,No Error Found,Assertion Error,Non Assertion Error,Non Assertion Error,Assertion Error,Needs Fixes,Failed: One of the code block contains Non Ass...
20,14RaVqtVItZYZj-TLrVrfEPfTMjYfaGU8,TODO,True,True,,,,,,,,No Error Found,No Error Found,No Error Found,No Error Found,No Error Found,No Error Found,Needs Fixes,"Failed: If FA is present, it must fail in abse..."
21,11HFqEAKOlmy6cCb5PKnNcOU-NgUU2wO3,TODO,True,True,,,,,,,,No Error Found,No Error Found,No Error Found,No Error Found,No Error Found,No Error Found,Needs Fixes,"Failed: If FA is present, it must fail in abse..."
22,1X6EmU9Yb1xtMNdjkhoqyptafBwfq5-IH,TODO,True,True,,,,,,,,No Error Found,No Error Found,No Error Found,No Error Found,No Error Found,No Error Found,Needs Fixes,"Failed: If FA is present, it must fail in abse..."
24,114T33U6eAF5IXH90Se16UY8ecOULrbjc,TODO,True,True,,,,,,,,No Error Found,No Error Found,No Error Found,No Error Found,No Error Found,No Error Found,Needs Fixes,"Failed: If FA is present, it must fail in abse..."


In [19]:
# Count how many rows have each value of the 'script_passed' flag.
script_passed_flags = sanity_df['script_passed']
script_passed_flags.value_counts()

script_passed
True    25
Name: count, dtype: int64

In [20]:
def trim_text(text, max_length=49999):
    """Truncate a string to at most ``max_length`` characters.

    Args:
        text: The cell value to trim. Non-string values (None, NaN, bools,
            numbers, ...) can occur in object-dtype columns and are returned
            unchanged instead of raising on the slice.
        max_length: Maximum number of characters to keep. Defaults to 49999,
            the limit this notebook has always used (presumably to stay under
            a downstream per-cell size limit - TODO confirm).

    Returns:
        The possibly shortened string, or ``text`` itself when it is not a
        string.
    """
    if not isinstance(text, str):
        return text
    return text[:max_length]

# Apply trim_text to every object/string column.
text_columns = list(sanity_df.select_dtypes(include=['object', 'string']).columns)
for col in text_columns:
    trimmed_values = sanity_df[col].apply(trim_text)
    sanity_df[col] = trimmed_values

In [21]:
# Write the annotated results to CSV, leaving out the DataFrame index.
auto_qc_csv = 'auto_qc_result.csv'
sanity_df.to_csv(auto_qc_csv, index=False)
