In [7]:
import subprocess
import os
import sys
from pathlib import Path
from tqdm import tqdm
import time
import threading
import queue

# Make sure we operate from the project root: when the kernel was started
# inside the 'notebooks' folder, the root is one directory up.
if Path.cwd().name == 'notebooks':
    PROJECT_ROOT = Path.cwd().parent
else:
    PROJECT_ROOT = Path.cwd()

# Directory holding the KB-building shell scripts.
SCRIPT_DIR = PROJECT_ROOT.joinpath('KB_building', 'scripts')

print(f"🎯 Project root: {PROJECT_ROOT}")
# Switch the working directory to the project root for the rest of the notebook.
os.chdir(PROJECT_ROOT)



🎯 Project root: /Users/vernetemmanueladjobi/Documents/RessourcesStages/Projets/VulRAG-Hybrid-System


In [None]:
def run_cpg_pipeline():
    """Run the CPG shell pipeline behind a minimal tqdm progress bar.

    Launches ``Cpg_pipeline.sh`` (looked up in ``SCRIPT_DIR``) with bash,
    merges stderr into stdout, and reads the combined stream on a background
    thread. Only lines containing one of the milestone keywords advance the
    bar and become its description; every other line is ignored here.

    Returns:
        int: Exit status of the bash process (0 means success). If the
        process had to be terminated because its output could no longer be
        read, this is the status resulting from that termination.

    Raises:
        OSError: Propagated from ``subprocess.Popen`` if bash cannot be started.
    """
    cmd = ["bash", str(SCRIPT_DIR / "Cpg_pipeline.sh")]

    # Substrings that mark a line as worth showing on the progress bar.
    milestone_keywords = (
        "Processing CWE-", "Step", "FINAL RESULTS",
        "CPG Extraction", "Sanity Check", "pairs processed",
    )

    # Queue for communication between the reader thread and this thread.
    output_queue = queue.Queue()

    def read_output(process, q):
        """Forward each output line of ``process`` to ``q`` as ('stdout', line).

        Puts ('done', None) once the stream reaches EOF, or ('error', message)
        if reading fails. The pipe is closed in every case.
        """
        try:
            # readline() returns '' at EOF, which ends the iteration.
            for line in iter(process.stdout.readline, ''):
                if line:
                    q.put(('stdout', line.strip()))
            q.put(('done', None))
        except Exception as e:
            q.put(('error', str(e)))
        finally:
            # Always release the pipe, including when reading failed.
            process.stdout.close()

    # Run the process. errors="replace" keeps a stray undecodable byte in the
    # pipeline output from aborting the reader thread.
    process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
        errors="replace",
        bufsize=1,
    )

    # Daemon thread so a stuck read can never keep the kernel alive.
    reader_thread = threading.Thread(target=read_output, args=(process, output_queue))
    reader_thread.daemon = True
    reader_thread.start()

    reader_error = None
    try:
        with tqdm(desc="CPG Pipeline", unit="step", ncols=100) as pbar:
            last_update = time.time()

            while True:
                try:
                    # Timeout so the bar can be refreshed even without output.
                    msg_type, content = output_queue.get(timeout=2.0)
                except queue.Empty:
                    # No output for a while: show a generic heartbeat message.
                    if time.time() - last_update > 5:
                        pbar.set_description("Pipeline running...")
                        last_update = time.time()
                    continue

                if msg_type == 'done':
                    break

                if msg_type == 'error':
                    reader_error = content
                    pbar.set_description(f"Error: {content}")
                    # Nothing drains the pipe any more; stop the child so that
                    # wait() below cannot block on a process stuck writing.
                    process.terminate()
                    break

                if any(keyword in content for keyword in milestone_keywords):
                    # Truncate the message so it fits on the bar.
                    display_msg = content[:60] + "..." if len(content) > 60 else content
                    pbar.set_description(f"{display_msg}")
                    pbar.update(1)
                    last_update = time.time()

            # Wait for the process to finish.
            return_code = process.wait()
            reader_thread.join(timeout=1.0)

            # Set the final status while the bar is still open: once the
            # `with` block exits the bar is closed and is not redrawn.
            if reader_error is None:
                if return_code == 0:
                    pbar.set_description("Pipeline completed successfully")
                else:
                    pbar.set_description(f"Pipeline failed (code: {return_code})")
    except BaseException:
        # Interrupted (e.g. kernel interrupt) or unexpected failure: do not
        # leave the bash process running in the background.
        if process.poll() is None:
            process.terminate()
            process.wait()
        raise

    if return_code == 0:
        print(f"\nPipeline CPG completed successfully!")
        print(f"Logs: {PROJECT_ROOT}/logs/extract.log")
        print(f"Generated CPGs: {PROJECT_ROOT}/data/tmp/cpgs/")
        print(f"Reports: {PROJECT_ROOT}/results/cpg_extraction/")
    else:
        print(f"\nPipeline failed with code: {return_code}")
        print(f"Check logs: {PROJECT_ROOT}/logs/extract.log")

    return return_code

In [6]:

# Execution: announce the run, launch the pipeline, then report its exit status.
print(
    "Launching CPG pipeline...",
    "Detailed output in logs, minimal display here",
    "-" * 60,
    sep="\n",
)

return_code = run_cpg_pipeline()

print("-" * 60, f"Terminated with return code: {return_code}", sep="\n")

Launching CPG pipeline...
Detailed output in logs, minimal display here
------------------------------------------------------------


📋 Next Steps:: : 18step [18:15, 60.86s/step]                                


Pipeline CPG completed successfully!
Logs: /Users/vernetemmanueladjobi/Documents/RessourcesStages/Projets/VulRAG-Hybrid-System/logs/extract.log
Generated CPGs: /Users/vernetemmanueladjobi/Documents/RessourcesStages/Projets/VulRAG-Hybrid-System/data/tmp/cpgs/
Reports: /Users/vernetemmanueladjobi/Documents/RessourcesStages/Projets/VulRAG-Hybrid-System/results/cpg_extraction/
------------------------------------------------------------
Terminated with return code: 0





JSONDecodeError: Expecting ',' delimiter: line 19 column 31 (char 582)