In [7]:
from typing import List, Tuple, Dict, Any, Optional, Callable
from pandarallel import pandarallel
import os
import re
import sys
import json
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import pandas as pd
from tqdm.auto import tqdm
# Enable tqdm's pandas integration (side effect only; nothing is returned that we keep).
tqdm.pandas()
# Initialise pandarallel with progress bars enabled; on start-up it logs how many
# workers it will use and how data is transferred to them.
pandarallel.initialize(progress_bar=True)

INFO: Pandarallel will run on 24 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [4]:
# Root of the project, relative to the directory this notebook runs from.
PATH_ROOT_DIR: str = ".."

# JSON dump of the extracted API records. Derived from PATH_ROOT_DIR so that
# relocating the root only requires changing one constant.
PATH_TO_API: str = f"{PATH_ROOT_DIR}/available_apis/qiskit.json"

In [5]:
def read_api(path_json_file_with_api: str) -> List[Dict[str, Any]]:
    """Load the API records stored in a JSON file.

    Args:
        path_json_file_with_api: path to the JSON file to read.

    Returns:
        The parsed JSON content. Callers in this notebook index it as a list
        of dictionaries, one dictionary per API record.

    Raises:
        FileNotFoundError: if the file does not exist.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    # JSON is UTF-8 by specification; be explicit instead of relying on the
    # platform's locale encoding (which is not UTF-8 everywhere).
    with open(path_json_file_with_api, "r", encoding="utf-8") as file_with_api:
        return json.load(file_with_api)


# Load every API record once; later cells reuse `api_data`.
api_data = read_api(path_json_file_with_api=PATH_TO_API)
# Peek at the first record to see which attributes each API entry carries.
print(api_data[0].keys())

dict_keys(['api_name', 'full_api_name', 'api_description', 'api_signature', 'file_path'])


## Explore Names

In [53]:
def print_api(
        records: List[Dict[str, Any]],
        attributes_to_print: List[str],
        n_samples: Optional[int] = None,
        filter_fns_must_satisfy_all: Optional[
            List[Callable[[Dict[str, Any]], Any]]] = None,
        filter_fns_must_satisfy_at_least_one: Optional[
            List[Callable[[Dict[str, Any]], Any]]] = None) -> None:
    """Print selected attributes of the API records that pass the filters.

    Records are visited in order. A record is printed when it passes every
    predicate in ``filter_fns_must_satisfy_all`` and at least one predicate in
    ``filter_fns_must_satisfy_at_least_one``. Each printed record is followed
    by a line of 80 dashes.

    Args:
        records: API records, one dictionary per API.
        attributes_to_print: keys to print for each selected record, in order.
        n_samples: maximum number of records to print. ``None`` means no
            limit; ``0`` prints nothing.
        filter_fns_must_satisfy_all: predicates taking a record; a record is
            skipped if any of them returns a falsy value. ``None`` or an empty
            list disables this filter.
        filter_fns_must_satisfy_at_least_one: predicates taking a record; a
            record is skipped if all of them return a falsy value. ``None`` or
            an empty list disables this filter.

    Raises:
        KeyError: if a selected record lacks one of ``attributes_to_print``.
    """
    printed_samples = 0
    for api in records:
        # Compare against None explicitly so that n_samples=0 is honoured as
        # "print nothing" instead of being mistaken for "no limit".
        if n_samples is not None and printed_samples >= n_samples:
            break
        # Generator expressions let all()/any() stop at the first decisive
        # predicate instead of evaluating every filter for every record.
        if filter_fns_must_satisfy_all and not all(
                fn(api) for fn in filter_fns_must_satisfy_all):
            continue
        if filter_fns_must_satisfy_at_least_one and not any(
                fn(api) for fn in filter_fns_must_satisfy_at_least_one):
            continue
        for attribute in attributes_to_print:
            print(f"{attribute}: {api[attribute]}")
        print("-" * 80)
        printed_samples += 1


# Preview up to 20 APIs that are public, not tests, not deprecated and not decorators.
print_api(
    api_data,
    ["api_name", "full_api_name"],
    n_samples=20,
    filter_fns_must_satisfy_all=[
        # drop test functions
        lambda record: not record["api_name"].startswith("test"),
        # drop private functions
        lambda record: not record["api_name"].startswith("_"),
        # drop anything flagged as deprecated in its qualified name
        lambda record: "deprec" not in record["full_api_name"].lower(),
        # drop decorators
        lambda record: "decorator" not in record["full_api_name"].lower(),
    ],
)

api_name: short_path
full_api_name: short_path
--------------------------------------------------------------------------------
api_name: print_main
full_api_name: print_main
--------------------------------------------------------------------------------
api_name: create_parser
full_api_name: create_parser
--------------------------------------------------------------------------------
api_name: filter_by_result
full_api_name: filter_by_result
--------------------------------------------------------------------------------
api_name: run_filter_script
full_api_name: run_filter_script
--------------------------------------------------------------------------------
api_name: discover_files
full_api_name: discover_files
--------------------------------------------------------------------------------
api_name: validate_path
full_api_name: validate_path
--------------------------------------------------------------------------------
api_name: discover_files
full_api_name: discover_files
---

In [54]:
# Total number of loaded API records, before any filtering is applied.
n_data = len(api_data)
print(f"Number of APIs: {n_data}")

Number of APIs: 14324
