# BetterTypes4Py (BT4Py) — TypeT5 (TT5) Top-1

In [22]:
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell is executed, so edits to local code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [23]:
import os
import sys
# Resolve the directory two levels above the current working directory and
# make sure it is on the import path exactly once.
module_path = os.path.abspath(os.path.join(os.pardir, os.pardir))
if module_path not in sys.path:
    sys.path.append(module_path)

In [24]:
import scripts
# Show where the `scripts` package was imported from, as a check on the
# sys.path setup done in the previous cell.
print(scripts.__path__)

['/nfs/home/bsparks/mdti4py/scripts/scripts']


In [25]:
import pathlib

from scripts.common.schemas import TypeCollectionCategory
from scripts.infer.structure import DatasetFolderStructure

# Name of the inference tool whose artifacts this notebook evaluates; it is
# also used as the log prefix and passed as `tool_name` when reading artifacts.
tool = "TypeT5TopN1"
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
dataset = DatasetFolderStructure(pathlib.Path(
    "/nfs/home/bsparks/mdti4py/datasets/better-types-4-py-dataset"
))
# Fail early if the dataset root directory does not exist.
assert dataset.dataset_root.is_dir(), f"{dataset.dataset_root} not a directory!"

print(dataset)

BetterTypes4Py @ /nfs/home/bsparks/mdti4py/datasets/better-types-4-py-dataset


In [26]:
import logging
from importlib import reload

# Shut down and reload the logging module before configuring it, presumably so
# that re-running this cell starts from a clean logging state.
logging.shutdown()
reload(logging)

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
# Iterate over a copy: removeHandler() mutates logger.handlers, and removing
# entries while iterating the live list would skip every other handler.
for handler in list(logger.handlers):
    logger.removeHandler(handler)
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)

# Prefix every message with the tool name and the log level.
ch.setFormatter(logging.Formatter(f"[{tool} @ %(levelname)s]: %(message)s"))
logger.addHandler(ch)

logger.info("Hello World!")

[TypeT5TopN1 @ INFO]: Hello World!


# Prediction Metrics for Full Accuracy

In [27]:
!pip install colored plotly



In [28]:
# Our analysis covers more data points than these models actually consider, so reuse TypeT5's own metrics here instead
import tqdm

from typet5.static_analysis import PythonProject, SignatureMap, AccuracyMetric, SignatureErrorAnalysis
from typet5.experiments.typet5 import accs_as_table_row
from typet5.visualization import pretty_print_dict

from scripts.common.output import InferenceArtifactIO

test_set = dataset.test_set()

# Parse every test-set project once and store it under the project's name;
# the progress bar's postfix shows the project currently being parsed.
projects = {}
pbar = tqdm.tqdm(test_set, desc="Loading ground truths")
for project in pbar:
    pbar.set_postfix({"project": str(project)})
    projects[project.name] = PythonProject.parse_from_root(project)

Loading ground truths: 100%|█| 50/50 [01:18<00:00,  1.57s/it, project=/nfs/home/bsparks/mdti4py/datasets/better-types-


In [29]:
# Constants
# Common type names: read from the TypeT5 checkpoint on the hub. The model is
# loaded only to read this attribute and is not kept around afterwards.
from typet5.model import ModelWrapper
common_names = ModelWrapper.load_from_hub("MrVPlusOne/TypeT5-v7").common_type_names

from scripts.common.schemas import TypeCollectionCategory

# Our analysis covers more data points than these models actually consider, so reuse TypeT5's own metrics here instead
from scripts.common.output import InferenceArtifactIO


def type_t5_metrics(task: "TypeCollectionCategory | str") -> None:
    """Evaluate the stored predictions of ``tool`` on the test set with TypeT5's metrics.

    Reads the prediction artifact of every project in ``test_set``, compares
    the predictions against the signatures extracted from the ground-truth
    ``projects``, and prints the resulting accuracies.

    Relies on notebook globals defined in earlier cells: ``test_set``,
    ``projects``, ``dataset``, ``tool``, ``common_names`` and ``logger``.

    Args:
        task: Which artifact to load; forwarded unchanged to
            ``InferenceArtifactIO``. This notebook passes either a
            ``TypeCollectionCategory`` member or the string ``"all"``.

    Raises:
        FileNotFoundError: Seen in the recorded run when a project had no
            artifact pickle for ``task``; presumably raised while reading
            the artifact and not handled here.
    """
    predictions_by_project = {}

    for project in (pbar := tqdm.tqdm(test_set, desc=f"Loading predictions for {task}")):
        artifact = InferenceArtifactIO(
            artifact_root=pathlib.Path("/nfs/home/bsparks/mdti4py/datasets"),
            dataset=dataset,
            repository=project,
            tool_name=tool,
            task=task,
        )
        pbar.set_postfix({"project": artifact.relative_location()})

        # read() yields a pair; only the first element (the predictions) is used.
        tt5_predictions, _ = artifact.read()
        predictions_by_project[project.name] = tt5_predictions

    # Ground-truth signatures per project, keyed by element path.
    label_signatures: dict[str, SignatureMap] = {
        project_name: {e.path: e.get_signature() for e in labels.all_elems()}
        for project_name, labels in projects.items()
    }

    metrics = AccuracyMetric.default_metrics(common_type_names=common_names)

    n_annots = sum(
        e.get_signature().n_annots()
        for p in projects.values()
        for e in p.all_elems()
    )
    n_labels = sum(
        e.n_annotated()
        for lm in label_signatures.values()
        for e in lm.values()
    )

    logger.info(f"n_annots: {n_annots}, n_labels: {n_labels}")
    # Guard the ratio: with no projects loaded n_annots is 0 and the division
    # would raise ZeroDivisionError before any metric is reported.
    if n_annots:
        logger.info(f"Ratio: {n_labels / n_annots}")
    else:
        logger.info("Ratio: undefined (n_annots is 0)")

    # One SignatureErrorAnalysis per metric, keyed by metric name.
    accs = {
        m.name: SignatureErrorAnalysis(
            predictions_by_project,
            label_signatures,
            m,
            error_on_mismatched_signature=False,
        ).accuracies
        for m in metrics
    }
    accs_as_table_row(accs)
    pretty_print_dict(accs)

Fetching 9 files:   0%|          | 0/9 [00:00<?, ?it/s]

In [30]:
# Metrics for the "all" task; note the task is passed as a plain string here
# rather than as a TypeCollectionCategory member.
type_t5_metrics(task="all")

Loading predictions for all: 100%|█| 50/50 [00:03<00:00, 16.41it/s, project=BetterTypes4Py/kornicameister__axion/TypeT
[TypeT5TopN1 @ INFO]: n_annots: 30070, n_labels: 16520
[TypeT5TopN1 @ INFO]: Ratio: 0.5493847688726305


Accuracies on all types:
header:  ['full.all', 'calibrated.all', 'calibrated.simple', 'calibrated.complex', 'base.all']
66.68 & 67.83 & 75.48 & 35.89 & 74.14
Accuracies on common types:
header:  ['full.all', 'calibrated.all', 'calibrated.simple', 'calibrated.complex', 'base.all']
76.39 & 78.88 & 84.61 & 48.72 & 83.19
Accuracies on rare types:
header:  ['full.all', 'calibrated.all', 'calibrated.simple', 'calibrated.complex', 'base.all']
49.05 & 52.62 & 61.60 & 24.13 & 58.24
full_acc:
   full_acc: 66.68% (count=16.5k)
   full_acc_by_cat:
      FuncArg: 61.26% (count=8.4k)
      FuncReturn: 77.50% (count=6.0k)
      ClassAtribute: 57.23% (count=2.0k)
      GlobalVar: 60.18% (count=113)
   full_acc_by_simple:
      complex: 34.92% (count=3.4k)
      simple: 75.00% (count=13.1k)
   full_acc_label_size: 1.4214
   full_acc_pred_size: 1.4212
   full_acc_ignored_labels: 0
full_acc_common:
   full_acc_common: 76.39% (count=10.7k)
   full_acc_common_by_cat:
      FuncArg: 76.54% (count=5.1k)
    

In [31]:
# NOTE(review): in the recorded run this call failed with FileNotFoundError —
# no TypeT5TopN1 artifact pickle was found under the CALLABLE_PARAMETER task.
type_t5_metrics(task=TypeCollectionCategory.CALLABLE_PARAMETER)

Loading predictions for CALLABLE_PARAMETER:   0%| | 0/50 [00:00<?, ?it/s, project=BetterTypes4Py/nubark__instark/TypeT


FileNotFoundError: [Errno 2] No such file or directory: '/nfs/home/bsparks/mdti4py/datasets/BetterTypes4Py/nubark__instark/TypeT5TopN1/CALLABLE_PARAMETER/TypeT5TopN1-artifacts.pickle'

In [None]:
# Not executed in the recorded run (the cell has no execution count).
type_t5_metrics(task=TypeCollectionCategory.CALLABLE_RETURN)

In [None]:
# Not executed in the recorded run (the cell has no execution count).
type_t5_metrics(task=TypeCollectionCategory.VARIABLE)