In [1]:
import pathlib
from scripts.infer.structure import CrossDomainTypes4Py

# Machine-specific absolute locations: the directory artifacts are loaded from and the
# root handed to CrossDomainTypes4Py. Adjust both when running on another host.
artifact_root = pathlib.Path("/nfs/home/bsparks/mdti4py/datasets")
dataset_root = pathlib.Path("/nfs/data/students/bsparks/mdti4py-dataset-pool/cdt4py")
dataset = CrossDomainTypes4Py(dataset_root)

2023-09-25 15:08:31.876403: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Keep the test set around for later cells and show how many entries it has.
ts = dataset.test_set()
display(len(ts))

1551

In [3]:
# Keep the validation set around for later cells and show how many entries it has.
el_vs = dataset.el_validation_set()
display(len(el_vs))

1254

In [4]:
# Keep the el_test_set() result around for later cells and show how many entries it has.
el_ts = dataset.el_test_set()
display(len(el_ts))

297

In [5]:
import importlib
import experiments.inferred

groundtruth = experiments.inferred.load_groundtruths(artifact_root, dataset)

/nfs/home/bsparks/mdti4py/datasets/CrossDomainTypes4Py/zwicker-group__numpy-py-pde/extended_ground_truth.csv: 100%|█| 
/nfs/home/bsparks/mdti4py/datasets/CrossDomainTypes4Py/zwicker-group__numpy-py-pde/context.csv: 100%|█| 1551/1551 [00:


In [6]:
import pandas as pd
display(groundtruth.columns)
# Turn the "<MISSING>" sentinel into a real missing value so that NA-aware operations
# (such as the .count() calls in statistics) do not count it as an annotation.
# NOTE(review): presumably the sentinel marks slots without an annotation — confirm.
groundtruth = groundtruth.replace("<MISSING>", pd.NA)

Index(['file', 'category', 'qname', 'qname_ssa', 'raw_anno',
       'depth_limited_anno', 'adjusted_anno', 'base_anno', 'repository',
       'context_category', 'nested'],
      dtype='object')

In [7]:
groundtruth.base_anno.value_counts().head(n=10)

base_anno
str        50817
None       25572
int        22759
List       17856
bool       14300
Dict       14110
float       8582
Tuple       4714
Any         3960
ndarray     3934
Name: count, dtype: int64

In [8]:
import experiments.predictions
# Expose the base annotation under the name "trait_gt_form" and leave out the
# trivial None / Any annotations before bucketing by scarcity.
_renamed_gt = groundtruth.rename(columns={"base_anno": "trait_gt_form"})
_is_trivial = _renamed_gt["trait_gt_form"].isin(["None", "Any"])
trait_form = _renamed_gt[~_is_trivial]

# One "trait_gt_form" column per scarcity bucket, in the order ubiquitous, common, rare.
ubiq_types, common_types, rare_types = (
    bucket(trait_form)["trait_gt_form"]
    for bucket in (
        experiments.predictions.ubiquitous_types,
        experiments.predictions.common_types,
        experiments.predictions.rare_types,
    )
)

In [9]:
ubiq_types.value_counts()

trait_gt_form
str     50817
int     22759
List    17856
bool    14300
Dict    14110
Name: count, dtype: int64

In [10]:
common_types.value_counts()

trait_gt_form
float              8582
Tuple              4714
ndarray            3934
Union              3736
DataFrame          2545
                   ... 
UserID              123
Client              123
VuforiaDatabase     123
Document            123
Node                120
Name: count, Length: 95, dtype: int64

In [11]:
from scripts.common.schemas import TypeCollectionCategory, RepositoryTypeCollectionSchema
import experiments.predictions

def statistics(split: pd.DataFrame) -> None:
    """Print annotation coverage and scarcity counts for a ground-truth frame.

    Rows whose base annotation is ``None`` or ``Any`` are dropped first, as are
    rows whose ``qname`` ends in one of the ignored suffixes (``.self``,
    ``.cls``, ``.args``, ``.kwargs`` and a handful of dunder names).  Coverage
    (rows, non-missing raw annotations, their ratio) is printed per category
    and overall, followed by unique-annotation counts and the number of rows
    whose base annotation occurs in the notebook-level ``ubiq_types``,
    ``common_types`` and ``rare_types``.
    """
    schema = RepositoryTypeCollectionSchema
    ignored_suffixes = (
        ".self", ".cls", ".args", ".kwargs", ".__init__", ".__len__", ".__str__", ".__repr__", ".__bool__", ".__float__"
    )

    is_trivial = split["base_anno"].isin(["None", "Any"])
    split = split[~is_trivial]
    has_ignored_suffix = split["qname"].str.endswith(ignored_suffixes)
    split = split[~has_ignored_suffix]

    def report_coverage(frame: pd.DataFrame) -> None:
        # .size counts every row; .count() only the non-missing raw annotations.
        raw = frame[schema.raw_anno]
        n_rows = raw.size
        print("total:", n_rows)
        n_annotated = raw.count()
        print("annotated:", n_annotated)
        print("ratio:", n_annotated / n_rows)

    # sort=False keeps categories in first-seen order.
    for category, rows in split.groupby(by=schema.category, sort=False):
        print("===", category, "===")
        report_coverage(rows)

    print("===", "total", "===")
    report_coverage(split)

    for label, column in (
        ("unique raw:", schema.raw_anno),
        ("unique adjusted:", schema.adjusted_anno),
        ("unique base:", schema.base_anno),
    ):
        print(label, split[column].nunique())

    base = split[schema.base_anno]
    for label, bucket in (
        ("ubiquitous:", ubiq_types),
        ("common:", common_types),
        ("rare:", rare_types),
    ):
        print(label, base.isin(bucket).sum())

    is_rare = split[schema.base_anno].isin(rare_types)
    print("unique rare:", split[is_rare].base_anno.nunique())

In [12]:
statistics(groundtruth)

=== CALLABLE_RETURN ===
total: 158377
annotated: 53362
ratio: 0.3369302360822594
=== CALLABLE_PARAMETER ===
total: 240486
annotated: 139368
ratio: 0.5795264589206857
=== VARIABLE ===
total: 525287
annotated: 35697
ratio: 0.0679571358133744
=== total ===
total: 924150
annotated: 228427
ratio: 0.24717524211437536
unique raw: 26124
unique adjusted: 15467
unique base: 9263
ubiquitous: 118300
common: 59995
rare: 50103
unique rare: 9163


In [13]:
# String form of dataset.author_repo(...) for every key of the validation set.
validation_ars = [str(dataset.author_repo(repo)) for repo in el_vs.keys()]
_in_validation = groundtruth[RepositoryTypeCollectionSchema.repository].isin(validation_ars)
validation_gt = groundtruth[_in_validation]

statistics(validation_gt)

=== CALLABLE_RETURN ===
total: 123052
annotated: 43177
ratio: 0.35088417904625685
=== CALLABLE_PARAMETER ===
total: 191927
annotated: 114315
ratio: 0.5956170835786523
=== VARIABLE ===
total: 415637
annotated: 29347
ratio: 0.07060728472200502
=== total ===
total: 730616
annotated: 186839
ratio: 0.2557280431854764
unique raw: 21878
unique adjusted: 13151
unique base: 7834
ubiquitous: 95932
common: 49767
rare: 41111
unique rare: 7737


In [14]:
# String form of dataset.author_repo(...) for every key of el_ts.
test_ars = [str(dataset.author_repo(repo)) for repo in el_ts.keys()]
_in_test = groundtruth[RepositoryTypeCollectionSchema.repository].isin(test_ars)
test_gt = groundtruth[_in_test]

statistics(test_gt)

=== CALLABLE_RETURN ===
total: 35325
annotated: 10185
ratio: 0.28832271762208067
=== CALLABLE_PARAMETER ===
total: 48559
annotated: 25053
ratio: 0.5159290759694393
=== VARIABLE ===
total: 109650
annotated: 6350
ratio: 0.05791153670770634
=== total ===
total: 193534
annotated: 41588
ratio: 0.21488730662312566
unique raw: 5083
unique adjusted: 3362
unique base: 2048
ubiquitous: 22368
common: 10228
rare: 8992
unique rare: 1965


In [15]:
# Distinct base annotations of each split that also occur in rare_types.
validation_rare_types, test_rare_types = (
    set(gt[gt[RepositoryTypeCollectionSchema.base_anno].isin(rare_types)].base_anno.unique())
    for gt in (validation_gt, test_gt)
)

In [16]:
# Rare types seen only in the validation split, then only in the test split.
only_in_validation = validation_rare_types - test_rare_types
only_in_test = test_rare_types - validation_rare_types
print(len(only_in_validation))
print(len(only_in_test))

7274
1428


In [17]:
from scripts.common.schemas import ExtendedTypeCollectionSchema

import experiments.inferred
from experiments.predictions import ubiquitous_types, co_occurrences

import pandera.typing as pt, pandas as pd, seaborn as sns
import importlib

import matplotlib.pyplot as plt

def adjusted_pipeline(tool: str, groundtruth: pt.DataFrame[ExtendedTypeCollectionSchema]) -> pd.DataFrame:
    """Load ``tool``'s predictions and align them with the ground truth in adjusted form.

    Args:
        tool: Tool name forwarded to ``experiments.inferred.load_entire_inferred``.
        groundtruth: Ground-truth annotations. Rows whose ``adjusted_anno`` is
            ``None`` / ``Any`` (bare, or as a dotted suffix such as ``x.None``)
            are excluded before joining.

    Returns:
        The result of ``experiments.inferred.evaluatable`` applied to the joined
        frame.

    Raises:
        AssertionError: If the evaluatable frame has a missing ``gt_anno`` or ``anno``.
    """
    # Re-import so edits to experiments.inferred are picked up without a kernel restart.
    importlib.reload(experiments.inferred)

    inferred = experiments.inferred.load_entire_inferred(artifact_root, dataset, tool_name=tool, task="all")
    # Report only after loading has actually happened (same order as base_pipeline).
    print("Loaded inferred")

    experiments.inferred.error_if_duplicate_keys(inferred)
    print("No duplicate keys found")

    adjusted = experiments.inferred.typet5_adjusted_form(inferred)
    print("Converted to adjusted form")

    # na=False: a missing annotation is never a trivial type, and the mask stays a
    # plain boolean Series instead of an object Series containing NaN.
    has_trivial_suffix = groundtruth.adjusted_anno.str.endswith((".None", ".Any"), na=False)
    trivial_mask = has_trivial_suffix | groundtruth.adjusted_anno.isin(["None", "Any"])
    groundtruth = groundtruth[~trivial_mask]
    print("Removed trivial types from groundtruth (None and Any)")

    aligned = experiments.inferred.join_truth_to_preds(
        truth=groundtruth,
        predictions=adjusted,
        comparable_anno=ExtendedTypeCollectionSchema.adjusted_anno,
    )
    print(f"{aligned.shape}")
    print("Joined ground truth to predictions")

    evaluatable = experiments.inferred.evaluatable(aligned)
    assert evaluatable["gt_anno"].notna().all()
    assert evaluatable["anno"].notna().all()

    print(f"Reduced to evaluatable: {evaluatable.shape}")
    return evaluatable

def base_pipeline(tool: str, groundtruth: pt.DataFrame[ExtendedTypeCollectionSchema]) -> pd.DataFrame:
    """Load ``tool``'s predictions and align them with the ground truth in base form.

    Args:
        tool: Tool name forwarded to ``experiments.inferred.load_entire_inferred``.
        groundtruth: Ground-truth annotations; rows whose ``base_anno`` is
            ``None`` or ``Any`` are excluded before joining.

    Returns:
        The result of ``experiments.inferred.evaluatable`` applied to the joined
        frame.

    Raises:
        AssertionError: If the evaluatable frame has a missing ``gt_anno`` or ``anno``.
    """
    # Re-import so edits to experiments.inferred are picked up without a kernel restart.
    importlib.reload(experiments.inferred)
    predictions = experiments.inferred.load_entire_inferred(artifact_root, dataset, tool_name=tool, task="all")
    print("Loaded inferred")

    experiments.inferred.error_if_duplicate_keys(predictions)
    print("No duplicate keys found")

    base_form = experiments.inferred.typet5_base_form(predictions)
    print("Converted to base form")

    is_trivial = groundtruth.base_anno.isin(["None", "Any"])
    nontrivial_truth = groundtruth[~is_trivial]
    print("Removed trivial types from groundtruth (None and Any)")

    joined = experiments.inferred.join_truth_to_preds(
        truth=nontrivial_truth,
        predictions=base_form,
        comparable_anno=ExtendedTypeCollectionSchema.base_anno,
    )
    print(f"{joined.shape}")
    print("Joined ground truth to predictions")

    result = experiments.inferred.evaluatable(joined)
    for required in ("gt_anno", "anno"):
        assert result[required].notna().all()

    print(f"Reduced to evaluatable: {result.shape}")
    return result

# Type4Py

In [18]:
from experiments import pipeline, inferred
import importlib

type4py_inferred = inferred.load_entire_inferred(artifact_root, dataset, tool_name="type4pyN1", task="all")

/nfs/home/bsparks/mdti4py/datasets/CrossDomainTypes4Py/zwicker-group__numpy-py-pde/type4pyN1/all/inferred.csv: 100%|█|


Loaded 1541 inference artifacts


In [19]:
# Reload so the current experiments.pipeline code is used without a kernel restart.
importlib.reload(pipeline)

# Same tool, ground truth and predictions for both forms; only `form` differs.
_type4py_args = dict(tool="type4pyN1", groundtruth=groundtruth, inferred=type4py_inferred)
type4py_adjusted = pipeline.factory(**_type4py_args, form="adjusted")
type4py_base = pipeline.factory(**_type4py_args, form="base")

Initial prediction size: (1171690, 8)
Deriving limited form


100%|███████████████████████████████████████████████████████████████████| 1171690/1171690 [00:09<00:00, 125776.88it/s]


Deriving adjusted form from limited form


100%|███████████████████████████████████████████████████████████████████| 1171690/1171690 [00:09<00:00, 128977.80it/s]


Size after joining predictions to groundtruth: (1054456, 12)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned[clean_annos] = cleaned[clean_annos].fillna("<MISSING>")


Reduced to evaluatable: (228305, 12)
Deriving limited form


100%|███████████████████████████████████████████████████████████████████| 1171690/1171690 [00:09<00:00, 126157.36it/s]


Deriving adjusted form from limited form


100%|███████████████████████████████████████████████████████████████████| 1171690/1171690 [00:08<00:00, 133018.37it/s]


Deriving base form from adjusted form


100%|███████████████████████████████████████████████████████████████████| 1171690/1171690 [00:09<00:00, 128319.21it/s]


(1054456, 12)
Reduced to evaluatable: (228305, 12)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned[clean_annos] = cleaned[clean_annos].fillna("<MISSING>")


In [20]:
# Map the "<MISSING>" sentinel back to NA before scoring; the trailing name displays the table.
t4py_adj_perf = experiments.predictions.performance(
    type4py_adjusted.replace("<MISSING>", pd.NA), total=True
)
t4py_adj_perf

Unnamed: 0,observations,predictions,unassigned,matches,stracc,relacc
ubiquitous,118274,111036,7238,68273,0.577244,0.614873
common,59993,54324,5669,18017,0.300318,0.331658
rare,50038,45171,4867,7016,0.140213,0.155321
total,228305,210531,17774,93306,0.40869,0.443194


In [21]:
# Map the "<MISSING>" sentinel back to NA before scoring; the trailing name displays the table.
t4py_base_perf = experiments.predictions.performance(
    type4py_base.replace("<MISSING>", pd.NA), total=True
)
t4py_base_perf

Unnamed: 0,observations,predictions,unassigned,matches,stracc,relacc
ubiquitous,118274,111036,7238,73178,0.618716,0.659048
common,59993,54324,5669,18834,0.313937,0.346698
rare,50038,45171,4867,7060,0.141093,0.156295
total,228305,210531,17774,99072,0.433946,0.470582


In [22]:
# Per-category breakdown of the adjusted-form Type4Py results.
t4py_cat_adj_perf = experiments.predictions.by_category_performance(
    type4py_adjusted.replace("<MISSING>", pd.NA), total=True
)
t4py_cat_adj_perf

Unnamed: 0,Unnamed: 1,observations,predictions,unassigned,matches,stracc,relacc
PARAMETER,ubiquitous,73106,71156,1950,47489,0.649591,0.667393
PARAMETER,common,36577,34623,1954,14429,0.394483,0.416746
PARAMETER,rare,29597,27617,1980,5921,0.200054,0.214397
PARAMETER,total,139280,133396,5884,67839,0.487069,0.508553
RETURN,ubiquitous,23448,21886,1562,11217,0.478378,0.512519
RETURN,common,18286,15734,2552,2056,0.112436,0.130672
RETURN,rare,11624,10311,1313,69,0.005936,0.006692
RETURN,total,53358,47931,5427,13342,0.250047,0.278358
VARIABLE,ubiquitous,21720,17994,3726,9567,0.44047,0.531677
VARIABLE,common,8236,6764,1472,2066,0.25085,0.305441


In [23]:
# Per-category breakdown of the base-form Type4Py results.
t4py_cat_base_perf = experiments.predictions.by_category_performance(
    type4py_base.replace("<MISSING>", pd.NA), total=True
)
t4py_cat_base_perf

Unnamed: 0,Unnamed: 1,observations,predictions,unassigned,matches,stracc,relacc
PARAMETER,ubiquitous,73106,71156,1950,49039,0.670793,0.689176
PARAMETER,common,36577,34623,1954,14894,0.407196,0.430176
PARAMETER,rare,29597,27617,1980,5956,0.201237,0.215664
PARAMETER,total,139280,133396,5884,69889,0.501788,0.523921
RETURN,ubiquitous,23448,21886,1562,12611,0.537828,0.576213
RETURN,common,18286,15734,2552,2305,0.126053,0.146498
RETURN,rare,11624,10311,1313,70,0.006022,0.006789
RETURN,total,53358,47931,5427,14986,0.280858,0.312658
VARIABLE,ubiquitous,21720,17994,3726,11528,0.530755,0.640658
VARIABLE,common,8236,6764,1472,2174,0.263963,0.321407


# Typilus

In [24]:
typilus_inferred = inferred.load_entire_inferred(artifact_root, dataset, tool_name="typilusN1", task="all")

/nfs/home/bsparks/mdti4py/datasets/CrossDomainTypes4Py/zwicker-group__numpy-py-pde/typilusN1/all/inferred.csv: 100%|█|


Loaded 1544 inference artifacts


In [25]:
# Reload so the current experiments.pipeline code is used without a kernel restart.
importlib.reload(pipeline)

# Same tool, ground truth and predictions for both forms; only `form` differs.
_typilus_args = dict(tool="typilusN1", groundtruth=groundtruth, inferred=typilus_inferred)
typilus_adjusted = pipeline.factory(**_typilus_args, form="adjusted")
typilus_base = pipeline.factory(**_typilus_args, form="base")

Initial prediction size: (1171654, 8)
Deriving limited form


100%|███████████████████████████████████████████████████████████████████| 1171654/1171654 [00:10<00:00, 116415.06it/s]


Deriving adjusted form from limited form


100%|███████████████████████████████████████████████████████████████████| 1171654/1171654 [00:10<00:00, 115050.38it/s]


Size after joining predictions to groundtruth: (1054456, 12)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned[clean_annos] = cleaned[clean_annos].fillna("<MISSING>")


Reduced to evaluatable: (228305, 12)
Deriving limited form


100%|███████████████████████████████████████████████████████████████████| 1171654/1171654 [00:09<00:00, 118302.39it/s]


Deriving adjusted form from limited form


100%|███████████████████████████████████████████████████████████████████| 1171654/1171654 [00:10<00:00, 115364.92it/s]


Deriving base form from adjusted form


100%|███████████████████████████████████████████████████████████████████| 1171654/1171654 [00:09<00:00, 117335.15it/s]


(1054456, 12)
Reduced to evaluatable: (228305, 12)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned[clean_annos] = cleaned[clean_annos].fillna("<MISSING>")


In [26]:
# Map the "<MISSING>" sentinel back to NA before scoring the adjusted-form predictions.
typilus_adj_perf = experiments.predictions.performance(
    typilus_adjusted.replace("<MISSING>", pd.NA), total=True
)
display(typilus_adj_perf)

Unnamed: 0,observations,predictions,unassigned,matches,stracc,relacc
ubiquitous,118274,111793,6481,66400,0.561408,0.593955
common,59993,56827,3166,9523,0.158735,0.167579
rare,50038,47612,2426,2764,0.055238,0.058053
total,228305,216232,12073,78687,0.344657,0.363901


In [27]:
# Map the "<MISSING>" sentinel back to NA before scoring the base-form predictions.
typilus_base_perf = experiments.predictions.performance(
    typilus_base.replace("<MISSING>", pd.NA), total=True
)
display(typilus_base_perf)

Unnamed: 0,observations,predictions,unassigned,matches,stracc,relacc
ubiquitous,118274,111793,6481,76346,0.645501,0.682923
common,59993,56827,3166,12471,0.207874,0.219456
rare,50038,47612,2426,2785,0.055658,0.058494
total,228305,216232,12073,91602,0.401226,0.423628


In [28]:
# Per-category breakdown of the adjusted-form Typilus results.
typilus_cat_adj_perf = experiments.predictions.by_category_performance(
    typilus_adjusted.replace("<MISSING>", pd.NA), total=True
)
typilus_cat_adj_perf

Unnamed: 0,Unnamed: 1,observations,predictions,unassigned,matches,stracc,relacc
PARAMETER,ubiquitous,73106,70958,2148,45842,0.627062,0.646044
PARAMETER,common,36577,35051,1526,7549,0.206387,0.215372
PARAMETER,rare,29597,29010,587,2028,0.06852,0.069907
PARAMETER,total,139280,135019,4261,55419,0.397896,0.410453
RETURN,ubiquitous,23448,22161,1287,11321,0.482813,0.510852
RETURN,common,18286,17528,758,1406,0.076889,0.080215
RETURN,rare,11624,11036,588,195,0.016776,0.017669
RETURN,total,53358,50725,2633,12922,0.242175,0.254746
VARIABLE,ubiquitous,21720,18674,3046,9237,0.425276,0.494645
VARIABLE,common,8236,7130,1106,958,0.116319,0.134362


In [29]:
# Per-category breakdown of the base-form Typilus results.
typilus_cat_base_perf = experiments.predictions.by_category_performance(
    typilus_base.replace("<MISSING>", pd.NA), total=True
)
typilus_cat_base_perf

Unnamed: 0,Unnamed: 1,observations,predictions,unassigned,matches,stracc,relacc
PARAMETER,ubiquitous,73106,70958,2148,49537,0.677605,0.698117
PARAMETER,common,36577,35051,1526,8057,0.220275,0.229865
PARAMETER,rare,29597,29010,587,2048,0.069196,0.070596
PARAMETER,total,139280,135019,4261,59642,0.428217,0.44173
RETURN,ubiquitous,23448,22161,1287,14443,0.615959,0.651731
RETURN,common,18286,17528,758,3738,0.204419,0.213259
RETURN,rare,11624,11036,588,196,0.016862,0.01776
RETURN,total,53358,50725,2633,18377,0.344409,0.362287
VARIABLE,ubiquitous,21720,18674,3046,12366,0.569337,0.662204
VARIABLE,common,8236,7130,1106,1063,0.129068,0.149088


# TypeT5

In [30]:
typet5_inferred = inferred.load_entire_inferred(artifact_root, dataset, tool_name="TypeT5TopN1", task="all")

/nfs/home/bsparks/mdti4py/datasets/CrossDomainTypes4Py/zwicker-group__numpy-py-pde/TypeT5TopN1/all/inferred.csv: 100%|


Loaded 1420 inference artifacts


In [31]:
# Reload so the current experiments.pipeline code is used, then derive the adjusted form.
importlib.reload(pipeline)
_typet5_adjusted_args = {
    "tool": "TypeT5TopN1",
    "groundtruth": groundtruth,
    "inferred": typet5_inferred,
    "form": "adjusted",
}
typet5_adjusted = pipeline.factory(**_typet5_adjusted_args)

Initial prediction size: (1113735, 8)
Deriving limited form


100%|███████████████████████████████████████████████████████████████████| 1113735/1113735 [00:05<00:00, 221635.67it/s]


Deriving adjusted form from limited form


100%|███████████████████████████████████████████████████████████████████| 1113735/1113735 [00:05<00:00, 217294.89it/s]


Size after joining predictions to groundtruth: (584530, 12)
Reduced to evaluatable: (206369, 12)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned[clean_annos] = cleaned[clean_annos].fillna("<MISSING>")


In [32]:
# Base-form counterpart of typet5_adjusted, built from the same loaded predictions.
_typet5_base_args = {
    "tool": "TypeT5TopN1",
    "groundtruth": groundtruth,
    "inferred": typet5_inferred,
    "form": "base",
}
typet5_base = pipeline.factory(**_typet5_base_args)

Deriving limited form


100%|███████████████████████████████████████████████████████████████████| 1113735/1113735 [00:04<00:00, 222942.19it/s]


Deriving adjusted form from limited form


100%|███████████████████████████████████████████████████████████████████| 1113735/1113735 [00:05<00:00, 216244.33it/s]


Deriving base form from adjusted form


100%|███████████████████████████████████████████████████████████████████| 1113735/1113735 [00:05<00:00, 217854.65it/s]


(584530, 12)
Reduced to evaluatable: (206369, 12)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned[clean_annos] = cleaned[clean_annos].fillna("<MISSING>")


In [33]:
# Map the "<MISSING>" sentinel back to NA before scoring the adjusted-form predictions.
tt5_adj_perf = experiments.predictions.performance(
    typet5_adjusted.replace("<MISSING>", pd.NA),
    total=True,
)
display(tt5_adj_perf)

Unnamed: 0,observations,predictions,unassigned,matches,stracc,relacc
ubiquitous,106637,86931,19706,72931,0.683918,0.838953
common,54929,44382,10547,29008,0.5281,0.653598
rare,44803,36186,8617,26586,0.593398,0.734704
total,206369,167499,38870,128525,0.622792,0.767318


In [34]:
# Map the "<MISSING>" sentinel back to NA before scoring the base-form predictions.
tt5_base_perf = experiments.predictions.performance(
    typet5_base.replace("<MISSING>", pd.NA),
    total=True,
)
display(tt5_base_perf)

Unnamed: 0,observations,predictions,unassigned,matches,stracc,relacc
ubiquitous,106637,86931,19706,79107,0.741834,0.909998
common,54929,44382,10547,32724,0.595751,0.737326
rare,44803,36186,8617,26959,0.601723,0.745012
total,206369,167499,38870,138790,0.672533,0.828602


In [35]:
# Per-category breakdown of the adjusted-form TypeT5 results.
tt5_cat_adj_perf = experiments.predictions.by_category_performance(
    typet5_adjusted.replace("<MISSING>", pd.NA), total=True
)
tt5_cat_adj_perf

Unnamed: 0,Unnamed: 1,observations,predictions,unassigned,matches,stracc,relacc
PARAMETER,ubiquitous,71509,58935,12574,50047,0.69987,0.84919
PARAMETER,common,35401,28833,6568,18688,0.527895,0.648146
PARAMETER,rare,28642,23458,5184,17434,0.608687,0.743201
PARAMETER,total,135552,111226,24326,86169,0.63569,0.77472
RETURN,ubiquitous,22850,18721,4129,15214,0.665821,0.81267
RETURN,common,17511,13992,3519,9360,0.534521,0.668954
RETURN,rare,11330,9053,2277,6901,0.609091,0.762289
RETURN,total,51691,41766,9925,31475,0.608907,0.753603
VARIABLE,ubiquitous,12278,9275,3003,7670,0.624695,0.826954
VARIABLE,common,4436,3385,1051,2019,0.45514,0.596455


In [36]:
# Per-category breakdown of the base-form TypeT5 results.
tt5_cat_base_perf = experiments.predictions.by_category_performance(
    typet5_base.replace("<MISSING>", pd.NA), total=True
)
tt5_cat_base_perf

Unnamed: 0,Unnamed: 1,observations,predictions,unassigned,matches,stracc,relacc
PARAMETER,ubiquitous,71509,58935,12574,53361,0.746214,0.905421
PARAMETER,common,35401,28833,6568,20685,0.584306,0.717407
PARAMETER,rare,28642,23458,5184,17575,0.613609,0.749211
PARAMETER,total,135552,111226,24326,91621,0.67591,0.823737
RETURN,ubiquitous,22850,18721,4129,17551,0.768096,0.937503
RETURN,common,17511,13992,3519,11033,0.630061,0.788522
RETURN,rare,11330,9053,2277,6993,0.617211,0.772451
RETURN,total,51691,41766,9925,35577,0.688263,0.851817
VARIABLE,ubiquitous,12278,9275,3003,8195,0.667454,0.883558
VARIABLE,common,4436,3385,1051,2162,0.487376,0.6387


# HiTyperNoML

In [55]:
import importlib
# Reload so the current experiments.pipeline code is used without a kernel restart.
importlib.reload(pipeline)

hityper_inferred = inferred.load_entire_inferred(artifact_root, dataset, tool_name="HiTyperNoML", task="all")

# NOTE(review): predictions are loaded under tool_name="HiTyperNoML" but handed to the
# factory as tool="HiTyper"; the other tool sections pass the same string in both
# places — confirm that "HiTyper" is the key pipeline.factory expects here.
_hityper_args = dict(tool="HiTyper", groundtruth=groundtruth, inferred=hityper_inferred)
hityper_adjusted = pipeline.factory(**_hityper_args, form="adjusted")
hityper_base = pipeline.factory(**_hityper_args, form="base")

/nfs/home/bsparks/mdti4py/datasets/CrossDomainTypes4Py/zwicker-group__numpy-py-pde/HiTyperNoML/all/inferred.csv: 100%|


Loaded 1450 inference artifacts
Initial prediction size: (1143210, 8)
Deriving limited form


100%|███████████████████████████████████████████████████████████████████| 1143210/1143210 [00:04<00:00, 248859.15it/s]


Deriving adjusted form from limited form


100%|███████████████████████████████████████████████████████████████████| 1143210/1143210 [00:04<00:00, 275353.20it/s]


Size after joining predictions to groundtruth: (994923, 12)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned[clean_annos] = cleaned[clean_annos].fillna("<MISSING>")


Reduced to evaluatable: (204383, 12)
Deriving limited form


100%|███████████████████████████████████████████████████████████████████| 1143210/1143210 [00:04<00:00, 270989.41it/s]


Deriving adjusted form from limited form


100%|███████████████████████████████████████████████████████████████████| 1143210/1143210 [00:04<00:00, 274999.87it/s]


Deriving base form from adjusted form


100%|███████████████████████████████████████████████████████████████████| 1143210/1143210 [00:04<00:00, 266023.76it/s]


(994923, 12)
Reduced to evaluatable: (204383, 12)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned[clean_annos] = cleaned[clean_annos].fillna("<MISSING>")


In [56]:
# Map the "<MISSING>" sentinel back to NA before scoring the adjusted-form predictions.
hitypernoml_adj_perf = experiments.predictions.performance(
    hityper_adjusted.replace("<MISSING>", pd.NA), total=True
)
hitypernoml_adj_perf

Unnamed: 0,observations,predictions,unassigned,matches,stracc,relacc
ubiquitous,104071,27152,76919,20282,0.194886,0.74698
common,54893,9172,45721,3121,0.056856,0.340275
rare,45419,5904,39515,3853,0.084832,0.652608
total,204383,42228,162155,27256,0.133357,0.645449


In [57]:
# Map the "<MISSING>" sentinel back to NA before scoring the base-form predictions.
hitypernoml_base_perf = experiments.predictions.performance(
    hityper_base.replace("<MISSING>", pd.NA), total=True
)
hitypernoml_base_perf

Unnamed: 0,observations,predictions,unassigned,matches,stracc,relacc
ubiquitous,104071,27152,76919,25905,0.248917,0.954073
common,54893,9172,45721,5828,0.10617,0.635412
rare,45419,5904,39515,3931,0.08655,0.66582
total,204383,42228,162155,35664,0.174496,0.844558


In [39]:
# Per-category breakdown of the adjusted-form HiTyper results.
# NOTE(review): this cell's execution count (39) is lower than that of the In [55] cell
# which builds hityper_adjusted, and the per-category observation counts in its saved
# output exceed the 204383 total reported by In [56] — the output looks stale; re-run.
hitypernoml_cat_adj_perf = experiments.predictions.by_category_performance(
    hityper_adjusted.replace("<MISSING>", pd.NA), total=True
)
hitypernoml_cat_adj_perf

Unnamed: 0,Unnamed: 1,observations,predictions,unassigned,matches,stracc,relacc
PARAMETER,ubiquitous,73106,10857,62249,10604,0.14505,0.976697
PARAMETER,common,36577,2179,34398,1227,0.033546,0.563102
PARAMETER,rare,29597,310,29287,86,0.002906,0.277419
PARAMETER,total,139280,13346,125934,11917,0.085561,0.892927
RETURN,ubiquitous,23448,11726,11722,7886,0.336319,0.672523
RETURN,common,18286,6578,11708,1672,0.091436,0.254181
RETURN,rare,11624,4131,7493,2918,0.251032,0.706366
RETURN,total,53358,22435,30923,12476,0.233817,0.556095
VARIABLE,ubiquitous,21720,4787,16933,1562,0.071915,0.3263
VARIABLE,common,8236,1145,7091,634,0.076979,0.553712


In [41]:
# Per-category breakdown of the base-form HiTyper results.
# NOTE(review): this cell's execution count (41) is lower than that of the In [55] cell
# which builds hityper_base, and the per-category observation counts in its saved
# output exceed the 204383 total reported by In [57] — the output looks stale; re-run.
hitypernoml_cat_base_perf = experiments.predictions.by_category_performance(
    hityper_base.replace("<MISSING>", pd.NA), total=True
)
hitypernoml_cat_base_perf

Unnamed: 0,Unnamed: 1,observations,predictions,unassigned,matches,stracc,relacc
PARAMETER,ubiquitous,73106,10857,62249,10795,0.147662,0.994289
PARAMETER,common,36577,2179,34398,1379,0.037701,0.632859
PARAMETER,rare,29597,310,29287,86,0.002906,0.277419
PARAMETER,total,139280,13346,125934,12260,0.088024,0.918627
RETURN,ubiquitous,23448,11726,11722,11381,0.485372,0.970578
RETURN,common,18286,6578,11708,4079,0.223067,0.620097
RETURN,rare,11624,4131,7493,2945,0.253355,0.712902
RETURN,total,53358,22435,30923,18405,0.344934,0.82037
VARIABLE,ubiquitous,21720,4787,16933,3946,0.181676,0.824316
VARIABLE,common,8236,1145,7091,862,0.104662,0.752838


# Table Creation

In [42]:
# Count columns taken once per model, and metric columns taken once per annotation form.
per_prediction_columns = ["observations", "predictions"]
per_form_columns = ["matches", "stracc", "relacc"]
# Everything reported for a single model and form: the two groups back to back.
per_model_columns = [*per_prediction_columns, *per_form_columns]

In [43]:
# Stack the adjusted-form tables of the three models under an outer "model" level.
_adj_tables = (("Typilus", typilus_adj_perf), ("Type4Py", t4py_adj_perf), ("TypeT5", tt5_adj_perf))
adj_by_model_scarcity = pd.concat(
    [table[per_model_columns] for _, table in _adj_tables],
    keys=[model for model, _ in _adj_tables],
)
adj_by_model_scarcity.index = adj_by_model_scarcity.index.set_names(["model", "scarcity"])

# Put scarcity outermost and regroup so each scarcity bucket's rows are contiguous,
# keeping buckets in first-seen order (sort=False).
_adj_scarcity_first = adj_by_model_scarcity.swaplevel()
_adj_per_scarcity = [rows for _, rows in _adj_scarcity_first.groupby("scarcity", sort=False)]
adj_by_scarcity_model = pd.concat(_adj_per_scarcity)
display(adj_by_scarcity_model)

Unnamed: 0_level_0,Unnamed: 1_level_0,observations,predictions,matches,stracc,relacc
scarcity,model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ubiquitous,Typilus,118274,111793,66400,0.561408,0.593955
ubiquitous,Type4Py,118274,111036,68273,0.577244,0.614873
ubiquitous,TypeT5,106637,86931,72931,0.683918,0.838953
common,Typilus,59993,56827,9523,0.158735,0.167579
common,Type4Py,59993,54324,18017,0.300318,0.331658
common,TypeT5,54929,44382,29008,0.5281,0.653598
rare,Typilus,50038,47612,2764,0.055238,0.058053
rare,Type4Py,50038,45171,7016,0.140213,0.155321
rare,TypeT5,44803,36186,26586,0.593398,0.734704
total,Typilus,228305,216232,78687,0.344657,0.363901


In [44]:
# Render the table as LaTeX with floats shown to two decimals.
_adj_latex = adj_by_scarcity_model.to_latex(float_format="{:.2f}".format)
print(_adj_latex)

\begin{tabular}{llrrrrr}
\toprule
 &  & observations & predictions & matches & stracc & relacc \\
scarcity & model &  &  &  &  &  \\
\midrule
\multirow[t]{3}{*}{ubiquitous} & Typilus & 118274 & 111793 & 66400 & 0.56 & 0.59 \\
 & Type4Py & 118274 & 111036 & 68273 & 0.58 & 0.61 \\
 & TypeT5 & 106637 & 86931 & 72931 & 0.68 & 0.84 \\
\cline{1-7}
\multirow[t]{3}{*}{common} & Typilus & 59993 & 56827 & 9523 & 0.16 & 0.17 \\
 & Type4Py & 59993 & 54324 & 18017 & 0.30 & 0.33 \\
 & TypeT5 & 54929 & 44382 & 29008 & 0.53 & 0.65 \\
\cline{1-7}
\multirow[t]{3}{*}{rare} & Typilus & 50038 & 47612 & 2764 & 0.06 & 0.06 \\
 & Type4Py & 50038 & 45171 & 7016 & 0.14 & 0.16 \\
 & TypeT5 & 44803 & 36186 & 26586 & 0.59 & 0.73 \\
\cline{1-7}
\multirow[t]{3}{*}{total} & Typilus & 228305 & 216232 & 78687 & 0.34 & 0.36 \\
 & Type4Py & 228305 & 210531 & 93306 & 0.41 & 0.44 \\
 & TypeT5 & 206369 & 167499 & 128525 & 0.62 & 0.77 \\
\cline{1-7}
\bottomrule
\end{tabular}



In [45]:
# Stack the base-form tables of the three models under an outer "model" level.
_base_tables = (("Typilus", typilus_base_perf), ("Type4Py", t4py_base_perf), ("TypeT5", tt5_base_perf))
base_by_model_scarcity = pd.concat(
    [table[per_model_columns] for _, table in _base_tables],
    keys=[model for model, _ in _base_tables],
)
base_by_model_scarcity.index = base_by_model_scarcity.index.set_names(["model", "scarcity"])

# Put scarcity outermost and regroup so each scarcity bucket's rows are contiguous.
# The name is rebound on purpose: the next cell prints this (scarcity, model) ordering.
_base_scarcity_first = base_by_model_scarcity.swaplevel()
_base_per_scarcity = [rows for _, rows in _base_scarcity_first.groupby("scarcity", sort=False)]
base_by_model_scarcity = pd.concat(_base_per_scarcity)
display(base_by_model_scarcity)

Unnamed: 0_level_0,Unnamed: 1_level_0,observations,predictions,matches,stracc,relacc
scarcity,model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ubiquitous,Typilus,118274,111793,76346,0.645501,0.682923
ubiquitous,Type4Py,118274,111036,73178,0.618716,0.659048
ubiquitous,TypeT5,106637,86931,79107,0.741834,0.909998
common,Typilus,59993,56827,12471,0.207874,0.219456
common,Type4Py,59993,54324,18834,0.313937,0.346698
common,TypeT5,54929,44382,32724,0.595751,0.737326
rare,Typilus,50038,47612,2785,0.055658,0.058494
rare,Type4Py,50038,45171,7060,0.141093,0.156295
rare,TypeT5,44803,36186,26959,0.601723,0.745012
total,Typilus,228305,216232,91602,0.401226,0.423628


In [46]:
# Render the table as LaTeX with floats shown to two decimals.
_base_latex = base_by_model_scarcity.to_latex(float_format="{:.2f}".format)
print(_base_latex)

\begin{tabular}{llrrrrr}
\toprule
 &  & observations & predictions & matches & stracc & relacc \\
scarcity & model &  &  &  &  &  \\
\midrule
\multirow[t]{3}{*}{ubiquitous} & Typilus & 118274 & 111793 & 76346 & 0.65 & 0.68 \\
 & Type4Py & 118274 & 111036 & 73178 & 0.62 & 0.66 \\
 & TypeT5 & 106637 & 86931 & 79107 & 0.74 & 0.91 \\
\cline{1-7}
\multirow[t]{3}{*}{common} & Typilus & 59993 & 56827 & 12471 & 0.21 & 0.22 \\
 & Type4Py & 59993 & 54324 & 18834 & 0.31 & 0.35 \\
 & TypeT5 & 54929 & 44382 & 32724 & 0.60 & 0.74 \\
\cline{1-7}
\multirow[t]{3}{*}{rare} & Typilus & 50038 & 47612 & 2785 & 0.06 & 0.06 \\
 & Type4Py & 50038 & 45171 & 7060 & 0.14 & 0.16 \\
 & TypeT5 & 44803 & 36186 & 26959 & 0.60 & 0.75 \\
\cline{1-7}
\multirow[t]{3}{*}{total} & Typilus & 228305 & 216232 & 91602 & 0.40 & 0.42 \\
 & Type4Py & 228305 & 210531 & 99072 & 0.43 & 0.47 \\
 & TypeT5 & 206369 & 167499 & 138790 & 0.67 & 0.83 \\
\cline{1-7}
\bottomrule
\end{tabular}



In [47]:
def _combine_forms(adjusted_perf, base_perf):
    """Place the count columns (from the adjusted table) next to both forms' metric columns."""
    return pd.concat(
        [adjusted_perf[per_prediction_columns], adjusted_perf[per_form_columns], base_perf[per_form_columns]],
        keys=["", "adjusted", "base"],
        axis=1,
    )


typilus_combined = _combine_forms(typilus_adj_perf, typilus_base_perf)
type4py_combined = _combine_forms(t4py_adj_perf, t4py_base_perf)
typet5_combined = _combine_forms(tt5_adj_perf, tt5_base_perf)

In [48]:
# Stack the combined per-model tables under an outer "model" level.
_combined_tables = (("Typilus", typilus_combined), ("Type4Py", type4py_combined), ("TypeT5", typet5_combined))
by_model_scarcity = pd.concat(
    [table for _, table in _combined_tables],
    keys=[model for model, _ in _combined_tables],
)
by_model_scarcity.index = by_model_scarcity.index.set_names(["model", "scarcity"])

# Put scarcity outermost and regroup so each scarcity bucket's rows are contiguous.
_combined_scarcity_first = by_model_scarcity.swaplevel()
by_scarcity_model = pd.concat(
    [rows for _, rows in _combined_scarcity_first.groupby("scarcity", sort=False)]
)

In [49]:
display(by_scarcity_model)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,adjusted,adjusted,adjusted,base,base,base
Unnamed: 0_level_1,Unnamed: 1_level_1,observations,predictions,matches,stracc,relacc,matches,stracc,relacc
scarcity,model,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
ubiquitous,Typilus,118274,111793,66400,0.561408,0.593955,76346,0.645501,0.682923
ubiquitous,Type4Py,118274,111036,68273,0.577244,0.614873,73178,0.618716,0.659048
ubiquitous,TypeT5,106637,86931,72931,0.683918,0.838953,79107,0.741834,0.909998
common,Typilus,59993,56827,9523,0.158735,0.167579,12471,0.207874,0.219456
common,Type4Py,59993,54324,18017,0.300318,0.331658,18834,0.313937,0.346698
common,TypeT5,54929,44382,29008,0.5281,0.653598,32724,0.595751,0.737326
rare,Typilus,50038,47612,2764,0.055238,0.058053,2785,0.055658,0.058494
rare,Type4Py,50038,45171,7016,0.140213,0.155321,7060,0.141093,0.156295
rare,TypeT5,44803,36186,26586,0.593398,0.734704,26959,0.601723,0.745012
total,Typilus,228305,216232,78687,0.344657,0.363901,91602,0.401226,0.423628


In [50]:
# Render the combined table as LaTeX with floats shown to two decimals.
_combined_latex = by_scarcity_model.to_latex(float_format="{:.2f}".format)
print(_combined_latex)

\begin{tabular}{llrrrrrrrr}
\toprule
 &  & \multicolumn{2}{r}{} & \multicolumn{3}{r}{adjusted} & \multicolumn{3}{r}{base} \\
 &  & observations & predictions & matches & stracc & relacc & matches & stracc & relacc \\
scarcity & model &  &  &  &  &  &  &  &  \\
\midrule
\multirow[t]{3}{*}{ubiquitous} & Typilus & 118274 & 111793 & 66400 & 0.56 & 0.59 & 76346 & 0.65 & 0.68 \\
 & Type4Py & 118274 & 111036 & 68273 & 0.58 & 0.61 & 73178 & 0.62 & 0.66 \\
 & TypeT5 & 106637 & 86931 & 72931 & 0.68 & 0.84 & 79107 & 0.74 & 0.91 \\
\cline{1-10}
\multirow[t]{3}{*}{common} & Typilus & 59993 & 56827 & 9523 & 0.16 & 0.17 & 12471 & 0.21 & 0.22 \\
 & Type4Py & 59993 & 54324 & 18017 & 0.30 & 0.33 & 18834 & 0.31 & 0.35 \\
 & TypeT5 & 54929 & 44382 & 29008 & 0.53 & 0.65 & 32724 & 0.60 & 0.74 \\
\cline{1-10}
\multirow[t]{3}{*}{rare} & Typilus & 50038 & 47612 & 2764 & 0.06 & 0.06 & 2785 & 0.06 & 0.06 \\
 & Type4Py & 50038 & 45171 & 7016 & 0.14 & 0.16 & 7060 & 0.14 & 0.16 \\
 & TypeT5 & 44803 & 36186 & 26586 