In [1]:
import pathlib
from scripts.infer.structure import CrossDomainTypes4Py

# Handle to the CrossDomainTypes4Py dataset, rooted at an absolute path under /nfs.
# NOTE(review): both paths below are machine-specific; adjust them before running elsewhere.
dataset = CrossDomainTypes4Py(pathlib.Path("/nfs/data/students/bsparks/mdti4py-dataset-pool/cdt4py"))
# Directory handed to the ground-truth and inference loaders later in the notebook.
artifact_root = pathlib.Path("/nfs/home/bsparks/mdti4py/datasets")

2023-09-28 16:05:23.914848: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Keep the test split around as `ts` and show how many entries it holds.
ts = dataset.test_set()
display(len(ts))

1550

In [3]:
# Keep the EL validation split around as `el_vs` and show how many entries it holds.
el_vs = dataset.el_validation_set()
display(len(el_vs))

1254

In [4]:
# Keep the EL test split around as `el_ts` and show how many entries it holds.
el_ts = dataset.el_test_set()
display(len(el_ts))

297

In [5]:
import importlib
import experiments.inferred

# Load the ground truth for every repository of `dataset` from under `artifact_root`.
groundtruth = experiments.inferred.load_groundtruths(artifact_root, dataset)

/nfs/home/bsparks/mdti4py/datasets/CrossDomainTypes4Py/chrisc36__umpy-allennlp/extended_ground_truth.csv: 100%|█████████████████████████████████████████████████████████████████████████████████████████| 1550/1550 [00:19<00:00, 80.30it/s]
/nfs/home/bsparks/mdti4py/datasets/CrossDomainTypes4Py/chrisc36__umpy-allennlp/context.csv: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 1550/1550 [00:09<00:00, 169.06it/s]


In [6]:
import pandas as pd
display(groundtruth.columns)
# Swap the "<MISSING>" placeholder string for pd.NA so pandas treats those cells as missing.
groundtruth = groundtruth.replace("<MISSING>", pd.NA)

Index(['file', 'category', 'qname', 'qname_ssa', 'raw_anno',
       'depth_limited_anno', 'adjusted_anno', 'base_anno', 'repository',
       'context_category', 'nested'],
      dtype='object')

In [7]:
# Ten most frequent adjusted annotations in the ground truth.
groundtruth.adjusted_anno.value_counts().head(n=10)

adjusted_anno
str          50804
None         25566
int          22757
bool         14299
float         8582
Dict          5654
Any           3960
List[str]     3939
ndarray       3926
List          3291
Name: count, dtype: int64

In [8]:
import experiments.predictions
# Expose the adjusted annotation under the name "trait_gt_form" and discard
# rows annotated as "None" or "Any" before bucketing the remaining types.
trait_form = (
    groundtruth
    .rename(columns={"adjusted_anno": "trait_gt_form"})
    .loc[lambda frame: ~frame["trait_gt_form"].isin(["None", "Any"])]
)

# One Series of ground-truth types per scarcity bucket, built in the order
# ubiquitous -> common -> rare.
ubiq_types, common_types, rare_types = (
    select_bucket(trait_form)["trait_gt_form"]
    for select_bucket in (
        experiments.predictions.ubiquitous_types,
        experiments.predictions.common_types,
        experiments.predictions.rare_types,
    )
)

In [9]:
# Occurrence count of each type in the ubiquitous bucket.
ubiq_types.value_counts()

trait_gt_form
str      50804
int      22757
bool     14299
float     8582
Dict      5654
Name: count, dtype: int64

In [10]:
# Occurrence count of each type in the common bucket.
common_types.value_counts()

trait_gt_form
List[str]             3939
ndarray               3926
List                  3291
Dict[str, Any]        2993
DataFrame             2539
                      ... 
Order                  104
Dict[str, Tuple]       102
Token                  101
Dict[int, List]        101
Callable[[], None]     100
Name: count, Length: 143, dtype: int64

In [11]:
# Occurrence count of each type in the rare bucket.
rare_types.value_counts()

trait_gt_form
UID                             99
List[Token]                     99
File                            99
Election                        99
Mock                            98
                                ..
Dict[int, bytearray]             1
Union[bytes, int, str, List]     1
ResolvedImage                    1
Tuple[str, bytes]                1
Quarter                          1
Name: count, Length: 15432, dtype: int64

In [12]:
from scripts.common.schemas import TypeCollectionCategory, RepositoryTypeCollectionSchema
import experiments.predictions

def statistics(split: pd.DataFrame) -> pd.Series:
    """Summarise one ground-truth split as a two-level indexed column of numbers.

    Rows whose ``base_anno`` is ``"None"`` or ``"Any"`` are dropped first, as are
    rows whose ``qname`` ends in one of a fixed list of suffixes (``.self``,
    ``.cls``, ``.args``, ``.kwargs``, a handful of dunder names, and ``._``).

    The result stacks three groups of figures:

    * ``Projects``: number of repositories, number of distinct
      (repository, file) pairs, and the mean number of distinct files per
      repository.
    * ``Functions`` / ``Parameters`` / ``Variables``: per category, the number
      of rows, the number with a non-missing adjusted annotation, and the
      ratio of the two.
    * ``Ubiquitous`` / ``Common`` / ``Rare``: number of rows whose adjusted
      annotation appears in the respective bucket; ``Unique Rare`` is the
      number of distinct rare annotations present.

    Reads the notebook-level globals ``ubiq_types``, ``common_types`` and
    ``rare_types``; they must be defined before this function is called.

    Args:
        split: Ground-truth rows providing at least the ``base_anno``,
            ``qname``, ``repository`` and ``file`` columns plus the category
            and adjusted-annotation columns named by
            ``RepositoryTypeCollectionSchema``.

    Returns:
        A ``pd.Series`` indexed by ``(group, metric)``.

    Raises:
        KeyError: If a category value has no entry in the label mapping.
    """
    split = split[~split["base_anno"].isin(["None", "Any"])]
    split = split[~split["qname"].str.endswith(
        (
            ".self",
            ".cls",
            ".args",
            ".kwargs",
            ".__init__",
            ".__len__",
            ".__str__",
            ".__repr__",
            ".__bool__",
            ".__float__",
            ".__int__",
            "._",
        )
    )]

    adjusted = split[RepositoryTypeCollectionSchema.adjusted_anno]

    # --- per-category annotation coverage -------------------------------
    category2str = {
        TypeCollectionCategory.CALLABLE_RETURN: "Functions",
        TypeCollectionCategory.CALLABLE_PARAMETER: "Parameters",
        TypeCollectionCategory.VARIABLE: "Variables",
    }

    rows = []
    keys = []
    for key, group in split.groupby(by=RepositoryTypeCollectionSchema.category, sort=False):
        annos = group[RepositoryTypeCollectionSchema.adjusted_anno]
        total = annos.size        # every row, annotated or not
        annotated = annos.count()  # count() skips missing values
        keys.append(category2str[key])
        rows.append(pd.Series(
            [total, annotated, annotated / total],
            index=["Total", "Annotated", "Annotated Ratio"],
        ))

    df = pd.concat(rows, keys=keys)

    # --- scarcity buckets -----------------------------------------------
    scarcity = []
    scarcity_keys = []
    for key, scarcity_types in zip(
        ["Ubiquitous", "Common", "Rare"],
        [ubiq_types, common_types, rare_types],
    ):
        scarcity_keys.append(key)
        scarcity.append(pd.Series(
            adjusted.isin(scarcity_types).sum(),
            index=["Count"],
        ))

    # Distinct rare annotations that actually occur in this split.
    rare_type_count = adjusted[adjusted.isin(rare_types)].nunique()
    scarcity_keys.append("Unique Rare")
    scarcity.append(pd.Series([rare_type_count], index=["Count"]))

    scarcity_df = pd.concat(scarcity, keys=scarcity_keys)

    # --- repository / file counts ---------------------------------------
    # Count distinct files per repository once and derive both the total and
    # the mean from it. Concatenating repository and file strings without a
    # separator could merge distinct pairs (e.g. "a/b" + "c" and "a/" + "bc").
    files_per_repo = split.groupby(by=["repository"])["file"].nunique()
    repository_df = pd.concat(
        [pd.Series(
            [split.repository.nunique(), files_per_repo.sum(), files_per_repo.mean()],
            index=["Repositories", "Deduplicated Python Files", "Deduplicated File to Repository Ratio"],
        )],
        keys=["Projects"],
    )

    return pd.concat([repository_df, df, scarcity_df])

In [13]:
# Statistics over the complete ground truth.
cdt4py_dd = statistics(groundtruth)
display(cdt4py_dd)

Projects     Repositories                               1544.000000
             Deduplicated Python Files                 26650.000000
             Deduplicated File to Repository Ratio        17.260363
Functions    Total                                    158295.000000
             Annotated                                 53354.000000
             Annotated Ratio                               0.337054
Parameters   Total                                    240148.000000
             Annotated                                139263.000000
             Annotated Ratio                               0.579905
Variables    Total                                    524646.000000
             Annotated                                 35667.000000
             Annotated Ratio                               0.067983
Ubiquitous   Count                                    100719.000000
Common       Count                                     58706.000000
Rare         Count                              

In [14]:
# Keep only ground-truth rows whose repository belongs to the EL validation split.
validation_ars = [str(dataset.author_repo(repo)) for repo in el_vs.keys()]
validation_gt = groundtruth[
    groundtruth[RepositoryTypeCollectionSchema.repository].isin(validation_ars)
]

cdt4py_el_valid = statistics(validation_gt)
display(cdt4py_el_valid)

Projects     Repositories                               1248.000000
             Deduplicated Python Files                 21313.000000
             Deduplicated File to Repository Ratio        17.077724
Functions    Total                                    122972.000000
             Annotated                                 43169.000000
             Annotated Ratio                               0.351047
Parameters   Total                                    191669.000000
             Annotated                                114236.000000
             Annotated Ratio                               0.596007
Variables    Total                                    415061.000000
             Annotated                                 29317.000000
             Annotated Ratio                               0.070633
Ubiquitous   Count                                     81848.000000
Common       Count                                     48123.000000
Rare         Count                              

In [15]:
# Keep only ground-truth rows whose repository belongs to the EL test split.
test_ars = [str(dataset.author_repo(repo)) for repo in el_ts.keys()]
test_gt = groundtruth[
    groundtruth[RepositoryTypeCollectionSchema.repository].isin(test_ars)
]

cdt4py_el_test = statistics(test_gt)
display(cdt4py_el_test)

Projects     Repositories                                296.000000
             Deduplicated Python Files                  5337.000000
             Deduplicated File to Repository Ratio        18.030405
Functions    Total                                     35323.000000
             Annotated                                 10185.000000
             Annotated Ratio                               0.288339
Parameters   Total                                     48479.000000
             Annotated                                 25027.000000
             Annotated Ratio                               0.516244
Variables    Total                                    109585.000000
             Annotated                                  6350.000000
             Annotated Ratio                               0.057946
Ubiquitous   Count                                     18871.000000
Common       Count                                     10583.000000
Rare         Count                              

In [16]:
# Distinct rare annotations occurring in the validation and test subsets respectively.
validation_rare_types = set(
    validation_gt.loc[
        validation_gt[RepositoryTypeCollectionSchema.adjusted_anno].isin(rare_types),
        "adjusted_anno",
    ].unique()
)
test_rare_types = set(
    test_gt.loc[
        test_gt[RepositoryTypeCollectionSchema.adjusted_anno].isin(rare_types),
        "adjusted_anno",
    ].unique()
)

In [17]:
# Rare types seen only in validation, then rare types seen only in test.
print(len(validation_rare_types - test_rare_types))
print(len(test_rare_types - validation_rare_types))

12186
2321


In [18]:
# Put the three statistics columns side by side, one labelled column per split.
cdt4py_table = pd.concat(
    [cdt4py_dd, cdt4py_el_valid, cdt4py_el_test],
    keys=["CDT4Py-DD", "CDT4Py-EL-Valid", "CDT4Py-EL-Test"],
    axis=1,
)
display(cdt4py_table)

Unnamed: 0,Unnamed: 1,CDT4Py-DD,CDT4Py-EL-Valid,CDT4Py-EL-Test
Projects,Repositories,1544.0,1248.0,296.0
Projects,Deduplicated Python Files,26650.0,21313.0,5337.0
Projects,Deduplicated File to Repository Ratio,17.260363,17.077724,18.030405
Functions,Total,158295.0,122972.0,35323.0
Functions,Annotated,53354.0,43169.0,10185.0
Functions,Annotated Ratio,0.337054,0.351047,0.288339
Parameters,Total,240148.0,191669.0,48479.0
Parameters,Annotated,139263.0,114236.0,25027.0
Parameters,Annotated Ratio,0.579905,0.596007,0.516244
Variables,Total,524646.0,415061.0,109585.0


In [19]:
# Emit the combined table as LaTeX, with every float rendered to two decimals.
latex_source = cdt4py_table.to_latex(float_format="{:.2f}".format)
print(latex_source)

\begin{tabular}{llrrr}
\toprule
 &  & CDT4Py-DD & CDT4Py-EL-Valid & CDT4Py-EL-Test \\
\midrule
\multirow[t]{3}{*}{Projects} & Repositories & 1544.00 & 1248.00 & 296.00 \\
 & Deduplicated Python Files & 26650.00 & 21313.00 & 5337.00 \\
 & Deduplicated File to Repository Ratio & 17.26 & 17.08 & 18.03 \\
\cline{1-5}
\multirow[t]{3}{*}{Functions} & Total & 158295.00 & 122972.00 & 35323.00 \\
 & Annotated & 53354.00 & 43169.00 & 10185.00 \\
 & Annotated Ratio & 0.34 & 0.35 & 0.29 \\
\cline{1-5}
\multirow[t]{3}{*}{Parameters} & Total & 240148.00 & 191669.00 & 48479.00 \\
 & Annotated & 139263.00 & 114236.00 & 25027.00 \\
 & Annotated Ratio & 0.58 & 0.60 & 0.52 \\
\cline{1-5}
\multirow[t]{3}{*}{Variables} & Total & 524646.00 & 415061.00 & 109585.00 \\
 & Annotated & 35667.00 & 29317.00 & 6350.00 \\
 & Annotated Ratio & 0.07 & 0.07 & 0.06 \\
\cline{1-5}
Ubiquitous & Count & 100719.00 & 81848.00 & 18871.00 \\
\cline{1-5}
Common & Count & 58706.00 & 48123.00 & 10583.00 \\
\cline{1-5}
Rare & Coun

# Type4Py

In [20]:
from experiments import pipeline, inferred
import importlib

# Re-execute both experiment modules so edits made on disk take effect in this kernel.
importlib.reload(experiments.pipeline)
importlib.reload(experiments.inferred)

# Load the "type4pyN1" inference artifacts for task "all" across the dataset.
type4py_inferred = inferred.load_entire_inferred(artifact_root, dataset, tool_name="type4pyN1", task="all")

/nfs/home/bsparks/mdti4py/datasets/CrossDomainTypes4Py/chrisc36__umpy-allennlp/type4pyN1/all/inferred.csv: 100%|███████████████████████████████████████████████████████████████████████████████████████| 1550/1550 [00:10<00:00, 149.77it/s]


Loaded 1539 inference artifacts


In [21]:
# Build the Type4Py evaluation frames, first in "adjusted" form and then in
# "base" form, from the same ground truth and predictions.
type4py_adjusted, type4py_base = (
    pipeline.factory(
        tool="type4pyN1",
        groundtruth=groundtruth,
        inferred=type4py_inferred,
        form=anno_form,
    )
    for anno_form in ("adjusted", "base")
)

Initial prediction size: (1171294, 8)
Deriving limited form


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1171294/1171294 [00:10<00:00, 113348.87it/s]


Deriving adjusted form from limited form


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1171294/1171294 [00:10<00:00, 111566.10it/s]


Size after joining predictions to groundtruth: (1054110, 12)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned[clean_annos] = cleaned[clean_annos].fillna("<MISSING>")


Reduced to evaluatable: (228284, 12)
Deriving limited form


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1171294/1171294 [00:10<00:00, 115051.20it/s]


Deriving adjusted form from limited form


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1171294/1171294 [00:09<00:00, 123296.12it/s]


Deriving base form from adjusted form


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1171294/1171294 [00:09<00:00, 118445.68it/s]


(1054110, 12)
Reduced to evaluatable: (228284, 12)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned[clean_annos] = cleaned[clean_annos].fillna("<MISSING>")


In [22]:
# Peek at the `anno` column of the adjusted-form frame; "<MISSING>" marks absent values.
type4py_adjusted.anno

1                str
3               bool
4               bool
6                str
7                str
             ...    
1054015          int
1054016          str
1054022    <MISSING>
1054023          int
1054024          str
Name: anno, Length: 228284, dtype: object

In [23]:
# Type4Py, adjusted form: per-bucket performance plus a total row.
# "<MISSING>" is turned back into NA before scoring.
t4py_adj_perf = experiments.predictions.performance(
    type4py_adjusted.replace("<MISSING>", pd.NA),
    total=True,
    ubiq_types=ubiq_types,
    comm_types=common_types,
    rare_types=rare_types,
)
t4py_adj_perf

Unnamed: 0,observations,predictions,unassigned,matches,stracc,relacc
ubiquitous,100719,95761,4958,67438,0.669566,0.704232
common,58706,53521,5185,17570,0.299288,0.328282
rare,68859,61228,7631,8286,0.120333,0.13533
total,228284,210510,17774,93294,0.408675,0.443181


In [24]:
# Type4Py, base form: per-bucket performance plus a total row.
# "<MISSING>" is turned back into NA before scoring.
t4py_base_perf = experiments.predictions.performance(
    type4py_base.replace("<MISSING>", pd.NA),
    total=True,
    ubiq_types=ubiq_types,
    comm_types=common_types,
    rare_types=rare_types,
)
t4py_base_perf

Unnamed: 0,observations,predictions,unassigned,matches,stracc,relacc
ubiquitous,100719,95761,4958,67997,0.675116,0.71007
common,58706,53521,5185,20530,0.349709,0.383588
rare,68859,61228,7631,10533,0.152965,0.172029
total,228284,210510,17774,99060,0.433933,0.470571


In [25]:
# Type4Py, adjusted form: the same scoring, broken down by category.
t4py_cat_adj_perf = experiments.predictions.by_category_performance(
    type4py_adjusted.replace("<MISSING>", pd.NA),
    ubiq_types=ubiq_types,
    comm_types=common_types,
    rare_types=rare_types,
    total=True,
)
t4py_cat_adj_perf

Unnamed: 0,Unnamed: 1,observations,predictions,unassigned,matches,stracc,relacc
PARAMETER,ubiquitous,67160,65480,1680,46738,0.69592,0.713775
PARAMETER,common,34641,32673,1968,13920,0.401836,0.42604
PARAMETER,rare,37462,35226,2236,7170,0.191394,0.203543
PARAMETER,total,139263,133379,5884,67828,0.48705,0.508536
RETURN,ubiquitous,17530,16412,1118,11323,0.645921,0.689922
RETURN,common,15434,14118,1316,1922,0.12453,0.136138
RETURN,rare,20390,17397,2993,96,0.004708,0.005518
RETURN,total,53354,47927,5427,13341,0.250047,0.278361
VARIABLE,ubiquitous,16029,13869,2160,9377,0.585002,0.676112
VARIABLE,common,8631,6730,1901,1728,0.200209,0.256761


In [26]:
# Type4Py, base form: the same scoring, broken down by category.
t4py_cat_base_perf = experiments.predictions.by_category_performance(
    type4py_base.replace("<MISSING>", pd.NA),
    total=True,
    ubiq_types=ubiq_types,
    comm_types=common_types,
    rare_types=rare_types,
)
t4py_cat_base_perf

Unnamed: 0,Unnamed: 1,observations,predictions,unassigned,matches,stracc,relacc
PARAMETER,ubiquitous,67160,65480,1680,47020,0.700119,0.718082
PARAMETER,common,34641,32673,1968,14874,0.429376,0.455238
PARAMETER,rare,37462,35226,2236,7984,0.213123,0.226651
PARAMETER,total,139263,133379,5884,69878,0.50177,0.523906
RETURN,ubiquitous,17530,16412,1118,11470,0.654307,0.698879
RETURN,common,15434,14118,1316,2833,0.183556,0.200666
RETURN,rare,20390,17397,2993,682,0.033448,0.039202
RETURN,total,53354,47927,5427,14985,0.28086,0.312663
VARIABLE,ubiquitous,16029,13869,2160,9507,0.593112,0.685486
VARIABLE,common,8631,6730,1901,2823,0.327077,0.419465


# Typilus

In [27]:
# Load the "typilusN1" inference artifacts for task "all" across the dataset.
typilus_inferred = inferred.load_entire_inferred(artifact_root, dataset, tool_name="typilusN1", task="all")

/nfs/home/bsparks/mdti4py/datasets/CrossDomainTypes4Py/chrisc36__umpy-allennlp/typilusN1/all/inferred.csv: 100%|███████████████████████████████████████████████████████████████████████████████████████| 1550/1550 [00:09<00:00, 161.70it/s]


Loaded 1542 inference artifacts


In [28]:
# Re-execute the pipeline module so edits made on disk take effect in this kernel.
importlib.reload(pipeline)

# Build the Typilus evaluation frames, first in "adjusted" form and then in
# "base" form, from the same ground truth and predictions.
typilus_adjusted, typilus_base = (
    pipeline.factory(
        tool="typilusN1",
        groundtruth=groundtruth,
        inferred=typilus_inferred,
        form=anno_form,
    )
    for anno_form in ("adjusted", "base")
)

Initial prediction size: (1171258, 8)
Deriving limited form


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1171258/1171258 [00:10<00:00, 112470.13it/s]


Deriving adjusted form from limited form


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1171258/1171258 [00:11<00:00, 105426.70it/s]


Size after joining predictions to groundtruth: (1012573, 12)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned[clean_annos] = cleaned[clean_annos].fillna("<MISSING>")


Reduced to evaluatable: (222302, 12)
Deriving limited form


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1171258/1171258 [00:10<00:00, 112334.18it/s]


Deriving adjusted form from limited form


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1171258/1171258 [00:11<00:00, 104049.53it/s]


Deriving base form from adjusted form


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1171258/1171258 [00:11<00:00, 101535.16it/s]


(1012573, 12)
Reduced to evaluatable: (222302, 12)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned[clean_annos] = cleaned[clean_annos].fillna("<MISSING>")


In [29]:
# Typilus, adjusted form: per-bucket performance plus a total row.
typilus_adj_perf = experiments.predictions.performance(
    typilus_adjusted.replace("<MISSING>", pd.NA),
    ubiq_types=ubiq_types,
    comm_types=common_types,
    rare_types=rare_types,
    total=True,
)
display(typilus_adj_perf)

Unnamed: 0,observations,predictions,unassigned,matches,stracc,relacc
ubiquitous,98454,93892,4562,64902,0.659211,0.691241
common,56758,53850,2908,9557,0.168382,0.177474
rare,67090,63440,3650,2744,0.0409,0.043253
total,222302,211182,11120,77203,0.347289,0.365576


In [30]:
# Typilus, base form: per-bucket performance plus a total row.
typilus_base_perf = experiments.predictions.performance(
    typilus_base.replace("<MISSING>", pd.NA),
    ubiq_types=ubiq_types,
    comm_types=common_types,
    rare_types=rare_types,
    total=True,
)
display(typilus_base_perf)

Unnamed: 0,observations,predictions,unassigned,matches,stracc,relacc
ubiquitous,98454,93892,4562,65994,0.670303,0.702871
common,56758,53850,2908,15524,0.273512,0.288282
rare,67090,63440,3650,8312,0.123893,0.131021
total,222302,211182,11120,89830,0.40409,0.425368


In [31]:
# Typilus, adjusted form: the same scoring, broken down by category.
typilus_cat_adj_perf = experiments.predictions.by_category_performance(
    typilus_adjusted.replace("<MISSING>", pd.NA),
    ubiq_types=ubiq_types,
    comm_types=common_types,
    rare_types=rare_types,
    total=True,
)
typilus_cat_adj_perf

Unnamed: 0,Unnamed: 1,observations,predictions,unassigned,matches,stracc,relacc
PARAMETER,ubiquitous,65678,63992,1686,45717,0.696078,0.714417
PARAMETER,common,33307,32338,969,6604,0.198277,0.204218
PARAMETER,rare,36550,35810,740,2108,0.057674,0.058866
PARAMETER,total,135535,132140,3395,54429,0.401586,0.411904
RETURN,ubiquitous,16996,15875,1121,10405,0.612203,0.655433
RETURN,common,14879,14322,557,1731,0.116338,0.120863
RETURN,rare,19812,18887,925,428,0.021603,0.022661
RETURN,total,51687,49084,2603,12564,0.243079,0.255969
VARIABLE,ubiquitous,15780,14025,1755,8780,0.556401,0.626025
VARIABLE,common,8572,7190,1382,1222,0.142557,0.169958


In [32]:
# Typilus, base form: the same scoring, broken down by category.
typilus_cat_base_perf = experiments.predictions.by_category_performance(
    typilus_base.replace("<MISSING>", pd.NA),
    ubiq_types=ubiq_types,
    comm_types=common_types,
    rare_types=rare_types,
    total=True,
)
typilus_cat_base_perf

Unnamed: 0,Unnamed: 1,observations,predictions,unassigned,matches,stracc,relacc
PARAMETER,ubiquitous,65678,63992,1686,46271,0.704513,0.723075
PARAMETER,common,33307,32338,969,8794,0.264029,0.27194
PARAMETER,rare,36550,35810,740,3462,0.09472,0.096677
PARAMETER,total,135535,132140,3395,58527,0.431822,0.442917
RETURN,ubiquitous,16996,15875,1121,10662,0.627324,0.671622
RETURN,common,14879,14322,557,3883,0.260972,0.271121
RETURN,rare,19812,18887,925,3339,0.168534,0.176788
RETURN,total,51687,49084,2603,17884,0.346006,0.364355
VARIABLE,ubiquitous,15780,14025,1755,9061,0.574208,0.646061
VARIABLE,common,8572,7190,1382,2847,0.332128,0.395967


# TypeT5

In [33]:
# Load the "TypeT5TopN1" inference artifacts for task "all" across the dataset.
typet5_inferred = inferred.load_entire_inferred(artifact_root, dataset, tool_name="TypeT5TopN1", task="all")

/nfs/home/bsparks/mdti4py/datasets/CrossDomainTypes4Py/chrisc36__umpy-allennlp/TypeT5TopN1/all/inferred.csv: 100%|█████████████████████████████████████████████████████████████████████████████████████| 1550/1550 [00:09<00:00, 155.08it/s]


Loaded 1483 inference artifacts


In [34]:
# Re-execute the pipeline module so edits made on disk take effect in this kernel.
importlib.reload(pipeline)
# Build the TypeT5 evaluation frame in "adjusted" form from ground truth and predictions.
typet5_adjusted = pipeline.factory(
    tool="TypeT5TopN1",
    groundtruth=groundtruth,
    inferred=typet5_inferred,
    form="adjusted"
)

Initial prediction size: (1151363, 8)
Deriving limited form


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1151363/1151363 [00:05<00:00, 208828.23it/s]


Deriving adjusted form from limited form


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1151363/1151363 [00:05<00:00, 197932.07it/s]


Size after joining predictions to groundtruth: (584320, 12)
Reduced to evaluatable: (206348, 12)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned[clean_annos] = cleaned[clean_annos].fillna("<MISSING>")


In [35]:
# Build the TypeT5 evaluation frame in "base" form from the same inputs as the adjusted one.
typet5_base = pipeline.factory(
    tool="TypeT5TopN1",
    groundtruth=groundtruth,
    inferred=typet5_inferred,
    form="base"
)

Deriving limited form


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1151363/1151363 [00:05<00:00, 208774.97it/s]


Deriving adjusted form from limited form


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1151363/1151363 [00:05<00:00, 200446.83it/s]


Deriving base form from adjusted form


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1151363/1151363 [00:05<00:00, 196918.51it/s]


(584320, 12)
Reduced to evaluatable: (206348, 12)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned[clean_annos] = cleaned[clean_annos].fillna("<MISSING>")


In [36]:
# TypeT5, adjusted form: per-bucket performance plus a total row.
tt5_adj_perf = experiments.predictions.performance(
    typet5_adjusted.replace("<MISSING>", pd.NA),
    ubiq_types=ubiq_types,
    comm_types=common_types,
    rare_types=rare_types,
    total=True,
)
display(tt5_adj_perf)

Unnamed: 0,observations,predictions,unassigned,matches,stracc,relacc
ubiquitous,94019,79419,14600,71198,0.757272,0.896486
common,51345,43660,7685,29432,0.57322,0.674118
rare,60984,50725,10259,32656,0.535485,0.643785
total,206348,173804,32544,133286,0.645928,0.766875


In [37]:
# TypeT5, base form: per-bucket performance plus a total row.
tt5_base_perf = experiments.predictions.performance(
    typet5_base.replace("<MISSING>", pd.NA),
    ubiq_types=ubiq_types,
    comm_types=common_types,
    rare_types=rare_types,
    total=True,
)
display(tt5_base_perf)

Unnamed: 0,observations,predictions,unassigned,matches,stracc,relacc
ubiquitous,94019,79419,14600,72412,0.770185,0.911772
common,51345,43660,7685,34085,0.663843,0.780692
rare,60984,50725,10259,37435,0.61385,0.737999
total,206348,173804,32544,143932,0.697521,0.828128


In [38]:
# TypeT5, adjusted form: the same scoring, broken down by category.
tt5_cat_adj_perf = experiments.predictions.by_category_performance(
    typet5_adjusted.replace("<MISSING>", pd.NA),
    ubiq_types=ubiq_types,
    comm_types=common_types,
    rare_types=rare_types,
    total=True,
)
tt5_cat_adj_perf

Unnamed: 0,Unnamed: 1,observations,predictions,unassigned,matches,stracc,relacc
PARAMETER,ubiquitous,65678,56431,9247,50619,0.770715,0.897007
PARAMETER,common,33307,28264,5043,18803,0.564536,0.665263
PARAMETER,rare,36550,30725,5825,19912,0.544788,0.648072
PARAMETER,total,135535,115420,20115,89334,0.659121,0.773991
RETURN,ubiquitous,16996,14364,2632,13059,0.768357,0.909148
RETURN,common,14879,12807,2072,9098,0.611466,0.710393
RETURN,rare,19812,16284,3528,10603,0.535181,0.65113
RETURN,total,51687,43455,8232,32760,0.633815,0.753883
VARIABLE,ubiquitous,11345,8624,2721,7520,0.662847,0.871985
VARIABLE,common,3159,2589,570,1531,0.484647,0.591348


In [39]:
# TypeT5, base form: the same scoring, broken down by category.
tt5_cat_base_perf = experiments.predictions.by_category_performance(
    typet5_base.replace("<MISSING>", pd.NA),
    ubiq_types=ubiq_types,
    comm_types=common_types,
    rare_types=rare_types,
    total=True,
)
tt5_cat_base_perf

Unnamed: 0,Unnamed: 1,observations,predictions,unassigned,matches,stracc,relacc
PARAMETER,ubiquitous,65678,56431,9247,51451,0.783383,0.911751
PARAMETER,common,33307,28264,5043,21412,0.642868,0.757571
PARAMETER,rare,36550,30725,5825,22140,0.605746,0.720586
PARAMETER,total,135535,115420,20115,95003,0.700948,0.823107
RETURN,ubiquitous,16996,14364,2632,13383,0.787421,0.931704
RETURN,common,14879,12807,2072,10852,0.72935,0.847349
RETURN,rare,19812,16284,3528,12765,0.644306,0.783898
RETURN,total,51687,43455,8232,37000,0.715847,0.851456
VARIABLE,ubiquitous,11345,8624,2721,7578,0.667959,0.878711
VARIABLE,common,3159,2589,570,1821,0.576448,0.70336


# HiTyperNoML

In [40]:
import importlib

# Re-execute the pipeline module so the calls below run against its current source.
importlib.reload(pipeline)

# Load the inference artifacts stored under the "HiTyperNoML" tool name for the "all" task.
hityper_inferred = inferred.load_entire_inferred(
    artifact_root, dataset, tool_name="HiTyperNoML", task="all"
)

# The two evaluation frames differ only in `form`; they are built in the
# order adjusted -> base.
hityper_adjusted, hityper_base = (
    pipeline.factory(
        tool="HiTyper",
        groundtruth=groundtruth,
        inferred=hityper_inferred,
        form=form,
    )
    for form in ("adjusted", "base")
)

/nfs/home/bsparks/mdti4py/datasets/CrossDomainTypes4Py/chrisc36__umpy-allennlp/HiTyperNoML/all/inferred.csv: 100%|█████████████████████████████████████████████████████████████████████████████████████| 1550/1550 [00:10<00:00, 147.93it/s]


Loaded 1448 inference artifacts
Initial prediction size: (1142814, 8)
Deriving limited form


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1142814/1142814 [00:04<00:00, 261159.39it/s]


Deriving adjusted form from limited form


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1142814/1142814 [00:04<00:00, 256867.36it/s]


Size after joining predictions to groundtruth: (1012573, 12)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned[clean_annos] = cleaned[clean_annos].fillna("<MISSING>")


Reduced to evaluatable: (222302, 12)
Deriving limited form


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1142814/1142814 [00:04<00:00, 257041.79it/s]


Deriving adjusted form from limited form


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1142814/1142814 [00:04<00:00, 256310.68it/s]


Deriving base form from adjusted form


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1142814/1142814 [00:04<00:00, 234848.90it/s]


(1012573, 12)
Reduced to evaluatable: (222302, 12)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned[clean_annos] = cleaned[clean_annos].fillna("<MISSING>")


In [41]:
# Performance of the HiTyperNoML predictions in the adjusted form.
# The "<MISSING>" placeholder is swapped for pd.NA before scoring.
hitypernoml_adj_perf = experiments.predictions.performance(
    hityper_adjusted.replace("<MISSING>", pd.NA),
    ubiq_types=ubiq_types,
    comm_types=common_types,
    rare_types=rare_types,
    total=True,
)
hitypernoml_adj_perf

Unnamed: 0,observations,predictions,unassigned,matches,stracc,relacc
ubiquitous,98454,21504,76950,20230,0.205477,0.940755
common,56758,8348,48410,2732,0.048134,0.327264
rare,67090,12371,54719,4291,0.063959,0.34686
total,222302,42223,180079,27253,0.122594,0.645454


In [42]:
# Performance of the HiTyperNoML predictions in the base form.
# The "<MISSING>" placeholder is swapped for pd.NA before scoring.
hitypernoml_base_perf = experiments.predictions.performance(
    hityper_base.replace("<MISSING>", pd.NA),
    ubiq_types=ubiq_types,
    comm_types=common_types,
    rare_types=rare_types,
    total=True,
)
hitypernoml_base_perf

Unnamed: 0,observations,predictions,unassigned,matches,stracc,relacc
ubiquitous,98454,21504,76950,20606,0.209296,0.95824
common,56758,8348,48410,6564,0.115649,0.786296
rare,67090,12371,54719,8489,0.126532,0.686202
total,222302,42223,180079,35659,0.160408,0.84454


In [43]:
# Per-category performance of the HiTyperNoML predictions in the adjusted form.
# The "<MISSING>" placeholder is swapped for pd.NA before scoring.
hitypernoml_cat_adj_perf = experiments.predictions.by_category_performance(
    hityper_adjusted.replace("<MISSING>", pd.NA),
    ubiq_types=ubiq_types,
    comm_types=common_types,
    rare_types=rare_types,
    total=True,
)
hitypernoml_cat_adj_perf

Unnamed: 0,Unnamed: 1,observations,predictions,unassigned,matches,stracc,relacc
PARAMETER,ubiquitous,65678,11921,53757,11526,0.175493,0.966865
PARAMETER,common,33307,635,32672,118,0.003543,0.185827
PARAMETER,rare,36550,589,35961,77,0.002107,0.13073
PARAMETER,total,135535,13145,122390,11721,0.08648,0.89167
RETURN,ubiquitous,16996,8096,8900,7379,0.434161,0.911438
RETURN,common,14879,5162,9717,1915,0.128705,0.37098
RETURN,rare,19812,9143,10669,3487,0.176004,0.381385
RETURN,total,51687,22401,29286,12781,0.247277,0.570555
VARIABLE,ubiquitous,15780,1487,14293,1325,0.083967,0.891056
VARIABLE,common,8572,2551,6021,699,0.081545,0.27401


In [44]:
# Per-category performance of the HiTyperNoML predictions in the base form.
# The "<MISSING>" placeholder is swapped for pd.NA before scoring.
hitypernoml_cat_base_perf = experiments.predictions.by_category_performance(
    hityper_base.replace("<MISSING>", pd.NA),
    ubiq_types=ubiq_types,
    comm_types=common_types,
    rare_types=rare_types,
    total=True,
)
hitypernoml_cat_base_perf

Unnamed: 0,Unnamed: 1,observations,predictions,unassigned,matches,stracc,relacc
PARAMETER,ubiquitous,65678,11921,53757,11531,0.175569,0.967285
PARAMETER,common,33307,635,32672,389,0.011679,0.612598
PARAMETER,rare,36550,589,35961,139,0.003803,0.235993
PARAMETER,total,135535,13145,122390,12059,0.088973,0.917383
RETURN,ubiquitous,16996,8096,8900,7727,0.454636,0.954422
RETURN,common,14879,5162,9717,4210,0.282949,0.815575
RETURN,rare,19812,9143,10669,6438,0.324955,0.704145
RETURN,total,51687,22401,29286,18375,0.355505,0.820276
VARIABLE,ubiquitous,15780,1487,14293,1348,0.085425,0.906523
VARIABLE,common,8572,2551,6021,1965,0.229235,0.770286


# Table Creation

In [45]:
# Columns that are reported once per model (they are shared between the
# adjusted and base tables built below).
per_prediction_columns = ["observations", "predictions"]
# Columns that are reported separately for each annotation form.
per_form_columns = ["matches", "stracc", "relacc"]
# Full column set for a single-form table: shared counts first, then the metrics.
per_model_columns = per_prediction_columns + per_form_columns

In [46]:
# Stack the adjusted-form results of the three models under a
# (model, scarcity) row index.
adj_by_model_scarcity = pd.concat(
    [perf[per_model_columns] for perf in (typilus_adj_perf, t4py_adj_perf, tt5_adj_perf)],
    keys=["Typilus", "Type4Py", "TypeT5"],
).rename_axis(index=["model", "scarcity"])

# Swap the index levels and regroup so that rows are ordered by scarcity
# first (in order of first appearance), then by model.
adj_by_scarcity_model = adj_by_model_scarcity.swaplevel().pipe(
    lambda swapped: pd.concat(
        [rows for _, rows in swapped.groupby("scarcity", sort=False)]
    )
)
display(adj_by_scarcity_model)

Unnamed: 0_level_0,Unnamed: 1_level_0,observations,predictions,matches,stracc,relacc
scarcity,model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ubiquitous,Typilus,98454,93892,64902,0.659211,0.691241
ubiquitous,Type4Py,100719,95761,67438,0.669566,0.704232
ubiquitous,TypeT5,94019,79419,71198,0.757272,0.896486
common,Typilus,56758,53850,9557,0.168382,0.177474
common,Type4Py,58706,53521,17570,0.299288,0.328282
common,TypeT5,51345,43660,29432,0.57322,0.674118
rare,Typilus,67090,63440,2744,0.0409,0.043253
rare,Type4Py,68859,61228,8286,0.120333,0.13533
rare,TypeT5,60984,50725,32656,0.535485,0.643785
total,Typilus,222302,211182,77203,0.347289,0.365576


In [48]:
# Stack the base-form results of the three models under a
# (model, scarcity) row index.
base_by_model_scarcity = pd.concat(
    [perf[per_model_columns] for perf in (typilus_base_perf, t4py_base_perf, tt5_base_perf)],
    keys=["Typilus", "Type4Py", "TypeT5"],
).rename_axis(index=["model", "scarcity"])

# NOTE: the name is rebound here to the scarcity-major ordering
# (index levels become scarcity, model).
base_by_model_scarcity = base_by_model_scarcity.swaplevel().pipe(
    lambda swapped: pd.concat(
        [rows for _, rows in swapped.groupby("scarcity", sort=False)]
    )
)
display(base_by_model_scarcity)

Unnamed: 0_level_0,Unnamed: 1_level_0,observations,predictions,matches,stracc,relacc
scarcity,model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ubiquitous,Typilus,98454,93892,65994,0.670303,0.702871
ubiquitous,Type4Py,100719,95761,67997,0.675116,0.71007
ubiquitous,TypeT5,94019,79419,72412,0.770185,0.911772
common,Typilus,56758,53850,15524,0.273512,0.288282
common,Type4Py,58706,53521,20530,0.349709,0.383588
common,TypeT5,51345,43660,34085,0.663843,0.780692
rare,Typilus,67090,63440,8312,0.123893,0.131021
rare,Type4Py,68859,61228,10533,0.152965,0.172029
rare,TypeT5,60984,50725,37435,0.61385,0.737999
total,Typilus,222302,211182,89830,0.40409,0.425368


In [49]:
# Emit the base-form table as LaTeX, with floats rounded to two decimals.
print(base_by_model_scarcity.to_latex(float_format="%.2f"))

\begin{tabular}{llrrrrr}
\toprule
 &  & observations & predictions & matches & stracc & relacc \\
scarcity & model &  &  &  &  &  \\
\midrule
\multirow[t]{3}{*}{ubiquitous} & Typilus & 98454 & 93892 & 65994 & 0.67 & 0.70 \\
 & Type4Py & 100719 & 95761 & 67997 & 0.68 & 0.71 \\
 & TypeT5 & 94019 & 79419 & 72412 & 0.77 & 0.91 \\
\cline{1-7}
\multirow[t]{3}{*}{common} & Typilus & 56758 & 53850 & 15524 & 0.27 & 0.29 \\
 & Type4Py & 58706 & 53521 & 20530 & 0.35 & 0.38 \\
 & TypeT5 & 51345 & 43660 & 34085 & 0.66 & 0.78 \\
\cline{1-7}
\multirow[t]{3}{*}{rare} & Typilus & 67090 & 63440 & 8312 & 0.12 & 0.13 \\
 & Type4Py & 68859 & 61228 & 10533 & 0.15 & 0.17 \\
 & TypeT5 & 60984 & 50725 & 37435 & 0.61 & 0.74 \\
\cline{1-7}
\multirow[t]{3}{*}{total} & Typilus & 222302 & 211182 & 89830 & 0.40 & 0.43 \\
 & Type4Py & 228284 & 210510 & 99060 & 0.43 & 0.47 \\
 & TypeT5 & 206348 & 173804 & 143932 & 0.70 & 0.83 \\
\cline{1-7}
\bottomrule
\end{tabular}



In [50]:
def _combine_forms(adjusted_perf, base_perf):
    """Place the shared count columns beside the adjusted- and base-form metrics.

    The count columns (`per_prediction_columns`) are taken from the adjusted
    frame; the metric columns (`per_form_columns`) are taken from both frames.
    The result has a two-level column header whose outer level is "",
    "adjusted" or "base".
    """
    return pd.concat(
        [
            adjusted_perf[per_prediction_columns],
            adjusted_perf[per_form_columns],
            base_perf[per_form_columns],
        ],
        keys=["", "adjusted", "base"],
        axis=1,
    )


typilus_combined = _combine_forms(typilus_adj_perf, typilus_base_perf)
type4py_combined = _combine_forms(t4py_adj_perf, t4py_base_perf)
typet5_combined = _combine_forms(tt5_adj_perf, tt5_base_perf)

In [51]:
# Stack the combined (adjusted + base) tables of the three models under a
# (model, scarcity) row index.
by_model_scarcity = pd.concat(
    [typilus_combined, type4py_combined, typet5_combined],
    keys=["Typilus", "Type4Py", "TypeT5"],
).rename_axis(index=["model", "scarcity"])

# Swap the index levels and regroup so that rows are ordered by scarcity
# first (in order of first appearance), then by model.
by_scarcity_model = by_model_scarcity.swaplevel().pipe(
    lambda swapped: pd.concat(
        [rows for _, rows in swapped.groupby("scarcity", sort=False)]
    )
)

In [52]:
# Render the combined adjusted/base table, indexed by (scarcity, model).
display(by_scarcity_model)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,adjusted,adjusted,adjusted,base,base,base
Unnamed: 0_level_1,Unnamed: 1_level_1,observations,predictions,matches,stracc,relacc,matches,stracc,relacc
scarcity,model,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
ubiquitous,Typilus,98454,93892,64902,0.659211,0.691241,65994,0.670303,0.702871
ubiquitous,Type4Py,100719,95761,67438,0.669566,0.704232,67997,0.675116,0.71007
ubiquitous,TypeT5,94019,79419,71198,0.757272,0.896486,72412,0.770185,0.911772
common,Typilus,56758,53850,9557,0.168382,0.177474,15524,0.273512,0.288282
common,Type4Py,58706,53521,17570,0.299288,0.328282,20530,0.349709,0.383588
common,TypeT5,51345,43660,29432,0.57322,0.674118,34085,0.663843,0.780692
rare,Typilus,67090,63440,2744,0.0409,0.043253,8312,0.123893,0.131021
rare,Type4Py,68859,61228,8286,0.120333,0.13533,10533,0.152965,0.172029
rare,TypeT5,60984,50725,32656,0.535485,0.643785,37435,0.61385,0.737999
total,Typilus,222302,211182,77203,0.347289,0.365576,89830,0.40409,0.425368


In [53]:
# Emit the combined adjusted/base table as LaTeX, with floats rounded to two decimals.
print(by_scarcity_model.to_latex(float_format="%.2f"))

\begin{tabular}{llrrrrrrrr}
\toprule
 &  & \multicolumn{2}{r}{} & \multicolumn{3}{r}{adjusted} & \multicolumn{3}{r}{base} \\
 &  & observations & predictions & matches & stracc & relacc & matches & stracc & relacc \\
scarcity & model &  &  &  &  &  &  &  &  \\
\midrule
\multirow[t]{3}{*}{ubiquitous} & Typilus & 98454 & 93892 & 64902 & 0.66 & 0.69 & 65994 & 0.67 & 0.70 \\
 & Type4Py & 100719 & 95761 & 67438 & 0.67 & 0.70 & 67997 & 0.68 & 0.71 \\
 & TypeT5 & 94019 & 79419 & 71198 & 0.76 & 0.90 & 72412 & 0.77 & 0.91 \\
\cline{1-10}
\multirow[t]{3}{*}{common} & Typilus & 56758 & 53850 & 9557 & 0.17 & 0.18 & 15524 & 0.27 & 0.29 \\
 & Type4Py & 58706 & 53521 & 17570 & 0.30 & 0.33 & 20530 & 0.35 & 0.38 \\
 & TypeT5 & 51345 & 43660 & 29432 & 0.57 & 0.67 & 34085 & 0.66 & 0.78 \\
\cline{1-10}
\multirow[t]{3}{*}{rare} & Typilus & 67090 & 63440 & 2744 & 0.04 & 0.04 & 8312 & 0.12 & 0.13 \\
 & Type4Py & 68859 & 61228 & 8286 & 0.12 & 0.14 & 10533 & 0.15 & 0.17 \\
 & TypeT5 & 60984 & 50725 & 32656 & 0