In [1]:
# eval on predictions
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from evaluator.CodeBLEU.calc_code_bleu import get_codebleu

In [2]:
def evaluate_codebleu(pred_filename, weights="0.25,0.25,0.25,0.25"):
    """Score a whole predictions CSV with ``get_codebleu``.

    Args:
        pred_filename: Path of a CSV file that has a ``preds`` column (model
            output) and a ``labels`` column (gold code).
        weights: Comma-separated weight string, forwarded unchanged to
            ``get_codebleu``.

    Returns:
        Whatever ``get_codebleu`` returns for the full file.
    """
    pred_df = pd.read_csv(pred_filename)

    def as_code_list(column):
        # An empty cell comes back from read_csv as NaN (a float), so turn it
        # into "" and hand over a plain list of str, the same shape the
        # per-sample calls in this notebook pass to get_codebleu.
        return pred_df[column].fillna("").astype(str).tolist()

    # A single reference set: one gold file per prediction.
    refs = [as_code_list("labels")]
    hyp = as_code_list("preds")
    return get_codebleu(refs, hyp, "python", weights)

In [3]:
def print_split_line(s):
    """Print ``s`` upper-cased between two runs of ``=``, with a blank line before and after."""
    rule = "====================="
    banner = f"\n{rule}{s.upper()}{rule}\n"
    print(banner)

In [4]:
from utils.regex_parse import comment



# Uncomment Parallel Corpus - no outliers

In [5]:
# no_outlier_codet5small
evaluate_codebleu("seq2seq_results/no_outlier_codet5small/codet5_preds.csv")

{'ngram': 0.7020337009365258,
 'weighted_ngram': 0.7158332483308997,
 'syntax_match': 0.9308169419505175,
 'dataflow_match': 0.8687742139204994,
 'code_bleu': 0.8043645262846106}

In [6]:
comment_pred_df = pd.read_csv("seq2seq_results/no_outlier_codet5small/codet5_preds.csv")

In [7]:
comment_pred_df

Unnamed: 0.1,Unnamed: 0,preds,labels
0,0,# Copyright 2018 The TensorFlow Authors. All R...,# test_triads.py - unit tests for the triads m...
1,1,#!/usr/bin/python\n# ex:set fileencoding=utf-8...,#!/usr/bin/python\n# ex:set fileencoding=utf-8...
2,2,from._make_annotations import _make_annotation...,from._make_annotations import _make_annotation...
3,3,"from hpack.hpack import encode_integer, decode...","from hpack.hpack import (\n encode_integer,..."
4,4,# -*- encoding: utf-8 -*-\n# This file is dist...,# -*- encoding: utf-8 -*-\n# This file is dist...
...,...,...,...
3235,3235,import pyglet.window\nfrom pyglet_gui.override...,import pyglet.window\n\nfrom pyglet_gui.overri...
3236,3236,# coding=utf-8\n# Copyright 2022 The TensorFlo...,# coding=utf-8\n# Copyright 2020 The TF-Agents...
3237,3237,#!/usr/bin/env python\n# -*- coding: utf-8 -*-...,"from __future__ import (absolute_import, divis..."
3238,3238,# -*- coding: utf-8 -*-\n# Copyright 2013 M...,# -*- coding: utf-8 -*-\n\n# Copyright 2013...


In [8]:
comment_labels = comment_pred_df["labels"].to_numpy()
comment_preds = comment_pred_df["preds"].to_numpy()

In [None]:
# Per-sample scoring for the comment corpus. Each sample gets two scores:
#   * comment_code_scores: get_codebleu on the full prediction vs. the full gold file;
#   * comment_text_scores: get_codebleu with weights '1,0,0,0' on only the text
#     that `comment(...)` extracts from each side, joined with newlines.
# The extracted comments, their joined text, their counts and a has-any flag
# are kept per sample as well.
comment_code_scores = []
comment_text_scores = []

gold_comments, pred_comments = [], []
gold_comment_texts, pred_comment_texts = [], []
gold_comments_count, pred_comments_count = [], []
gold_has_comments_list, pred_has_comments_list = [], []

for idx in tqdm(range(comment_preds.shape[0])):
    gold, pred = comment_labels[idx], comment_preds[idx]
    refs, hyp = [[gold]], [pred]

    comment_code_score = get_codebleu(refs, hyp, "python", '0.25,0.25,0.25,0.25')

    gold_comment, pred_comment = comment(gold), comment(pred)
    gold_comment_text = "\n".join(gold_comment)
    pred_comment_text = "\n".join(pred_comment)
    gold_comment_count, pred_comment_count = len(gold_comment), len(pred_comment)
    gold_has_comment = gold_comment_count > 0
    pred_has_comment = pred_comment_count > 0

    comment_text_score = get_codebleu([[gold_comment_text]], [pred_comment_text], "python", '1,0,0,0')

    comment_code_scores.append(comment_code_score)
    comment_text_scores.append(comment_text_score)

    gold_comments.append(gold_comment)
    pred_comments.append(pred_comment)
    gold_comment_texts.append(gold_comment_text)
    pred_comment_texts.append(pred_comment_text)
    gold_comments_count.append(gold_comment_count)
    pred_comments_count.append(pred_comment_count)
    gold_has_comments_list.append(gold_has_comment)
    pred_has_comments_list.append(pred_has_comment)

In [10]:
# Keep only the "ngram" component of each per-sample comment-text score.
comment_bleu_scores = np.array([s["ngram"] for s in comment_text_scores])

In [11]:
comment_bleu_scores.mean()

0.1916332918969301

In [12]:
comment_bleu_scores.max()

1.0

In [18]:
comment_bleu_scores[3236]

0.9013987750892306

In [17]:
# Show one sample: its prediction, its gold label, and its comment-text score.
idx = 188
for section, value in (
    ("prediction", comment_preds[idx]),
    ("gold labels", comment_labels[idx]),
    ("score", comment_bleu_scores[idx]),
):
    print_split_line(f"{idx}-{section}")
    print(value)



#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import worlds as my_world

fldr = os.getcwd() + os.sep + 'data' + os.sep + 'worlds'

class Planet(object):
    """
    class to manage the simplified evolution of a planet to 
    build a virtual world. Takes basic atmospheric parameters
    and *very* roughly uses these to guess what the world would
    look like.
    The idea is to be able to auto generate worlds as follows:
    green lush worlds: sun > 0.15, rain > 0.15
    earth like worlds: sun=0.2, rain=0.1, wind=0.1
    metal rich worlds: sun<0.2, wind>0.2, seismic_activity>0.6
    """
    def __init__(self, name, num_seeds, width, height, wind, rain, sun, lava):
        """
        All parameters must be between 0 and 1 and show the probability of
        that event. The numbers below are rough guidelines for normal planets
        wind 0.0 -> 0.2 : determines air currents, rain movement, topsoil
        rain 0.1 -> 0.6 : determines plant growth, river networks
        

In [13]:
# Number of samples, then how many predictions / gold files contain at least one comment.
comment_total = len(comment_preds)
sum(pred_has_comments_list), sum(gold_has_comments_list)

(1449, 2334)

In [None]:
# Browse samples whose comment-text score lies between 0.5 and 0.95 (inclusive)
# and where both the prediction and the gold file contain comments.
# Ad-hoc filters that were tried here and are currently switched off: skipping
# samples whose comment text mentions "copyright" or "license", skipping
# predictions containing "\n#", and keeping only texts containing " #".
for idx in range(comment_total):
    score = comment_bleu_scores[idx]
    if score < 0.5 or score > 0.95:
        continue
    if not (pred_has_comments_list[idx] and gold_has_comments_list[idx]):
        continue

    print_split_line(f"{idx}-prediction")
    print(comment_preds[idx])
    print_split_line(f"{idx}-gold labels")
    print(comment_labels[idx])
    print_split_line(f"{idx}-score")
    print(score)
    
        

In [19]:
# Fraction of samples where prediction and gold agree on having any comment at all.
print("Accuracy of whether both do or do not have comments")
(np.array(pred_has_comments_list) == np.array(gold_has_comments_list)).sum() / comment_total

Accuracy of whether both do or do not have comments


0.6509259259259259

In [21]:
# Fraction of samples where prediction and gold contain the same number of comments.
print("Accuracy of whether both have same comment counts")
(np.array(gold_comments_count) == np.array(pred_comments_count)).sum() / comment_total

Accuracy of whether both have same comment counts


0.4049382716049383

In [67]:
# Mask of samples with a perfect comment-text score where BOTH sides contain comments.
# np.logical_and combines exactly two inputs; a third positional argument is
# taken as the `out` buffer, so the third mask would be ignored. The three
# masks are therefore AND-ed explicitly.
(comment_bleu_scores == 1) & np.array(pred_has_comments_list) & np.array(gold_has_comments_list)

array([False,  True, False, ..., False,  True, False])

In [64]:
# Share of samples whose comment-text score is exactly 1, and whose score is at least 0.9.
print("Perfect Prediction Rate:", (comment_bleu_scores == 1).sum() / comment_total)
print("Above 0.9 Comment BLEU Prediction Rate:", (comment_bleu_scores >= 0.9).sum() / comment_total)

Perfect Prediction Rate: 0.09598765432098766
Above 0.9 Comment BLEU Prediction Rate: 0.13425925925925927


In [69]:
# Same rates as the unfiltered ones, but a sample only counts when both the
# prediction and the gold file contain comments.
# np.logical_and combines exactly two inputs (a third positional argument is
# taken as `out`), so the masks are AND-ed explicitly with `&`.
# The second label says "BLEU" because comment_bleu_scores holds the "ngram"
# component of the comment-text score.
both_have_comments = np.array(pred_has_comments_list) & np.array(gold_has_comments_list)
print("Perfect Prediction Rate:", ((comment_bleu_scores == 1) & both_have_comments).sum() / comment_total)
print("Above 0.9 Comment BLEU Prediction Rate:", ((comment_bleu_scores >= 0.9) & both_have_comments).sum() / comment_total)

Perfect Prediction Rate: 0.09598765432098766
Above 0.9 Comment BLEU Prediction Rate: 0.13425925925925927


In [50]:

# Sample 3236 of the comment corpus. The bare names `preds` / `labels` are not
# defined by any cell above; the arrays loaded for this section are
# `comment_preds` / `comment_labels`.
print_split_line("prediction")
print(comment_preds[3236])
print_split_line("gold labels")
print(comment_labels[3236])

# coding=utf-8
# Copyright 2022 The TensorFlow Datasets Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for trajectory."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

from tf_agents.drivers import dynamic_episode_driver
from tf_agents.drivers import test_utils as drivers_test_utils
from tf_agents.environments import tf_py_environment
from tf_agents.trajectories import t

In [47]:
# `labels` / `preds` are not defined by any cell above; this section's arrays
# are comment_labels / comment_preds.
# NOTE(review): confirm the comment corpus is the one intended here.
get_codebleu([[comment_labels[20]]], [comment_preds[20]], "python", '0.25,0.25,0.25,0.25')

{'ngram': 0.030049336124714957,
 'weighted_ngram': 0.049595120779045175,
 'syntax_match': 0.9565217391304348,
 'dataflow_match': 1.0,
 'code_bleu': 0.5090415490085487}

# Removed Class Parallel Corpus - with outliers

In [6]:
# outlier_class_codet5small
evaluate_codebleu("seq2seq_results/outlier_class_codet5small/codet5_preds.csv",  '0.25,0.25,0.25,0.25')

{'ngram': 0.9539663181494397,
 'weighted_ngram': 0.955850824785441,
 'syntax_match': 0.9574235970530824,
 'dataflow_match': 0.8715078962664325,
 'code_bleu': 0.9346871590635989}

In [29]:
class_pred_df = pd.read_csv("seq2seq_results/outlier_class_codet5small/codet5_preds.csv")

In [30]:
class_pred_df

Unnamed: 0.1,Unnamed: 0,preds,labels
0,0,"""""""Sequence-to-sequence model with an attentio...","""""""Sequence-to-sequence model with an attentio..."
1,1,"from django.db import models, migrations\n\ncl...","from django.db import models, migrations\n\ncl..."
2,2,"from lampost.di.resource import Injected, modu...","from lampost.di.resource import Injected, modu..."
3,3,"import logging\nfrom typing import Any, Dict\n...","import logging\nfrom typing import Any, Dict\n..."
4,4,"""""""Module for finding vulnerabilities based on...","""""""Module for finding vulnerabilities based on..."
...,...,...,...
7840,7840,from.dice import Dice\nfrom.player import Play...,from.dice import Dice\nfrom.player import Play...
7841,7841,from.views import BaseView\nfrom forums.extens...,from.views import BaseView\nfrom forums.extens...
7842,7842,"""""""\nutils.py\n========\nThis submodule contai...","""""""\nutils.py\n========\nThis submodule contai..."
7843,7843,"""""""Tests for audio_classifier.""""""\nimport enum...","""""""Tests for audio_classifier.""""""\nimport enum..."


In [31]:
class_labels = class_pred_df["labels"].to_numpy()
class_preds = class_pred_df["preds"].to_numpy()

In [None]:
# Per-sample scoring for the class corpus: each prediction is scored by
# get_codebleu against its single gold file.
class_scores = []
for idx in tqdm(range(class_preds.shape[0])):
    refs, hyp = [[class_labels[idx]]], [class_preds[idx]]
    score = get_codebleu(refs, hyp, "python", '0.25,0.25,0.25,0.25')
    class_scores.append(score)

In [54]:
class_total = class_preds.shape[0]

In [43]:
for idx, score in enumerate(class_scores):
    if score["weighted_ngram"] == 1:
        print(idx)
        break

0


In [None]:

from datasets import load_from_disk, load_metric
fname_prefix = ""

test_codet5_dataset = load_from_disk(fname_prefix + 'datasets/codet5_test_class_bq_padded.hf') #codet5_train_class_bq_padded.hf


In [48]:
keyword = "Construct sequence encoder"
for idx, code in enumerate(test_codet5_dataset["content"]):
    if keyword in code:
        print(idx)

train 63833


In [51]:
class_code_bleus = np.array([s["code_bleu"] for s in class_scores])

In [60]:
# Share of class-corpus samples with code_bleu exactly 1, and strictly above 0.9.
print("Perfect Prediction Rate:", (class_code_bleus == 1).sum() / class_total)
print("Above 0.9 CodeBLEU Prediction Rate:", (class_code_bleus > 0.9).sum() / class_total)

Perfect Prediction Rate: 0.28846398980242194
Above 0.9 CodeBLEU Prediction Rate: 0.7445506692160612


In [49]:
# a perfect case
print_split_line("input")
print(eval_dataset["train"]["no_class_content"][63833])
print_split_line("prediction")
print(class_preds[0])
print_split_line("gold labels")
print(class_labels[0])
print_split_line("score")
print(class_scores[0])




'Sequence-to-sequence model with an attention mechanism.'
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from encoder_decoder import encoder
from encoder_decoder.framework import EncoderDecoderModel
from . import rnn_decoder
'Sequence-to-sequence model with attention and for multiple buckets.\n\n    This class implements a multi-layer recurrent neural network as encoder,\n    and an attention-based decoder. This is the same as the model described in\n    this paper: http://arxiv.org/abs/1412.7449 - please look there for details,\n    or into the seq2seq library for complete model implementation.\n    This class also allows to use GRU cells in addition to LSTM cells, and\n    sampled softmax to handle large output vocabulary size. A single-layer\n    version of this model, but with bi-directional encoder, was presented in\n      http://arxiv.org/abs/1409.0473\n    and sampled softmax is described in Section 3 of the follow

In [40]:

# Show the input at input_idx next to the prediction, gold label and score at output_idx.
# NOTE(review): `eval_dataset` is not defined by any cell visible above, and the
# pairing of input_idx with output_idx is hard-coded - confirm both before re-running.
input_idx = 53092
output_idx = 4293
print_split_line("input")
print(eval_dataset["train"]["no_class_content"][input_idx])
print_split_line("prediction")
print(class_preds[output_idx])
print_split_line("gold labels")
print(class_labels[output_idx])
print_split_line("score")
print(class_scores[output_idx])




import struct

def __init__():
    h = [1779033703, 3144134277, 1013904242, 2773480762, 1359893119, 2600822924, 528734635, 1541459225]
    k = [1116352408, 1899447441, 3049323471, 3921009573, 961987163, 1508970993, 2453635748, 2870763221, 3624381080, 310598401, 607225278, 1426881987, 1925078388, 2162078206, 2614888103, 3248222580, 3835390401, 4022224774, 264347078, 604807628, 770255983, 1249150122, 1555081692, 1996064986, 2554220882, 2821834349, 2952996808, 3210313671, 3336571891, 3584528711, 113926993, 338241895, 666307205, 773529912, 1294757372, 1396182291, 1695183700, 1986661051, 2177026350, 2456956037, 2730485921, 2820302411, 3259730800, 3345764771, 3516065817, 3600352804, 4094571909, 275423344, 430227734, 506948616, 659060556, 883997877, 958139571, 1322822218, 1537002063, 1747873779, 1955562222, 2024104815, 2227730452, 2361852424, 2428436474, 2756734187, 3204031479, 3329325298]

def rotate_right(v, n):
    w = ((v >> n) | (v << (32 - n)))
    return (w & 4294967295)

def compre

In [34]:
print(eval_dataset["train"]["no_class_content"][74459])


import ctypes
pass

def __init__(message):
    message += (' (%s)' % ctypes.WinError())
    super(PyperclipWindowsException, self).__init__(message)



In [52]:
# from transformers import RobertaTokenizer
# tokenizer = RobertaTokenizer.from_pretrained("Salesforce/codet5-small")

# Removed Doc String Parallel Corpus - with outliers

In [3]:
# outlier_docstring_codet5small
evaluate_codebleu("seq2seq_results/outlier_docstring_codet5small/codet5_preds.csv",  '0.25,0.25,0.25,0.25')

{'ngram': 0.6007622882171681,
 'weighted_ngram': 0.6152656721984298,
 'syntax_match': 0.8764716714385848,
 'dataflow_match': 0.7972510418090768,
 'code_bleu': 0.7224376684158149}

In [5]:
docstr_pred_df = pd.read_csv("seq2seq_results/outlier_docstring_codet5small/codet5_preds.csv")

In [164]:
docstr_labels = docstr_pred_df["labels"].to_numpy()
docstr_preds = docstr_pred_df["preds"].to_numpy()

In [165]:
import re
# A triple-quoted string that opens a line (after optional whitespace).
# Group 1 is the body of a '''...''' string, group 2 the body of a """...""" string.
# Written as a raw string so that \s and \S reach the regex engine instead of
# being flagged as invalid string escapes. re.S is not needed: the pattern
# contains no ".".
_DOCSTRING_RE = re.compile(
    r"^\s*'{3}([\s\S]*?)'{3}|^\s*\"{3}([\s\S]*?)\"{3}",
    re.M,
)


def get_docstring(text):
    """Return the bodies of all line-leading triple-quoted strings in ``text``.

    A string counts when its opening ``'''`` or ``\"\"\"`` is the first
    non-whitespace content on a line. Triple quotes that appear later on a
    line (for example after ``x =``) are not matched.

    Args:
        text: Source code to scan.

    Returns:
        A list with the text between the quotes of every match, in order of
        appearance. The list is empty when nothing matches.
    """
    # Exactly one of the two groups can be non-empty per match; `or` picks it
    # (and yields "" for an empty docstring).
    return [single or double for single, double in _DOCSTRING_RE.findall(text)]

In [None]:
# Per-sample scoring for the docstring corpus. Each sample gets two scores:
#   * docstr_code_scores: get_codebleu on the full prediction vs. the full gold file;
#   * docstr_text_scores: get_codebleu with weights '1,0,0,0' on only the
#     docstring bodies found by get_docstring, joined with newlines.
# The extracted docstrings, their joined text, their counts and a has-any flag
# are kept per sample as well.
gold_docstrs, pred_docstrs = [], []
gold_docstr_counts, pred_docstr_counts = [], []
gold_docstr_texts, pred_docstr_texts = [], []
gold_has_docstr_list, pred_has_docstr_list = [], []

docstr_code_scores = []
docstr_text_scores = []

for idx in tqdm(range(docstr_preds.shape[0])):
    gold, pred = docstr_labels[idx], docstr_preds[idx]
    refs, hyp = [[gold]], [pred]

    gold_docstr, pred_docstr = get_docstring(gold), get_docstring(pred)
    gold_docstr_text = "\n".join(gold_docstr)
    pred_docstr_text = "\n".join(pred_docstr)
    gold_docstr_count, pred_docstr_count = len(gold_docstr), len(pred_docstr)
    gold_has_docstr = gold_docstr_count > 0
    pred_has_docstr = pred_docstr_count > 0

    docstr_code_score = get_codebleu(refs, hyp, "python", '0.25,0.25,0.25,0.25')
    docstr_text_score = get_codebleu([[gold_docstr_text]], [pred_docstr_text], "python", '1,0,0,0')

    docstr_code_scores.append(docstr_code_score)
    docstr_text_scores.append(docstr_text_score)

    gold_docstrs.append(gold_docstr)
    pred_docstrs.append(pred_docstr)
    gold_docstr_texts.append(gold_docstr_text)
    pred_docstr_texts.append(pred_docstr_text)
    gold_docstr_counts.append(gold_docstr_count)
    pred_docstr_counts.append(pred_docstr_count)
    gold_has_docstr_list.append(gold_has_docstr)
    pred_has_docstr_list.append(pred_has_docstr)
In [169]:
# Keep only the "ngram" component of each per-sample docstring-text score.
docstr_text_bleus = np.array([s["ngram"] for s in docstr_text_scores])

In [170]:
# Keep only the "code_bleu" component of each per-sample full-file score.
docstr_code_bleus = np.array([s["code_bleu"] for s in docstr_code_scores])

In [171]:
docstr_text_bleus.mean()

0.06557760175998402

In [173]:
docstr_total = docstr_preds.shape[0]

In [174]:
# Share of docstring-corpus samples with code_bleu exactly 1, and strictly above 0.9.
print("Perfect Prediction Rate:", (docstr_code_bleus == 1).sum() / docstr_total)
print("Above 0.9 CodeBLEU Prediction Rate:", (docstr_code_bleus > 0.9).sum() / docstr_total)

Perfect Prediction Rate: 0.06990014265335236
Above 0.9 CodeBLEU Prediction Rate: 0.2712196861626248


In [179]:
# Show one sample: its prediction, its gold label, and its docstring-text score.
idx = 760
for section, value in (
    ("prediction", docstr_preds[idx]),
    ("gold labels", docstr_labels[idx]),
    ("score", docstr_text_bleus[idx]),
):
    print_split_line(f"{idx}-{section}")
    print(value)



from vsg import token
from vsg import violation
from vsg.rule_group import structure

class formal_part_in_association_element_between_tokens(structure.Rule):
    """
    Checks for a formal part in the association element.

    Parameters
    ----------

    name : string
       The group the rule belongs to.

    identifier : string
       unique identifier.  Usually in the form of 00N.

    oStart : integer
       The starting of the token.

    oEnd : integer
       The ending of the token.
    """

    def __init__(self, name, identifier, oStart, oEnd):
        structure.Rule.__init__(self, name=name, identifier=identifier)
        self.fixable = False
        self.oStart = oStart
        self.oEnd = oEnd

    def _get_tokens_of_interest(self, oFile):
        return oFile.get_association_elements_between_tokens(self.oStart, self.oEnd)

    def _analyze(self, lToi):
        for oToi in lToi:
            lTokens = oToi.get_tokens()
            bFound = False
            for oToken

In [177]:
# Print the extracted docstring text (prediction, then gold) and the score of
# every sample whose docstring-text score is at least 0.5.
for idx in range(docstr_total):
    if not docstr_text_bleus[idx] >= 0.5:
        continue

    print_split_line(f"{idx}-prediction")
    print(pred_docstr_texts[idx])
    print_split_line(f"{idx}-gold labels")
    print(gold_docstr_texts[idx])
    print_split_line(f"{idx}-score")
    print(docstr_text_bleus[idx])



Network interface and its custom security rules.

    Variables are only populated by the server, and will be ignored when
    sending a request.

    :ivar id: ID of the network interface.
    :vartype id: str
    :param security_rules: Collection of custom security rules.
    :type security_rules:
     list[~azure.mgmt.network.v2017_11_01.models.SecurityRule]
    


Network interface and its custom security rules.

    Variables are only populated by the server, and will be ignored when
    sending a request.

    :ivar id: Network interface ID.
    :vartype id: str
    :param security_rules: Collection of custom security rules.
    :type security_rules:
     list[~azure.mgmt.network.v2017_03_01.models.SecurityRule]
    


0.8132493528194856



    Onshape REST API

    The Onshape REST API consumed by all clients.  # noqa: E501

    The version of the OpenAPI document: 1.113
    Contact: api-support@onshape.zendesk.com
    Generated by: https://openapi-generator.tech




    Onsha