In [141]:
import re
import tqdm
import wikipedia
import random
import numpy as np
import pandas as pd

from prompting.tools import CodingDataset
from prompting.tasks.debugging import corrupt, DebuggingTask, diff


In [68]:
dataset = CodingDataset()

In [84]:
context = dataset.next()

In [85]:
context

{'code': '"use strict";\n\n/**\n * Generator function that provides an infinite sequence of numbers of the fibonacci series\n */\nfunction* fibonacci() {\n    let leftValue = 0;\n    let rigthValue = 1;\n    while (true) {\n        let currentValue = leftValue;\n        yield currentValue;\n        leftValue = rigthValue;\n        rigthValue = currentValue + leftValue;\n    }\n}\n\nmodule.exports = fibonacci;',
 'repo_name': 'juancancela/invenco',
 'path': 'fibonacci.js',
 'language': 'JavaScript',
 'license': 'apache-2.0',
 'size': 386,
 'fetch_time': 0.0031769275665283203}

In [86]:
print(context['code'])

"use strict";

/**
 * Generator function that provides an infinite sequence of numbers of the fibonacci series
 */
function* fibonacci() {
    let leftValue = 0;
    let rigthValue = 1;
    while (true) {
        let currentValue = leftValue;
        yield currentValue;
        leftValue = rigthValue;
        rigthValue = currentValue + leftValue;
    }
}

module.exports = fibonacci;


In [87]:
task = DebuggingTask(llm_pipeline=None, context=context)
print(task.query)

"use strict";

/**
   Generator function that provides an infinite sequence of numbers of the fibonacci series
 */
function* fibonacci() {
    let leftValue = 0;
    let rigthValue = 1;
    while (rue) {
        let*currentValue = leftValue;
        yield currentValue;
        leftValue = rigthValue;
        rigthValue = currentValue + leftValue;
    }
}

module.exports = fibonacci;
[34m2024-01-30 15:07:48.017[0m | [1m      INFO      [0m | Removing the following 1 chunks: ['t'] at indices [197]
[34m2024-01-30 15:07:48.017[0m | [1m      INFO      [0m | Swapping chunk ' ' at index 215 with chunk '*' at index 20


In [88]:
print(diff(task.query, task.context['code']))

--- 

+++ 

@@ -1,13 +1,13 @@

 "use strict";
 
 /**
-   Generator function that provides an infinite sequence of numbers of the fibonacci series
+ * Generator function that provides an infinite sequence of numbers of the fibonacci series
  */
 function* fibonacci() {
     let leftValue = 0;
     let rigthValue = 1;
-    while (rue) {
-        let*currentValue = leftValue;
+    while (true) {
+        let currentValue = leftValue;
         yield currentValue;
         leftValue = rigthValue;
         rigthValue = currentValue + leftValue;


In [47]:
all_languages = {
    "C++": {
        'keywords': ['auto', 'break', 'case', 'char', 'const', 'continue', 'default', 'do', 'double', 'else', 'enum', 'extern', 'float', 'for', 'goto', 'if', 'int', 'long', 'register', 'return', 'short', 'signed', 'sizeof', 'static', 'struct', 'switch', 'typedef', 'union', 'unsigned', 'void', 'volatile', 'while'],
        'libraries': ['iostream', 'fstream', 'string', 'vector', 'map', 'set', 'algorithm', 'cmath', 'cstdio', 'cstdlib', 'ctime', 'cstring', 'cassert', 'cctype', 'cerrno', 'cfloat', 'ciso646', 'climits', 'clocale', 'cmath', 'csetjmp', 'csignal', 'cstdarg', 'cstddef', 'cstdio', 'cstdlib', 'cstring', 'ctime', 'cwchar', 'cwctype', 'complex', 'deque', 'exception', 'fstream', 'functional', 'iomanip', 'ios', 'iosfwd', 'iostream', 'istream', 'iterator', 'limits', 'list', 'locale', 'map', 'memory', 'new', 'numeric', 'ostream', 'queue', 'set', 'sstream', 'stack', 'stdexcept', 'streambuf', 'string', 'typeinfo', 'utility', 'valarray', 'vector'],
        'comments': ['//', '/*', '*/'],
    },
    "Dockerfile": {
        'keywords': ['from', 'maintainer', 'run', 'cmd', 'expose', 'env', 'add', 'copy', 'entrypoint', 'volume', 'user', 'workdir', 'onbuild'],
        'libraries': [],
        'comments': ['#']
    },
    "HTML": {
        'keywords': ['div', 'span', 'input', 'ul', 'body', 'tag', 'html', 'head', 'title', 'meta', 'link', 'script', 'style', 'a', 'img', 'table', 'label'],
        'libraries': [],
        'comments': ['<!--', '-->']
    },
    "Java": {
        'keywords': ['abstract', 'assert', 'boolean', 'break', 'byte', 'case', 'catch', 'char', 'class', 'continue', 'default', 'do', 'double', 'else', 'enum', 'extends', 'final', 'finally', 'float', 'for', 'if', 'implements', 'import', 'instanceof', 'int', 'interface', 'long', 'native', 'new', 'package', 'private', 'protected', 'public', 'return', 'short', 'static', 'strictfp', 'super', 'switch', 'synchronized', 'this', 'throw', 'throws', 'transient', 'try', 'void', 'volatile', 'while'],
        'libraries': ['java.awt', 'java.awt.event', 'java.io', 'java.lang', 'java.math', 'java.net', 'java.text', 'java.util', 'javax.swing'],
        'comments': ['//', '/*', '*/', '*'],
    },
    "JavaScript": {
        'keywords': ['abstract', 'arguments', 'boolean', 'break', 'byte', 'case', 'catch', 'char', 'class', 'const', 'continue', 'debugger', 'default', 'delete', 'do', 'double', 'else', 'enum', 'eval', 'export', 'extends', 'false', 'final', 'finally', 'float', 'for', 'function', 'goto', 'if', 'implements', 'import', 'in', 'instanceof', 'int', 'interface', 'let', 'long', 'native', 'module.exports' 'new', 'null', 'package', 'private', 'protected', 'public', 'return', 'short', 'static', 'super', 'switch', 'synchronized', 'this', 'throw', 'throws', 'transient', 'true', 'try', 'typeof', 'var', 'void', 'volatile', 'while', 'with', 'yield'],
        'libraries': ['React', 'express','mongoose', 'axios', 'redux', 'react-redux', 'react-router-dom', 'react-dom', 'react-scripts', 'material-ui'],
        'comments': ['//', '/*', '*/']
    },
    "Python": {'keywords': ['False', 'None', 'True', 'and', 'as', 'assert', 'break', 'class', 'continue', 'def', 'del', 'elif', 'else', 'except', 'finally', 'for', 'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'nonlocal', 'not', 'or', 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield'],
               'libraries': ['numpy', 'pandas', 'matplotlib', 'seaborn', 'scipy', 'sklearn', 'tensorflow', 'keras', 'pytorch', 'django', 'flask', 'requests', 'bs4', 'selenium', 'pyautogui', 'pyperclip', 'pyinputplus', 'pillow'],
               'comments': ['#']
    },
    "SQL": {'keywords': ['add', 'all', 'alter', 'and', 'any', 'as', 'asc', 'backup', 'between', 'case', 'check', 'column', 'constraint', 'create', 'database', 'default', 'delete', 'desc', 'distinct', 'drop', 'exec', 'exists', 'foreign', 'from', 'full', 'group', 'having', 'in', 'index', 'inner', 'insert', 'into', 'is', 'join', 'key', 'left', 'like', 'limit', 'not', 'null', 'on', 'or', 'order', 'outer', 'primary', 'procedure', 'right', 'rownum', 'select', 'set', 'table', 'top', 'truncate', 'union', 'unique', 'update', 'values', 'view', 'where'],
            'comments': ['--', '/*', '*/']
    },
    "Shell": {'keywords': ['alias', 'bg', 'bind', 'break', 'builtin', 'caller', 'cd', 'command', 'compgen', 'complete', 'continue', 'declare', 'dirs', 'disown', 'echo', 'enable', 'eval', 'exec', 'exit', 'export', 'false', 'fc', 'fg', 'getopts', 'hash', 'help', 'history', 'jobs', 'kill', 'let', 'local', 'logout', 'popd', 'printf', 'pushd', 'pwd', 'read', 'readonly', 'return', 'set', 'shift', 'shopt', 'source', 'suspend', 'test', 'times', 'trap', 'true', 'type', 'typeset', 'ulimit', 'umask', 'unalias', 'unset', 'wait'],
              'comments': ['#']
    },
}

In [None]:
# def corrupt_code(code, language):
#     keywords = all_languages[language]['keywords']
#     # either remove or replace keywords or the lines they are on
    


In [129]:
def get_special_contents(code, language):

    # filter out comments
    lines = []
    comment_lines = 0
    for line in code.splitlines():
        # filter out comments
        if any(line.startswith(symbol) for symbol in all_languages[language]['comments']):
            comment_lines += 1
            continue

        lines.append(line.lower())
    code = '\n'.join(lines)

    def extract(field):
        matches = set()

        # check which keywords and libraries are present in the code
        for keyword in all_languages[language].get(field,[]):
            if re.search(r'\b' + keyword + r'\b', code):
                matches.add(keyword)
        return matches

    present_keywords = extract('keywords')
    present_libraries = extract('libraries')
    
    # here we select a library or a keyword as the forward term
    if present_libraries and random.random() < 0.7:
        forward_term = random.choice(list(present_libraries))
    elif present_keywords:
        forward_term = random.choice(list(present_keywords))
    else:
        forward_term = None

    return present_keywords, present_libraries, comment_lines, forward_term

In [130]:
get_special_contents(context['code'], context['language'])

({'double', 'for', 'if', 'int', 'return'}, {'new'}, 1, 'new')

In [131]:
results = []
for i in tqdm.tqdm(range(10000)):
    context = dataset.next()
    if context['language'] not in all_languages:
        continue
    present_keywords, present_libraries, comment_lines, forward_term = get_special_contents(context['code'], context['language'])
    results.append({
        'language': context['language'],
        'keywords': present_keywords,
        'num_keywords': len(present_keywords),
        'libraries': present_libraries,
        'num_libraries': len(present_libraries),
        'code_lines': len(context['code'].splitlines()),
        'comment_lines': comment_lines,
        'forward_term': forward_term,
    })



100%|██████████| 10000/10000 [00:29<00:00, 337.81it/s]


In [132]:
df = pd.DataFrame(results)
df

Unnamed: 0,language,keywords,num_keywords,libraries,num_libraries,code_lines,comment_lines,forward_term
0,HTML,"{span, html, input, table, a, style, body}",7,{},0,67,0,table
1,JavaScript,"{var, if, function, return, with}",5,{},0,88,2,return
2,Python,"{from, import}",2,{},0,94,15,import
3,SQL,"{column, into, drop, alter, table, values, ins...",7,{},0,13,1,drop
4,Java,"{new, final, super, public, private, extends, ...",10,{},0,15,0,public
...,...,...,...,...,...,...,...,...
9746,HTML,"{img, title, html, div, link, head, meta, tabl...",10,{},0,50,0,body
9747,HTML,{a},1,{},0,5,0,a
9748,Python,"{for, in, import, return, from, def}",6,{},0,66,1,from
9749,JavaScript,"{var, if, function, true, else}",5,{express},1,34,14,if


In [135]:
df.language.value_counts()

language
Java          3396
JavaScript    2192
HTML          1546
Python        1210
C++            860
Shell          316
SQL            125
Dockerfile     106
Name: count, dtype: int64

In [137]:
df.forward_term.value_counts(normalize=True).head(30).cumsum()

forward_term
java.util    0.065658
return       0.110230
import       0.153340
function     0.187056
for          0.220459
if           0.252818
class        0.284656
public       0.315136
package      0.343424
var          0.369937
this         0.395825
java.io      0.419520
void         0.441649
html         0.461795
div          0.481420
a            0.499061
title        0.516701
from         0.532255
const        0.547495
django       0.562422
new          0.576827
body         0.591023
true         0.603967
extends      0.616284
def          0.628079
in           0.638727
static       0.648956
head         0.659186
int          0.669102
else         0.678288
Name: proportion, dtype: float64

In [138]:
search_words = ['programming', 'keyword', 'library', 'coding']
df['search_query'] = df.apply(lambda row: f"{row['forward_term']} {row['language']} {random.choice(search_words)}", axis=1)

In [163]:
df.search_query.value_counts(normalize=True).head(10).cumsum()

search_query
java.util Java library             0.018972
java.util Java keyword             0.035689
java.util Java coding              0.051379
java.util Java programming         0.064506
function JavaScript library        0.073121
function JavaScript keyword        0.081735
function JavaScript programming    0.089837
function JavaScript coding         0.097631
public Java coding                 0.105220
var JavaScript coding              0.112809
Name: proportion, dtype: float64

In [151]:
forward_terms = random.choices(df.search_query.unique(),k=100)
forward_terms


['expose Dockerfile programming',
 'java.awt Java programming',
 'return JavaScript keyword',
 'mongoose JavaScript coding',
 'cstring C++ coding',
 'history Shell programming',
 'table SQL library',
 'fstream C++ library',
 'group SQL library',
 'boolean Java coding',
 'not Python programming',
 'unsigned C++ keyword',
 'is Python library',
 'if C++ coding',
 'complex C++ keyword',
 'None C++ programming',
 'new C++ library',
 'and Python programming',
 'new C++ coding',
 'do C++ library',
 'ostream C++ programming',
 'class JavaScript programming',
 'with Python programming',
 'javax.swing Java coding',
 'class Java programming',
 'const JavaScript library',
 'pytorch Python library',
 'group SQL library',
 'memory C++ programming',
 'matplotlib Python programming',
 'arguments JavaScript library',
 'alias Shell library',
 'int JavaScript keyword',
 'printf Shell library',
 'entrypoint Dockerfile programming',
 'head HTML keyword',
 'default Java keyword',
 'delete SQL coding',
 'arg

In [152]:

results = []
for term in tqdm.tqdm(forward_terms):
    results.append(wikipedia.search(term, results=100))

100%|██████████| 100/100 [01:01<00:00,  1.62it/s]


In [154]:
results

[['Docker (software)', 'Ruby on Rails'],
 ['Java (programming language)',
  'Final (Java)',
  'Java Platform, Micro Edition',
  'Java OpenGL',
  'Java package',
  'Java Platform, Standard Edition',
  'Swing (Java)',
  'Java Native Interface',
  'GNU Compiler for Java',
  'Java applet',
  'Java Class Library',
  'Java TV',
  'Abstract Window Toolkit',
  'Java version history',
  'Java (software platform)',
  'JavaOS',
  'Java Media Framework',
  'Java AWT Native Interface',
  'Eclipse (software)',
  'Java 2D',
  'JWt (Java web toolkit)',
  'AWT',
  'Jython',
  'Event-driven programming',
  'Java Foundation Classes',
  'Standard Widget Toolkit',
  'BD-J',
  'QuickTime for Java',
  'Model–view–presenter',
  'Jakarta Server Faces',
  'Wrapper function',
  'Comparison of the Java and .NET platforms',
  'LWJGL',
  'Event (computing)',
  'Apache Harmony',
  'Event dispatching thread',
  'MIDlet',
  'Android software development',
  'Judoscript',
  'Clipping (computer graphics)',
  'Connected 

In [158]:
pd.Series(list(map(lambda x: len(x),results))).value_counts(normalize=True)

100    0.73
0      0.04
5      0.02
15     0.02
95     0.02
45     0.02
2      0.01
31     0.01
1      0.01
4      0.01
75     0.01
76     0.01
51     0.01
60     0.01
61     0.01
38     0.01
17     0.01
79     0.01
8      0.01
11     0.01
96     0.01
Name: proportion, dtype: float64

In [160]:

iou = np.zeros((len(forward_terms), len(forward_terms)))

for i, term in enumerate(forward_terms):
    r1 = results[i]
    for j, term2 in enumerate(forward_terms):
        if i == j or len(results[j]) == 0:
            continue

        iou[i,j] = len(set(r1).intersection(results[j])) / len(set(r1).union(results[j]))

iou.round(2)

array([[0.  , 0.  , 0.  , ..., 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.01, ..., 0.07, 0.01, 0.  ],
       [0.  , 0.01, 0.  , ..., 0.06, 0.03, 0.05],
       ...,
       [0.  , 0.07, 0.06, ..., 0.  , 0.04, 0.01],
       [0.  , 0.01, 0.03, ..., 0.04, 0.  , 0.01],
       [0.  , 0.  , 0.05, ..., 0.01, 0.01, 0.  ]])

In [162]:
import plotly.express as px

px.imshow(iou, color_continuous_scale='Blues',
          title='Intersection over Union of Wikipedia search results for Debugging forward terms',
          width=800, height=800,
)

In [140]:
for lang in df.language.unique():
    print(f'------------------------\nLanguage: {lang}')
    print(df.loc[df.language==lang].libraries.apply(list).explode().value_counts())

------------------------
Language: HTML
Series([], Name: count, dtype: int64)
------------------------
Language: JavaScript
libraries
express             111
redux                39
mongoose             23
react-redux          18
react-dom            10
react-router-dom      6
axios                 3
Name: count, dtype: int64
------------------------
Language: Python
libraries
django        206
numpy          76
requests       40
flask          26
matplotlib     25
pandas         20
scipy          14
tensorflow     12
sklearn         6
bs4             4
keras           3
seaborn         3
pytorch         1
selenium        1
pillow          1
Name: count, dtype: int64
------------------------
Language: SQL
Series([], Name: count, dtype: int64)
------------------------
Language: Java
libraries
java.util                 1028
java.io                    433
java.lang                  125
java.net                    60
java.awt                    56
javax.swing                 56
java.math  

In [110]:
df.groupby('language').libraries.apply(list).explode().value_counts()

libraries
{}                     708
{java.util}             94
{java.io}               24
{django}                16
{string}                11
                      ... 
{ostream, string}        1
{new, string, set}       1
{cassert, iterator}      1
{new, set}               1
{matplotlib}             1
Name: count, Length: 76, dtype: int64

In [41]:
"""
context is sampled from CodingDataset

context contains code, language, and other fields

we check the code for keywords in the language
we then prompt the LLM to create a query about that line in the code
we then generate a reference answer to the query [THE PROBLEM IS THAT THE REFERENCE IS NOT ALWAYS CORRECT UNLESS WE USE RAG]
"""

'\ncontext is sampled from CodingDataset\n\ncontext contains code, language, and other fields\n\nwe check the code for keywords in the language\nwe then prompt the LLM to create a query about that line in the code\nwe then generate a reference answer to the query [THE PROBLEM IS THAT THE REFERENCE IS NOT ALWAYS CORRECT UNLESS WE USE RAG]\n'

In [118]:
random_present_keyword = 'django'#random.choice(list(present_keywords))
search_query = f'{context["language"]} {random_present_keyword}'
print(search_query)
wikipedia.search(search_query, results=10)


for term in forward_terms:
    print(term)
    results.append(wikipedia.search(term, results=100))

Java django


['Django (web framework)',
 'Model–view–controller',
 'Central Authentication Service',
 'Google App Engine',
 'Server-sent events',
 'List of object–relational mapping software',
 'W3Schools',
 'Bitbucket',
 'Ratatouille (film)',
 'Bootstrap (front-end framework)']

In [119]:
wikipedia.search('java java.util')

['Generics in Java',
 'Java collections framework',
 'Java version history',
 'Java (programming language)',
 'Final (Java)',
 'Java Platform, Standard Edition',
 'Java annotation',
 'Java syntax',
 'Primitive wrapper class in Java',
 'Java package']