Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
guody5 committed Apr 12, 2022
1 parent edc05c3 commit 3e58572
Showing 1 changed file with 13 additions and 0 deletions.
13 changes: 13 additions & 0 deletions UniXcoder/downstream-tasks/zero-shot-search/dataset/preprocess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import json

# For each language, read "<lang>.jsonl" (one JSON object per line), attach the
# source code of the referenced submission under the "func" key, and write the
# augmented records to "<lang>_with_func.jsonl".
#
# Each input record must provide:
#   "label" - problem number, used to build the "pNNNNN" directory name
#   "index" - submission id, used as the source file's base name
for lang, suffix in [("Java", ".java"), ("Ruby", ".rb"), ("Python", ".py")]:
    src_name = "{}.jsonl".format(lang.lower())
    dst_name = "{}_with_func.jsonl".format(lang.lower())
    # Explicit UTF-8 keeps the result independent of the platform locale.
    with open(src_name, encoding="utf-8") as f, \
            open(dst_name, "w", encoding="utf-8") as f1:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines instead of crashing in json.loads("").
                continue
            js = json.loads(line)
            # Problem directory is "p" + label left-padded with zeros to
            # 5 characters (labels already 5+ characters long are kept as is).
            problem_id = "p" + str(js["label"]).rjust(5, "0")
            submission_id = js["index"]
            code_path = "Project_CodeNet/data/{}/{}/{}{}".format(
                problem_id, lang, submission_id, suffix
            )
            # Context manager closes each source file promptly; the previous
            # bare open(...).read() leaked one handle per record.
            with open(code_path, encoding="utf-8") as code_file:
                js["func"] = code_file.read()
            f1.write(json.dumps(js) + "\n")

0 comments on commit 3e58572

Please sign in to comment.