In [1]:
!pip install -q -U datasets

In [2]:
# import locale
# locale.getpreferredencoding = lambda: "UTF-8"

In [3]:
!pip install -q git+https://github.com/huggingface/transformers.git@main accelerate

### Loading the model and tokenizer

In [4]:
import datasets

In [5]:
from transformers import AutoTokenizer
import transformers
import torch
from tqdm import notebook
import pandas as pd
import numpy as np
from datasets import Dataset
from transformers.pipelines.pt_utils import KeyDataset

import json



In [6]:
model_id = "codellama/CodeLlama-7b-Instruct-hf"

### Preparing the Pipeline

In [7]:
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    model_max_length=512,
    padding_side="left",
    add_eos_token=True)

tokenizer.pad_token = tokenizer.eos_token

Downloading (…)okenizer_config.json:   0%|          | 0.00/749 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

In [8]:
def tokenize(data):
    result = tokenizer(
        data['prompt'],
        truncation=True,
        max_length=512,
        padding="max_length",
    )
    result["labels"] = result["input_ids"].copy()
    return result

In [9]:
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    torch_dtype=torch.float16,
    device_map="auto",
)

Downloading (…)lve/main/config.json:   0%|          | 0.00/646 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [10]:
pipeline.tokenizer.pad_token_id = tokenizer.eos_token_id

### Generating Code

In [11]:
def llama_v2_prompt(
    messages: list[dict]
):
    B_INST, E_INST = "[INST]", "[/INST]"
    B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
    BOS, EOS = "<s>", "</s>"
    DEFAULT_SYSTEM_PROMPT = f"""Provide answers in Python. Wrap around code in [PYTHON] and [/PYTHON]. Don't write any comments in generated code."""

    if messages[0]["role"] != "system":
        messages = [
            {
                "role": "system",
                "content": DEFAULT_SYSTEM_PROMPT,
            }
        ] + messages


    messages = [
        {
            "role": messages[1]["role"],
            "content": B_SYS + messages[0]["content"] + E_SYS + messages[1]["content"],
        }
    ] + messages[2:]

    messages_list = [
        f"{BOS}{B_INST}{(prompt['content']).strip()}{E_INST}{(answer['content']).strip()}{EOS}"
        for prompt, answer in zip(messages[::2], messages[1::2])
    ]
    messages_list.append(f"{BOS}{B_INST}{(messages[-1]['content']).strip()}{E_INST}\n")

    return "".join(messages_list)

In [12]:
def generate_code(dataset):
    responses = []
    batch_count = 0
    batch_size = 2
    print(f"Total batches = {len(dataset)//batch_size}")
    for out in pipeline(
      KeyDataset(dataset, "prompt"),
      do_sample=True,
      temperature=0.1,
      top_p=0.9,
      num_return_sequences=1,
      eos_token_id=tokenizer.eos_token_id,
      max_new_tokens=768,
      batch_size = batch_size
      ):
        batch_count += 1
        for o in out:
            responses.append(o['generated_text'])
            #print(o['generated_text'])
        print(f'Processed batch: {batch_count}')
    return responses

In [13]:
def end_overlap(a, b):
    for i in range(0, len(a)):
        if b.startswith(a[i:]):
            return i
    return len(a)

def remove_prefix(s, prefix):
    prefix_end_idx = s.index(prefix) + len(prefix)
    return s[prefix_end_idx:]

def extract_code(prompt, response, code_context):
    B_PYTHON, E_PYTHON = '[PYTHON]', '[/PYTHON]'
    trimmed_response = remove_prefix(response, prompt)
    if trimmed_response.startswith(B_PYTHON):
        trimmed_response = remove_prefix(trimmed_response, B_PYTHON)
    try:
        suffix_idx = trimmed_response.index(E_PYTHON)
        trimmed_response = trimmed_response[:suffix_idx]
    except ValueError as ve:
          pass
    trimmed_response = trimmed_response.strip()
    code_context = code_context.strip()
    overlap_idx = end_overlap(code_context, trimmed_response)
    cleaned_response = code_context[:overlap_idx] + trimmed_response
    return cleaned_response

def save_file(path, code):
    with open(path, 'w') as file:
        file.write(code)

def run_code_generation(dataset, save_path, n_passes=5):
    responses = generate_code(dataset)
    return responses
  # for index, response in responses:
  #   question = row['question']
  #   code_context = row['code_context']
  #   for p in range(n_passes):
  #     prompt = f"# {question}\n{code_context}\n"
  #     instruction = {
  #       "role": "user",
  #       "content": prompt,
  #     }
  #     prompt, response = generate_code([instruction])
  #     code = extract_code(prompt, response, code_context)
  #     save_file(f"{save_path}/question-{index}-pass-{p}.py", code)

In [14]:

response = """<s>[INST]<<SYS>>
Provide answers in Python. Wrap around code in [PYTHON] and [/PYTHON]. Don't write any comments in generated code.
<</SYS>>

# Maximum Prefix Sum possible by merging two given arrays.
def maxPresum(a, b):[/INST]
[PYTHON]
def maxPresum(a, b):
    max_sum = 0
    for i in range(len(a)):
        for j in range(len(b)):
            max_sum = max(max_sum, a[i] + b[j])
    return max_sum
[/PYTHON]
"""

prompt = """<s>[INST]<<SYS>>
Provide answers in Python. Wrap around code in [PYTHON] and [/PYTHON]. Don't write any comments in generated code.
<</SYS>>

# Maximum Prefix Sum possible by merging two given arrays.
def maxPresum(a, b):[/INST]
"""

code_context = """def maxPresum(a, b):
"""

In [15]:
# extract_code(prompt, response, code_context)

In [16]:
CODE_GENERATION_PATH = "./generations/python"

In [17]:
train_df = pd.read_csv('/kaggle/input/python-generation-processed/train_response_processed.csv', index_col=0)
train_df.head()

Unnamed: 0,id,text,code,question,functions,code_context,context,llama_prompt,response,response_code
0,0,Maximum Prefix Sum possible by merging two giv...,"def maxPresum(a, b):\n X = max(a[0], 0)\n ...",Maximum Prefix Sum possible by merging two giv...,"['def maxPresum(a, b):']","def maxPresum(a, b):","def maxPresum(a, b):",<s>[INST]<<SYS>>\nProvide answers in Python. W...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,"def maxPresum(a, b):\n max_sum = 0\n for..."
1,1,Check if a number can be represented as sum of...,import math\n\n\ndef sumOfTwoCubes(n):\n lo...,Check if a number can be represented as sum of...,['def sumOfTwoCubes(n):'],import math\n\n\ndef sumOfTwoCubes(n):,import math\n\n\ndef sumOfTwoCubes(n):,<s>[INST]<<SYS>>\nProvide answers in Python. W...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,import math\n\n\ndef sumOfTwoCubes(n):\n fo...
2,2,Generate an N | Python3 program for the above ...,sieve = [1] * (1000000 + 1)\n\n\ndef sieveOfPr...,Generate an N.,"['def sieveOfPrimes():', 'def getArray(arr, N):']",sieve = [1] * (1000000 + 1)\n\n\ndef sieveOfPr...,sieve = [1] * (1000000 + 1)\n\n\ndef sieveOfPr...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,sieve = [1] * (1000000 + 1)\n\n\ndef sieveOfPr...
3,3,Nth natural number after removing all numbers ...,def findNthNumber(N):\n result = 0\n p =...,Nth natural number after removing all numbers ...,['def findNthNumber(N):'],def findNthNumber(N):,def findNthNumber(N):,<s>[INST]<<SYS>>\nProvide answers in Python. W...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,def findNthNumber(N):\n nth_number = 1\n ...
4,4,Check if an integer is rotation of another giv...,"import math\n\n\ndef check(A, B):\n if (A =...",Check if an integer is rotation of another giv...,"['def check(A, B):']","import math\n\n\ndef check(A, B):","import math\n\n\ndef check(A, B):",<s>[INST]<<SYS>>\nProvide answers in Python. W...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,"import math\n\n\ndef check(A, B):\n if A ==..."


In [18]:
REPORTS_PATH = '/kaggle/input/python-generations-analysis'

ids = train_df['id'].values.tolist()

analysis_reports = {}
for id in notebook.tqdm(ids):
    with open(f'{REPORTS_PATH}/question-{id}-pass-1.json') as file:
        report = json.load(file)
        analysis_reports[id] = report

  0%|          | 0/300 [00:00<?, ?it/s]

In [19]:
analysis_reports[10]

{'./files/question-10-pass-1.py': [{'code': 'E225',
   'filename': './files/question-10-pass-1.py',
   'line_number': 11,
   'column_number': 23,
   'text': 'missing whitespace around operator',
   'physical_line': '            if b[i][j]!= b[i][j-1]:\n'},
  {'code': 'W292',
   'filename': './files/question-10-pass-1.py',
   'line_number': 16,
   'column_number': 17,
   'text': 'no newline at end of file',
   'physical_line': '    return swaps'}]}

In [20]:
train_df['report'] = train_df['id'].map(lambda x: analysis_reports[x])

train_df.head()

Unnamed: 0,id,text,code,question,functions,code_context,context,llama_prompt,response,response_code,report
0,0,Maximum Prefix Sum possible by merging two giv...,"def maxPresum(a, b):\n X = max(a[0], 0)\n ...",Maximum Prefix Sum possible by merging two giv...,"['def maxPresum(a, b):']","def maxPresum(a, b):","def maxPresum(a, b):",<s>[INST]<<SYS>>\nProvide answers in Python. W...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,"def maxPresum(a, b):\n max_sum = 0\n for...",{'./files/question-0-pass-1.py': [{'code': 'W2...
1,1,Check if a number can be represented as sum of...,import math\n\n\ndef sumOfTwoCubes(n):\n lo...,Check if a number can be represented as sum of...,['def sumOfTwoCubes(n):'],import math\n\n\ndef sumOfTwoCubes(n):,import math\n\n\ndef sumOfTwoCubes(n):,<s>[INST]<<SYS>>\nProvide answers in Python. W...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,import math\n\n\ndef sumOfTwoCubes(n):\n fo...,{'./files/question-1-pass-1.py': [{'code': 'W2...
2,2,Generate an N | Python3 program for the above ...,sieve = [1] * (1000000 + 1)\n\n\ndef sieveOfPr...,Generate an N.,"['def sieveOfPrimes():', 'def getArray(arr, N):']",sieve = [1] * (1000000 + 1)\n\n\ndef sieveOfPr...,sieve = [1] * (1000000 + 1)\n\n\ndef sieveOfPr...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,sieve = [1] * (1000000 + 1)\n\n\ndef sieveOfPr...,{'./files/question-2-pass-1.py': [{'code': 'W2...
3,3,Nth natural number after removing all numbers ...,def findNthNumber(N):\n result = 0\n p =...,Nth natural number after removing all numbers ...,['def findNthNumber(N):'],def findNthNumber(N):,def findNthNumber(N):,<s>[INST]<<SYS>>\nProvide answers in Python. W...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,def findNthNumber(N):\n nth_number = 1\n ...,{'./files/question-3-pass-1.py': [{'code': 'W2...
4,4,Check if an integer is rotation of another giv...,"import math\n\n\ndef check(A, B):\n if (A =...",Check if an integer is rotation of another giv...,"['def check(A, B):']","import math\n\n\ndef check(A, B):","import math\n\n\ndef check(A, B):",<s>[INST]<<SYS>>\nProvide answers in Python. W...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,"import math\n\n\ndef check(A, B):\n if A ==...",{'./files/question-4-pass-1.py': [{'code': 'E2...


In [21]:
type(train_df.loc[0]['report'])

dict

In [22]:
def build_feedback(report):
    message = "Regenerate the entire code after fixing the following errors: \n"
    
    statements = []
    root_key = next(iter(report))
    errors = report[root_key]
    for i, error in enumerate(errors):
        error_line = ""
        if error['physical_line']:
            error_line = error['physical_line'].strip('\n')
        ln = min(len(error_line), 10)
        statement = f"{i+1}. {error['text']} at line no. {error['line_number']} around code - `{error_line[:ln]}`"
        statements.append(statement)
        
    message = message + '\n'.join(statements)
    message += "\nWrap around code in [PYTHON] and [/PYTHON]."
    return message

In [23]:
print(build_feedback(train_df.loc[32]['report']))

Regenerate the entire code after fixing the following errors: 
1. 'bisect.bisect_left' imported but unused at line no. 1 around code - `from bisec`
2. no newline at end of file at line no. 10 around code - `    return`
Wrap around code in [PYTHON] and [/PYTHON].


In [24]:
def build_prompt(question, code_context, response_code, feedback):
    prompt = f"# {question}\n{code_context}\n"
    instruction1 = {
      "role": "user",
      "content": prompt,
    }
    response1 = {
        "role": "agent",
        "content": response_code
    }
    instruction2 = {
      "role": "user",
      "content": f"{feedback} \n{prompt}",
    }
    llama_prompt = llama_v2_prompt([instruction1, response1, instruction2])
    return llama_prompt

In [25]:
train_df['feedback'] = train_df['report'].apply(lambda x: build_feedback(x))
train_df['llama_prompt_with_feedback'] = train_df.apply(lambda row: build_prompt(row.question, row.code_context, row.response_code, row.feedback), axis = 1)
train_df.head()

Unnamed: 0,id,text,code,question,functions,code_context,context,llama_prompt,response,response_code,report,feedback,llama_prompt_with_feedback
0,0,Maximum Prefix Sum possible by merging two giv...,"def maxPresum(a, b):\n X = max(a[0], 0)\n ...",Maximum Prefix Sum possible by merging two giv...,"['def maxPresum(a, b):']","def maxPresum(a, b):","def maxPresum(a, b):",<s>[INST]<<SYS>>\nProvide answers in Python. W...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,"def maxPresum(a, b):\n max_sum = 0\n for...",{'./files/question-0-pass-1.py': [{'code': 'W2...,Regenerate the entire code after fixing the fo...,<s>[INST]<<SYS>>\nProvide answers in Python. W...
1,1,Check if a number can be represented as sum of...,import math\n\n\ndef sumOfTwoCubes(n):\n lo...,Check if a number can be represented as sum of...,['def sumOfTwoCubes(n):'],import math\n\n\ndef sumOfTwoCubes(n):,import math\n\n\ndef sumOfTwoCubes(n):,<s>[INST]<<SYS>>\nProvide answers in Python. W...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,import math\n\n\ndef sumOfTwoCubes(n):\n fo...,{'./files/question-1-pass-1.py': [{'code': 'W2...,Regenerate the entire code after fixing the fo...,<s>[INST]<<SYS>>\nProvide answers in Python. W...
2,2,Generate an N | Python3 program for the above ...,sieve = [1] * (1000000 + 1)\n\n\ndef sieveOfPr...,Generate an N.,"['def sieveOfPrimes():', 'def getArray(arr, N):']",sieve = [1] * (1000000 + 1)\n\n\ndef sieveOfPr...,sieve = [1] * (1000000 + 1)\n\n\ndef sieveOfPr...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,sieve = [1] * (1000000 + 1)\n\n\ndef sieveOfPr...,{'./files/question-2-pass-1.py': [{'code': 'W2...,Regenerate the entire code after fixing the fo...,<s>[INST]<<SYS>>\nProvide answers in Python. W...
3,3,Nth natural number after removing all numbers ...,def findNthNumber(N):\n result = 0\n p =...,Nth natural number after removing all numbers ...,['def findNthNumber(N):'],def findNthNumber(N):,def findNthNumber(N):,<s>[INST]<<SYS>>\nProvide answers in Python. W...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,def findNthNumber(N):\n nth_number = 1\n ...,{'./files/question-3-pass-1.py': [{'code': 'W2...,Regenerate the entire code after fixing the fo...,<s>[INST]<<SYS>>\nProvide answers in Python. W...
4,4,Check if an integer is rotation of another giv...,"import math\n\n\ndef check(A, B):\n if (A =...",Check if an integer is rotation of another giv...,"['def check(A, B):']","import math\n\n\ndef check(A, B):","import math\n\n\ndef check(A, B):",<s>[INST]<<SYS>>\nProvide answers in Python. W...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,"import math\n\n\ndef check(A, B):\n if A ==...",{'./files/question-4-pass-1.py': [{'code': 'E2...,Regenerate the entire code after fixing the fo...,<s>[INST]<<SYS>>\nProvide answers in Python. W...


In [26]:
# print(train_df.loc[32]['llama_prompt_with_feedback'])

In [27]:
prompts = train_df['llama_prompt_with_feedback'].values.tolist()
prompts_dic = {'prompt': prompts}
prompts_dataset = Dataset.from_dict(prompts_dic)

In [28]:
prompts_dataset[0]

{'prompt': "<s>[INST]<<SYS>>\nProvide answers in Python. Wrap around code in [PYTHON] and [/PYTHON]. Don't write any comments in generated code.\n<</SYS>>\n\n# Maximum Prefix Sum possible by merging two given arrays.\ndef maxPresum(a, b):[/INST]def maxPresum(a, b):\n    max_sum = 0\n    for i in range(len(a)):\n        for j in range(len(b)):\n            current_sum = a[i] + b[j]\n            if current_sum > max_sum:\n                max_sum = current_sum\n    return max_sum</s><s>[INST]Regenerate the entire code after fixing the following errors: \n1. no newline at end of file at line no. 8 around code - `    return`\nWrap around code in [PYTHON] and [/PYTHON]. \n# Maximum Prefix Sum possible by merging two given arrays.\ndef maxPresum(a, b):[/INST]\n"}

In [29]:
responses = run_code_generation(prompts_dataset, CODE_GENERATION_PATH, n_passes=1)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Total batches = 150


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 1
Processed batch: 2


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 3
Processed batch: 4


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 5
Processed batch: 6


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 7
Processed batch: 8


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 9
Processed batch: 10


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 11
Processed batch: 12


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 13
Processed batch: 14


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 15
Processed batch: 16


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 17
Processed batch: 18


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 19
Processed batch: 20


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 21
Processed batch: 22


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 23
Processed batch: 24


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 25
Processed batch: 26


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 27
Processed batch: 28


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 29
Processed batch: 30


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 31
Processed batch: 32


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 33
Processed batch: 34


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 35
Processed batch: 36


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 37
Processed batch: 38


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 39
Processed batch: 40


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 41
Processed batch: 42


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 43
Processed batch: 44


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 45
Processed batch: 46


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 47
Processed batch: 48


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 49
Processed batch: 50


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 51
Processed batch: 52


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 53
Processed batch: 54


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 55
Processed batch: 56


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 57
Processed batch: 58


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 59
Processed batch: 60


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 61
Processed batch: 62


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 63
Processed batch: 64


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 65
Processed batch: 66


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 67
Processed batch: 68


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 69
Processed batch: 70


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 71
Processed batch: 72


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 73
Processed batch: 74


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 75
Processed batch: 76


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 77
Processed batch: 78


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 79
Processed batch: 80


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 81
Processed batch: 82


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 83
Processed batch: 84


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 85
Processed batch: 86


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 87
Processed batch: 88


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 89
Processed batch: 90


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 91
Processed batch: 92


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 93
Processed batch: 94


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 95
Processed batch: 96


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 97
Processed batch: 98


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 99
Processed batch: 100


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 101
Processed batch: 102


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 103
Processed batch: 104


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 105
Processed batch: 106


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 107
Processed batch: 108


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 109
Processed batch: 110


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 111
Processed batch: 112


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 113
Processed batch: 114


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 115
Processed batch: 116


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 117
Processed batch: 118


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 119
Processed batch: 120


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 121
Processed batch: 122


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 123
Processed batch: 124


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 125
Processed batch: 126


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 127
Processed batch: 128


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 129
Processed batch: 130


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 131
Processed batch: 132


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 133
Processed batch: 134


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 135
Processed batch: 136


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 137
Processed batch: 138


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 139
Processed batch: 140


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 141
Processed batch: 142


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 143
Processed batch: 144


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 145
Processed batch: 146


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 147
Processed batch: 148


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 149
Processed batch: 150


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 151
Processed batch: 152


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 153
Processed batch: 154


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 155
Processed batch: 156


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 157
Processed batch: 158


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 159
Processed batch: 160


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 161
Processed batch: 162


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 163
Processed batch: 164


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 165
Processed batch: 166


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 167
Processed batch: 168


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 169
Processed batch: 170


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 171
Processed batch: 172


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 173
Processed batch: 174


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 175
Processed batch: 176


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 177
Processed batch: 178


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 179
Processed batch: 180


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 181
Processed batch: 182


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 183
Processed batch: 184


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 185
Processed batch: 186


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 187
Processed batch: 188


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 189
Processed batch: 190


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 191
Processed batch: 192


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 193
Processed batch: 194


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 195
Processed batch: 196


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 197
Processed batch: 198


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 199
Processed batch: 200


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 201
Processed batch: 202


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 203
Processed batch: 204


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 205
Processed batch: 206


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 207
Processed batch: 208


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 209
Processed batch: 210


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 211
Processed batch: 212


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 213
Processed batch: 214


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 215
Processed batch: 216


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 217
Processed batch: 218


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 219
Processed batch: 220


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 221
Processed batch: 222


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 223
Processed batch: 224


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 225
Processed batch: 226


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 227
Processed batch: 228


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 229
Processed batch: 230


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 231
Processed batch: 232


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 233
Processed batch: 234


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 235
Processed batch: 236


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 237
Processed batch: 238


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 239
Processed batch: 240


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 241
Processed batch: 242


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 243
Processed batch: 244


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 245
Processed batch: 246


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 247
Processed batch: 248


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 249
Processed batch: 250


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 251
Processed batch: 252


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 253
Processed batch: 254


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 255
Processed batch: 256


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 257
Processed batch: 258


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 259
Processed batch: 260


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 261
Processed batch: 262


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 263
Processed batch: 264


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 265
Processed batch: 266


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 267
Processed batch: 268


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 269
Processed batch: 270


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 271
Processed batch: 272


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 273
Processed batch: 274


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 275
Processed batch: 276


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 277
Processed batch: 278


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 279
Processed batch: 280


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 281
Processed batch: 282


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 283
Processed batch: 284


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 285
Processed batch: 286


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 287
Processed batch: 288


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 289
Processed batch: 290


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 291
Processed batch: 292


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 293
Processed batch: 294


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 295
Processed batch: 296


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch: 297
Processed batch: 298
Processed batch: 299
Processed batch: 300


In [30]:
len(responses)

300

In [31]:
train_df['response_feedback'] = responses
train_df.head()

Unnamed: 0,id,text,code,question,functions,code_context,context,llama_prompt,response,response_code,report,feedback,llama_prompt_with_feedback,response_feedback
0,0,Maximum Prefix Sum possible by merging two giv...,"def maxPresum(a, b):\n X = max(a[0], 0)\n ...",Maximum Prefix Sum possible by merging two giv...,"['def maxPresum(a, b):']","def maxPresum(a, b):","def maxPresum(a, b):",<s>[INST]<<SYS>>\nProvide answers in Python. W...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,"def maxPresum(a, b):\n max_sum = 0\n for...",{'./files/question-0-pass-1.py': [{'code': 'W2...,Regenerate the entire code after fixing the fo...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,<s>[INST]<<SYS>>\nProvide answers in Python. W...
1,1,Check if a number can be represented as sum of...,import math\n\n\ndef sumOfTwoCubes(n):\n lo...,Check if a number can be represented as sum of...,['def sumOfTwoCubes(n):'],import math\n\n\ndef sumOfTwoCubes(n):,import math\n\n\ndef sumOfTwoCubes(n):,<s>[INST]<<SYS>>\nProvide answers in Python. W...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,import math\n\n\ndef sumOfTwoCubes(n):\n fo...,{'./files/question-1-pass-1.py': [{'code': 'W2...,Regenerate the entire code after fixing the fo...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,<s>[INST]<<SYS>>\nProvide answers in Python. W...
2,2,Generate an N | Python3 program for the above ...,sieve = [1] * (1000000 + 1)\n\n\ndef sieveOfPr...,Generate an N.,"['def sieveOfPrimes():', 'def getArray(arr, N):']",sieve = [1] * (1000000 + 1)\n\n\ndef sieveOfPr...,sieve = [1] * (1000000 + 1)\n\n\ndef sieveOfPr...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,sieve = [1] * (1000000 + 1)\n\n\ndef sieveOfPr...,{'./files/question-2-pass-1.py': [{'code': 'W2...,Regenerate the entire code after fixing the fo...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,<s>[INST]<<SYS>>\nProvide answers in Python. W...
3,3,Nth natural number after removing all numbers ...,def findNthNumber(N):\n result = 0\n p =...,Nth natural number after removing all numbers ...,['def findNthNumber(N):'],def findNthNumber(N):,def findNthNumber(N):,<s>[INST]<<SYS>>\nProvide answers in Python. W...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,def findNthNumber(N):\n nth_number = 1\n ...,{'./files/question-3-pass-1.py': [{'code': 'W2...,Regenerate the entire code after fixing the fo...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,<s>[INST]<<SYS>>\nProvide answers in Python. W...
4,4,Check if an integer is rotation of another giv...,"import math\n\n\ndef check(A, B):\n if (A =...",Check if an integer is rotation of another giv...,"['def check(A, B):']","import math\n\n\ndef check(A, B):","import math\n\n\ndef check(A, B):",<s>[INST]<<SYS>>\nProvide answers in Python. W...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,"import math\n\n\ndef check(A, B):\n if A ==...",{'./files/question-4-pass-1.py': [{'code': 'E2...,Regenerate the entire code after fixing the fo...,<s>[INST]<<SYS>>\nProvide answers in Python. W...,<s>[INST]<<SYS>>\nProvide answers in Python. W...


In [32]:
train_df.to_csv('/kaggle/working/train_response_with_feedback.csv')

### Testing on Several Queries

### Code Infilling

For Future...

In [33]:
# from transformers import pipeline
# import torch

# generator = pipeline("text-generation",model="codellama/CodeLlama-7b-hf",torch_dtype=torch.float16, device_map="auto")
# # generator('def remove_non_ascii(s: str) -> str:\n    """ <FILL_ME>\n    return result', max_new_tokens = 128, return_type = 1)

In [34]:
# from transformers import AutoTokenizer, AutoModelForCausalLM
# import transformers
# import torch

# model_id = "codellama/CodeLlama-7b-hf"
# tokenizer2 = AutoTokenizer.from_pretrained(model_id)
# model2 = AutoModelForCausalLM.from_pretrained(
#     model_id,
#     torch_dtype=torch.float16
# ).to("cuda")




In [35]:
# prompt = '''def remove_non_ascii(s: str) -> str:
#     """ <FILL_ME>
#     return result
# '''

# input_ids = tokenizer(prompt, return_tensors="pt")["input_ids"].to("cuda")
# output = model.generate(
#     input_ids,
#     max_new_tokens=200,
# )
# output = output[0].to("cpu")

# filling = tokenizer.decode(output[input_ids.shape[1]:], skip_special_tokens=True)


In [36]:
# print(prompt.replace("<FILL_ME>", filling))