In [1]:
!pip install -q -U datasets

In [2]:
!pip install -q git+https://github.com/huggingface/transformers.git@main accelerate

### Loading the model and tokenizer

In [3]:
import datasets

In [4]:
from transformers import AutoTokenizer
import transformers
import torch
from tqdm import notebook
import pandas as pd
import numpy as np
from datasets import Dataset
from transformers.pipelines.pt_utils import KeyDataset



In [5]:
model_id = "codellama/CodeLlama-7b-Instruct-hf"
tokenizer = AutoTokenizer.from_pretrained(model_id)

Downloading (…)okenizer_config.json:   0%|          | 0.00/749 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

### Preparing the Pipeline

In [6]:
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    model_max_length=512,
    padding_side="left",
    add_eos_token=True)

tokenizer.pad_token = tokenizer.eos_token

In [7]:
def tokenize(data):
    result = tokenizer(
        data['prompt'],
        truncation=True,
        max_length=512,
        padding="max_length",
    )
    result["labels"] = result["input_ids"].copy()
    return result

In [8]:
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    torch_dtype=torch.float16,
    device_map="auto",
)

Downloading (…)lve/main/config.json:   0%|          | 0.00/646 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [9]:
pipeline.tokenizer.pad_token_id = tokenizer.eos_token_id

### Generating Code

In [10]:
def llama_v2_prompt(
    messages: list[dict]
):
    B_INST, E_INST = "[INST]", "[/INST]"
    B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
    BOS, EOS = "<s>", "</s>"
    DEFAULT_SYSTEM_PROMPT = f"""Provide answers in CPP. Wrap around code in [CPP] and [/CPP]. Don't write any comments in generated code."""

    if messages[0]["role"] != "system":
        messages = [
            {
                "role": "system",
                "content": DEFAULT_SYSTEM_PROMPT,
            }
        ] + messages


    messages = [
        {
            "role": messages[1]["role"],
            "content": B_SYS + messages[0]["content"] + E_SYS + messages[1]["content"],
        }
    ] + messages[2:]

    messages_list = [
        f"{BOS}{B_INST}{(prompt['content']).strip()}{E_INST}{(answer['content']).strip()}{EOS}"
        for prompt, answer in zip(messages[::2], messages[1::2])
    ]
    messages_list.append(f"{BOS}{B_INST}{(messages[-1]['content']).strip()}{E_INST}\n")

    return "".join(messages_list)

In [11]:
def generate_code(dataset):
    responses = []
    batch_count = 0
    batch_size = 2
    print(f"Total batches = {len(dataset)//batch_size}")
    for out in pipeline(
      KeyDataset(dataset, "prompt"),
      do_sample=True,
      temperature=0.1,
      top_p=0.9,
      num_return_sequences=1,
      eos_token_id=tokenizer.eos_token_id,
      max_length=512,
      batch_size = batch_size
      ):
        batch_count += 1
        for o in out:
              responses.append(o['generated_text'])
        print('Processed batch[%d]'%batch_count, end='\r')
    return responses

In [12]:
def end_overlap(a, b):
    for i in range(0, len(a)):
        if b.startswith(a[i:]):
            return i
    return len(a)

def remove_prefix(s, prefix):
    prefix_end_idx = s.index(prefix) + len(prefix)
    return s[prefix_end_idx:]

def extract_code(prompt, response, code_context):
    B_CPP, E_CPP = '[CPP]', '[/CPP]'
    trimmed_response = remove_prefix(response, prompt)
    if trimmed_response.startswith(B_CPP):
        trimmed_response = remove_prefix(trimmed_response, B_CPP)
    try:
        suffix_idx = trimmed_response.index(E_CPP)
        trimmed_response = trimmed_response[:suffix_idx]
    except ValueError as ve:
          pass
    trimmed_response = trimmed_response.strip()
    code_context = code_context.strip()
    overlap_idx = end_overlap(code_context, trimmed_response)
    cleaned_response = code_context[:overlap_idx] + trimmed_response
    return cleaned_response

def save_file(path, code):
    with open(path, 'w') as file:
        file.write(code)

def run_code_generation(dataset, save_path, n_passes=5):
    responses = generate_code(dataset)
    return responses

In [13]:
CODE_GENERATION_PATH = "./generations/cpp"

In [14]:
train_df = pd.read_csv('/kaggle/input/cpp-train-processed/train_processed.csv', index_col=0)
train_df.head()

Unnamed: 0_level_0,text,code,question,code_cleaned,functions,code_context
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,Maximum Prefix Sum possible by merging two giv...,#include <bits/stdc++.h> NEW_LINE using namesp...,Maximum Prefix Sum possible by merging two giv...,#include <bits/stdc++.h> \n using namespace st...,"['int maxPresum ( vector < int > a , vector < ...",#include <bits/stdc++.h> \n using namespace st...
1,Check if a number can be represented as sum of...,#include <bits/stdc++.h> NEW_LINE using namesp...,Check if a number can be represented as sum of...,#include <bits/stdc++.h> \n using namespace st...,"['bool sumOfTwoCubes ( int n ) {', 'int main (...",#include <bits/stdc++.h> \n using namespace st...
3,Nth natural number after removing all numbers ...,#include <bits/stdc++.h> NEW_LINE using namesp...,Nth natural number after removing all numbers ...,#include <bits/stdc++.h> \n using namespace st...,"['long findNthNumber ( long long N ) {', 'int ...",#include <bits/stdc++.h> \n using namespace st...
4,Check if an integer is rotation of another giv...,#include <bits/stdc++.h> NEW_LINE using namesp...,Check if an integer is rotation of another giv...,#include <bits/stdc++.h> \n using namespace st...,"['int check ( int A , int B ) {', 'int main ( ...",#include <bits/stdc++.h> \n using namespace st...
5,Count of quadruples with product of a pair equ...,#include <bits/stdc++.h> NEW_LINE using namesp...,Count of quadruples with product of a pair equ...,#include <bits/stdc++.h> \n using namespace st...,"['void sameProductQuadruples ( int nums [ ] , ...",#include <bits/stdc++.h> \n using namespace st...


In [15]:
def build_prompt(question, code_context):
    prompt = f"# {question}\n{code_context}\n"
    instruction = {
      "role": "user",
      "content": prompt,
    }
    llama_prompt = llama_v2_prompt([instruction])
    return llama_prompt

In [16]:
def build_question_context(text, context):
    q_strs = text.split('|')
    q_strs = q_strs[1].split(';')[1:]
    return context

In [17]:
train_df_300 = train_df[0:300].copy()
train_df_300['context'] = train_df_300.apply(lambda row: build_question_context(row.text, row.code_context), axis=1)
train_df_300['llama_prompt'] = train_df_300.apply(lambda row: build_prompt(row.question, row.code_context), axis=1)
train_df_300.head()

Unnamed: 0_level_0,text,code,question,code_cleaned,functions,code_context,context,llama_prompt
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,Maximum Prefix Sum possible by merging two giv...,#include <bits/stdc++.h> NEW_LINE using namesp...,Maximum Prefix Sum possible by merging two giv...,#include <bits/stdc++.h> \n using namespace st...,"['int maxPresum ( vector < int > a , vector < ...",#include <bits/stdc++.h> \n using namespace st...,#include <bits/stdc++.h> \n using namespace st...,<s>[INST]<<SYS>>\nProvide answers in CPP. Wrap...
1,Check if a number can be represented as sum of...,#include <bits/stdc++.h> NEW_LINE using namesp...,Check if a number can be represented as sum of...,#include <bits/stdc++.h> \n using namespace st...,"['bool sumOfTwoCubes ( int n ) {', 'int main (...",#include <bits/stdc++.h> \n using namespace st...,#include <bits/stdc++.h> \n using namespace st...,<s>[INST]<<SYS>>\nProvide answers in CPP. Wrap...
3,Nth natural number after removing all numbers ...,#include <bits/stdc++.h> NEW_LINE using namesp...,Nth natural number after removing all numbers ...,#include <bits/stdc++.h> \n using namespace st...,"['long findNthNumber ( long long N ) {', 'int ...",#include <bits/stdc++.h> \n using namespace st...,#include <bits/stdc++.h> \n using namespace st...,<s>[INST]<<SYS>>\nProvide answers in CPP. Wrap...
4,Check if an integer is rotation of another giv...,#include <bits/stdc++.h> NEW_LINE using namesp...,Check if an integer is rotation of another giv...,#include <bits/stdc++.h> \n using namespace st...,"['int check ( int A , int B ) {', 'int main ( ...",#include <bits/stdc++.h> \n using namespace st...,#include <bits/stdc++.h> \n using namespace st...,<s>[INST]<<SYS>>\nProvide answers in CPP. Wrap...
5,Count of quadruples with product of a pair equ...,#include <bits/stdc++.h> NEW_LINE using namesp...,Count of quadruples with product of a pair equ...,#include <bits/stdc++.h> \n using namespace st...,"['void sameProductQuadruples ( int nums [ ] , ...",#include <bits/stdc++.h> \n using namespace st...,#include <bits/stdc++.h> \n using namespace st...,<s>[INST]<<SYS>>\nProvide answers in CPP. Wrap...


In [18]:
prompts = train_df_300['llama_prompt'].values.tolist()
prompts_dic = {'prompt': prompts}
prompts_dataset = Dataset.from_dict(prompts_dic)

In [19]:
prompts_dataset[0]

{'prompt': "<s>[INST]<<SYS>>\nProvide answers in CPP. Wrap around code in [CPP] and [/CPP]. Don't write any comments in generated code.\n<</SYS>>\n\n# Maximum Prefix Sum possible by merging two given arrays.\n#include <bits/stdc++.h> \n using namespace std ;\n int maxPresum ( vector < int > a , vector < int > b ) {[/INST]\n"}

In [20]:
responses = run_code_generation(prompts_dataset, CODE_GENERATION_PATH, n_passes=1)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Total batches = 150


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[2]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[4]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[6]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[8]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[10]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[12]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[14]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[16]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[18]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[20]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[22]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[24]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[26]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[28]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[30]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[32]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[34]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[36]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[38]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[40]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[42]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[44]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[46]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[48]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[50]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[52]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[54]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[56]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[58]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[60]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[62]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[64]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[66]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[68]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[70]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[72]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[74]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[76]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[78]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[80]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[82]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[84]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[86]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[88]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[90]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[92]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[94]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[96]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[98]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[100]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[102]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[104]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[106]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[108]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[110]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[112]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[114]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[116]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[118]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[120]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[122]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[124]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[126]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[128]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[130]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[132]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[134]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[136]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[138]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[140]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[142]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[144]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[146]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[148]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[150]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[152]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[154]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[156]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[158]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[160]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[162]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[164]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[166]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[168]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[170]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[172]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[174]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[176]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[178]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[180]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[182]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[184]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[186]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[188]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[190]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[192]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[194]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[196]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[198]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[200]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[202]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[204]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[206]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[208]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[210]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[212]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[214]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[216]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[218]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[220]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[222]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[224]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[226]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[228]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[230]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[232]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[234]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[236]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[238]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[240]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[242]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[244]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[246]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[248]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[250]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[252]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[254]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[256]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[258]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[260]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[262]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[264]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[266]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[268]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[270]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[272]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[274]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[276]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[278]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[280]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[282]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[284]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[286]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[288]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[290]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[292]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[294]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[296]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Processed batch[300]

In [21]:
len(responses)

300

In [22]:
train_df_300['response'] = responses

train_df_300.head()

Unnamed: 0_level_0,text,code,question,code_cleaned,functions,code_context,context,llama_prompt,response
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,Maximum Prefix Sum possible by merging two giv...,#include <bits/stdc++.h> NEW_LINE using namesp...,Maximum Prefix Sum possible by merging two giv...,#include <bits/stdc++.h> \n using namespace st...,"['int maxPresum ( vector < int > a , vector < ...",#include <bits/stdc++.h> \n using namespace st...,#include <bits/stdc++.h> \n using namespace st...,<s>[INST]<<SYS>>\nProvide answers in CPP. Wrap...,<s>[INST]<<SYS>>\nProvide answers in CPP. Wrap...
1,Check if a number can be represented as sum of...,#include <bits/stdc++.h> NEW_LINE using namesp...,Check if a number can be represented as sum of...,#include <bits/stdc++.h> \n using namespace st...,"['bool sumOfTwoCubes ( int n ) {', 'int main (...",#include <bits/stdc++.h> \n using namespace st...,#include <bits/stdc++.h> \n using namespace st...,<s>[INST]<<SYS>>\nProvide answers in CPP. Wrap...,<s>[INST]<<SYS>>\nProvide answers in CPP. Wrap...
3,Nth natural number after removing all numbers ...,#include <bits/stdc++.h> NEW_LINE using namesp...,Nth natural number after removing all numbers ...,#include <bits/stdc++.h> \n using namespace st...,"['long findNthNumber ( long long N ) {', 'int ...",#include <bits/stdc++.h> \n using namespace st...,#include <bits/stdc++.h> \n using namespace st...,<s>[INST]<<SYS>>\nProvide answers in CPP. Wrap...,<s>[INST]<<SYS>>\nProvide answers in CPP. Wrap...
4,Check if an integer is rotation of another giv...,#include <bits/stdc++.h> NEW_LINE using namesp...,Check if an integer is rotation of another giv...,#include <bits/stdc++.h> \n using namespace st...,"['int check ( int A , int B ) {', 'int main ( ...",#include <bits/stdc++.h> \n using namespace st...,#include <bits/stdc++.h> \n using namespace st...,<s>[INST]<<SYS>>\nProvide answers in CPP. Wrap...,<s>[INST]<<SYS>>\nProvide answers in CPP. Wrap...
5,Count of quadruples with product of a pair equ...,#include <bits/stdc++.h> NEW_LINE using namesp...,Count of quadruples with product of a pair equ...,#include <bits/stdc++.h> \n using namespace st...,"['void sameProductQuadruples ( int nums [ ] , ...",#include <bits/stdc++.h> \n using namespace st...,#include <bits/stdc++.h> \n using namespace st...,<s>[INST]<<SYS>>\nProvide answers in CPP. Wrap...,<s>[INST]<<SYS>>\nProvide answers in CPP. Wrap...


In [23]:
train_df_300.to_csv('./train_response.csv')