# Training Scripts

In [None]:
import os
from transformers import AutoModelForCausalLM
import torch
from transformers import AutoTokenizer
from datasets import load_dataset
from peft import (
    LoraConfig, PromptEncoderConfig, PrefixTuningConfig, IA3Config,
    get_peft_model,
)
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForSeq2Seq
import sys

def main():
    """Fine-tune CodeLlama-7b with LoRA on the ``ntt2402vn/sm_2k8`` dataset.

    Steps:
      1. Load the base model (bf16) and its tokenizer.
      2. Shuffle the dataset and split it into 2896 train / 576 eval rows.
      3. Turn every row into a prompt + response token sequence whose prompt
         tokens are masked (label ``-100``) so only the response is trained on.
      4. Wrap the model with a LoRA adapter, train with ``Trainer`` and save
         the result to ``output_dir``.
    """
    os.environ["TOKENIZERS_PARALLELISM"] = "false"
    model_name_or_path = "codellama/CodeLlama-7b-hf"
    # Single source of truth for the truncation length used by `tokenize`.
    max_length = 10031

    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map="auto", torch_dtype=torch.bfloat16)
    # Alternative (disabled): load the base model in 4-bit.
    # model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map="auto",quantization_config={"load_in_4bit": True, "bnb_4bit_compute_dtype": torch.bfloat16})

    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    dataset = load_dataset("ntt2402vn/sm_2k8", split="train")
    dataset = dataset.shuffle(seed=42)

    split_dataset = dataset.train_test_split(train_size=2896, test_size=576, seed=42)
    train_dataset = split_dataset["train"]
    eval_dataset = split_dataset["test"]

    tokenizer.add_eos_token = True

    # The DeepSeek checkpoints use a dedicated pad id; everything else pads with id 0.
    if model_name_or_path in (
        "deepseek-ai/deepseek-coder-6.7b-base",
        "deepseek-ai/deepseek-coder-6.7b-instruct",
    ):
        tokenizer.pad_token_id = 32018
    else:
        tokenizer.pad_token_id = 0
    tokenizer.padding_side = "right"

    def tokenize(prompt, add_eos_token=True):
        """Tokenize ``prompt`` without padding and attach causal-LM labels.

        When ``add_eos_token`` is true, an EOS token is appended unless the
        sequence already ends with one or was truncated to ``max_length``.
        Returns the tokenizer output dict with an extra ``labels`` list that
        is a copy of ``input_ids``.
        """
        result = tokenizer(
            prompt,
            truncation=True,
            max_length=max_length,
            padding=False,
            return_tensors=None,
        )
        if (
            result["input_ids"][-1] != tokenizer.eos_token_id
            and len(result["input_ids"]) < max_length
            and add_eos_token
        ):
            result["input_ids"].append(tokenizer.eos_token_id)
            result["attention_mask"].append(1)

        result["labels"] = result["input_ids"].copy()

        return result

    def generate_prompt(data):
        """Build one training example from a row with ``input``/``output``.

        The full prompt (instruction + response) is tokenized, then the
        labels covering the instruction part are replaced by ``-100`` so the
        loss is computed on the response tokens only.
        """
        full_prompt =f"""You are an expert Solidity auditor specializing in smart contract security. Your task is to analyze Solidity smart contracts for vulnerabilities.

        @@ Instruction:
        Given a smart contract, identify all potential security risks:
        {data["input"]}

        @@ Response:
        {data["output"]}
    """
        full = tokenize(full_prompt)
        question_prompt = f"""You are an expert Solidity auditor specializing in smart contract security. Your task is to analyze Solidity smart contracts for vulnerabilities.

        @@ Instruction:
        Given a smart contract, identify all potential security risks:
        {data["input"]}

        @@ Response:
        """
        # The question is only measured, never trained on, so it must not end
        # with EOS: inside `full` the prompt is followed directly by the
        # response, and counting an EOS here would mask the first response token.
        question = tokenize(question_prompt, add_eos_token=False)
        question_ids = question["input_ids"]
        question_len = len(question_ids)
        # `tokenizer.add_eos_token = True` (set above) may make the tokenizer
        # append EOS on its own, so drop it from the count if it is there.
        if question_ids and question_ids[-1] == tokenizer.eos_token_id:
            question_len -= 1
        # Keep `labels` exactly as long as `input_ids`.
        question_len = min(question_len, len(full["labels"]))

        full["labels"] = [-100] * question_len + full["labels"][question_len:]
        return full

    tokenized_train_dataset = train_dataset.map(generate_prompt, remove_columns=['input','output'])
    tokenized_val_dataset = eval_dataset.map(generate_prompt, remove_columns=['input','output'])

    model.train()  # put model back into training mode

    # model = prepare_model_for_kbit_training(model)

    config = LoraConfig(
        r=16,
        lora_alpha=16,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )

    # Alternative (disabled): IA3 instead of LoRA.
    # config = IA3Config(
    #         peft_type="IA3",
    #         task_type="CAUSAL_LM",
    #         )

    # Apply LoRA to the model
    model = get_peft_model(model, config)

    output_dir = "/drive/MyDrive/my_model_lora"

    training_args = TrainingArguments(
        per_device_train_batch_size=3,
        per_device_eval_batch_size=3,
        gradient_accumulation_steps=1,
        warmup_ratio=0.05,
        num_train_epochs=6,
        learning_rate=2e-5,
        lr_scheduler_type="cosine",
        fp16=False,
        bf16=True,
        optim="adamw_torch",
        eval_strategy="steps",
        save_strategy="no",
        eval_steps=0.2,
        output_dir=output_dir,
        load_best_model_at_end=False,
        group_by_length=True,
        report_to="none",
        run_name=None,
        gradient_checkpointing=True,
        dataloader_drop_last=True,
        dataloader_pin_memory=True,
        disable_tqdm=False,
        dataloader_num_workers=4,
    )

    trainer = Trainer(
        model=model,
        train_dataset=tokenized_train_dataset,
        eval_dataset=tokenized_val_dataset,
        args=training_args,
        data_collator=DataCollatorForSeq2Seq(
            tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True
        ),
    )

    model.config.use_cache = False

    # NOTE(review): `trainer` was constructed with the uncompiled model above,
    # so this rebinding only affects the `save_pretrained` call below. Confirm
    # whether training the compiled module was intended.
    if torch.__version__ >= "2" and sys.platform != "win32":
        print("compiling the model")
        model = torch.compile(model)

    trainer.train()

    model.save_pretrained(output_dir)


# Run only when executed as a script / notebook cell, not when imported.
if __name__ == "__main__":
    main()


# Inference Script

## For RQ1

In [None]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
import json
import time
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
from peft import PeftModel,PeftConfig
from datetime import datetime
import re

# Device the model is loaded onto (passed as `device_map` below).
device = "cuda"
# Checkpoint id used for both the tokenizer and the model in this cell.
model_name_or_path = "codellama/CodeLlama-7b-Instruct-hf"

from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
import torch


# Alternative (disabled): load the base model in 4-bit and wrap it with PEFT.
# model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map="auto",quantization_config={"load_in_4bit": True, "bnb_4bit_compute_dtype": torch.bfloat16})
# model = get_peft_model(model, peft_config)
# model.print_trainable_parameters()
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
# Pad with token id 0, on the right-hand side of each sequence.
tokenizer.pad_token_id = 0 
tokenizer.padding_side = "right"

# Only referenced by the disabled adapter-loading line below.
ADAPTER_PATH = "my_model_qlora"
# Alternative (disabled): load a fine-tuned adapter from ADAPTER_PATH in 4-bit.
# model = AutoPeftModelForCausalLM.from_pretrained(ADAPTER_PATH,device_map="auto",quantization_config={"load_in_4bit": True, "bnb_4bit_compute_dtype": torch.bfloat16})
# Active path: the plain checkpoint in bf16, without any adapter.
model = AutoModelForCausalLM.from_pretrained(model_name_or_path,device_map=device,torch_dtype=torch.bfloat16)

# Print the names of all entries in the model's state dict.
print(model.state_dict().keys())
class Prompter:
    """Builds the prompt text that is sent to the model."""

    # Auditor preamble, an "@@ Instruction" slot, and an open "@@ Response" section.
    PROMPT_TEMPLATE = """You are an expert Solidity auditor specializing in smart contract security. Your task is to analyze Solidity smart contracts for vulnerabilities.

@@ Instruction
{instruction}

@@ Response
"""

    @staticmethod
    def generate_prompt(instruction: str) -> str:
        """Return ``PROMPT_TEMPLATE`` with ``instruction`` filled in.

        ``instruction`` is inserted verbatim; braces inside it are not
        treated as format placeholders.
        """
        fields = {"instruction": instruction}
        return Prompter.PROMPT_TEMPLATE.format_map(fields)

def generate_benchmark_prompt(text: str) -> str:
    """Return the complete model prompt for one benchmark contract.

    ``text`` (the contract source) is placed under a fixed task sentence and
    the result is used as the instruction for ``Prompter.generate_prompt``.
    """
    task_template = """Given the smart contract below, identify all potential security risks:
    {problem}"""
    return Prompter.generate_prompt(instruction=task_template.format(problem=text))

# Load the benchmark; the context manager closes the file handle promptly.
with open('benchmark_SB_CURATED.json', 'r', encoding='utf-8') as benchmark_file:
    benchmark = json.load(benchmark_file)

model.to("cuda")
# The EOS id does not change between samples, so resolve it once.
eos_id = tokenizer.convert_tokens_to_ids(tokenizer.eos_token)

# Generate one answer per benchmark entry and store it under the 'output' key.
for sample in benchmark:
    inputs = tokenizer(
        generate_benchmark_prompt(sample['Code']),
        truncation=True,
        max_length=8196,
        padding=False,
        return_tensors="pt",
    )
    # Number of prompt tokens. Read from the tensor instead of `inputs[0]`,
    # which is only available with fast tokenizers.
    prompt_len = inputs['input_ids'].shape[1]
    generated_ids = model.generate(
        input_ids=inputs['input_ids'].cuda(),
        attention_mask=inputs['attention_mask'].cuda(),
        max_new_tokens=500,
        num_beams=10,
        num_return_sequences=1,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=eos_id,
    )

    # `generate` returns prompt + continuation; keep only the continuation.
    sample['output'] = [
        tokenizer.decode(
            sequence[prompt_len:],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
        for sequence in generated_ids
    ]

# with open(f'codellama_base_smSBCURATED.json', "w") as f:
#     json.dump(benchmark, f, indent=2)



## For RQ2 and RQ3

### ICL

In [None]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
import json
import time
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
from peft import PeftModel,PeftConfig
from datetime import datetime
import re
# from humaneval_patch_validate import validate_humaneval
# from quixbugs_patch_validate import validate_quixbugs
# from  defects4j_patch_validate import validate_defects4j
# from mbjp_apr_patch_validate import validate_mbjp_apr
# from result_look import cal_result

# Module-level prompt template for a code-repair task. Nothing visible in this
# cell reads it: generate_benchmark_prompt() binds its own local BENCHMARK_PROMPT.
BENCHMARK_PROMPT = """Write a solution to the following coding problem:
The input is buggy code, which bug lines start from '// buggy lines start' and end at '// buggy lines end'. Please fix the follwing code.
{problem}"""

def create_model_and_tokenizer(model_name_or_path, model_type, load_in_4bit=True):
    """Load a causal language model and its tokenizer.

    Args:
        model_name_or_path: Checkpoint id or local path given to
            ``from_pretrained`` for both the model and the tokenizer.
        model_type: Short model name; selects the pad token id and is
            echoed in the log line.
        load_in_4bit: When true, the model is loaded with 4-bit
            quantization.

    Returns:
        A ``(model, tokenizer)`` tuple. The tokenizer pads on the right.
    """
    # Imported here so the function brings its own dependency into scope.
    from transformers import BitsAndBytesConfig

    # Passing `load_in_4bit` straight to `from_pretrained` is deprecated;
    # the supported route is a BitsAndBytesConfig via `quantization_config`.
    quantization_config = BitsAndBytesConfig(load_in_4bit=True) if load_in_4bit else None
    model = AutoModelForCausalLM.from_pretrained(
        pretrained_model_name_or_path=model_name_or_path,
        torch_dtype=torch.float16,
        quantization_config=quantization_config,
        device_map="auto",
    )
    # model = prepare_model_for_kbit_training(model)
    tokenizer = AutoTokenizer.from_pretrained(
        pretrained_model_name_or_path=model_name_or_path
    )
    if model_type == 'deepseek-coder-6.7b-base':
        tokenizer.pad_token_id = 32018  # "<pad>"
    else:
        tokenizer.pad_token_id = 0  # unk. we want this to be different from the eos token
    tokenizer.padding_side = "right"
    print(model_type + f' pad token id is {tokenizer.pad_token_id}')
    return model, tokenizer

# Device the model is loaded onto (passed as `device_map` below).
device = "cuda"
# Checkpoint id used for both the tokenizer and the model in this cell.
model_name_or_path = "codellama/CodeLlama-7b-Instruct-hf"
# Not referenced below; the tokenizer is loaded from model_name_or_path.
tokenizer_name_or_path = "codellama/CodeLlama-7b-hf"

# creating model
# peft_config = LoraConfig(task_type=TaskType.SEQ_2_SEQ_LM, inference_mode=False, r=8, lora_alpha=32, lora_dropout=0.1)
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
import torch


# Alternative (disabled): load the base model in 4-bit and wrap it with PEFT.
# model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map="auto",quantization_config={"load_in_4bit": True, "bnb_4bit_compute_dtype": torch.float16})
# model = get_peft_model(model, peft_config)
# model.print_trainable_parameters()
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
tokenizer.pad_token_id = 0 # unk. we want this to be different from the eos token
tokenizer.padding_side = "right"

# Only referenced by the disabled adapter-loading line below.
ADAPTER_PATH = "my_model_qlora"
# Alternative (disabled): load a fine-tuned adapter from ADAPTER_PATH in 4-bit.
# model = AutoPeftModelForCausalLM.from_pretrained(ADAPTER_PATH,device_map="auto",quantization_config={"load_in_4bit": True, "bnb_4bit_compute_dtype": torch.bfloat16})
# Active path: the plain checkpoint in bf16, without any adapter.
model = AutoModelForCausalLM.from_pretrained(model_name_or_path,device_map=device,torch_dtype=torch.bfloat16)

# Print the names of all entries in the model's state dict.
print(model.state_dict().keys())
class Prompter:
    """Builds the model prompt and extracts the answer from model output."""

    # Auditor preamble followed by the caller-supplied instruction text.
    PROMPT_TEMPLATE = """You are an expert Solidity auditor specializing in smart contract security. Your task is to analyze Solidity smart contracts for vulnerabilities.

{instruction}

"""

    @staticmethod
    def generate_prompt(instruction: str) -> str:
        """Return ``PROMPT_TEMPLATE`` with ``instruction`` filled in.

        ``instruction`` is inserted verbatim; braces inside it are not
        treated as format placeholders.
        """
        fields = {"instruction": instruction}
        return Prompter.PROMPT_TEMPLATE.format_map(fields)

    @staticmethod
    def get_response(output: str) -> str:
        """Return the stripped text following the first ``@@ Response`` line.

        The text stops at the next marker if the marker occurs again.
        Returns an empty string when the marker is absent.
        """
        marker = "@@ Response\n"
        if marker not in output:
            return ""
        segments = output.split(marker)
        return segments[1].strip()

def generate_benchmark_prompt(text: str) -> str:
    """Build the few-shot (in-context learning) prompt for one contract.

    The prompt consists of the worked examples in ``ICL`` (two per category:
    Reentrancy, Time Manipulation, Timestamp Dependence, Authorization,
    Unhandled Exception, Denial of Service), followed by ``text`` inside a
    ```solidity fence. The result is passed through
    ``Prompter.generate_prompt`` as the instruction.

    Args:
        text: Source code of the contract to analyse.

    Returns:
        The complete prompt string.
    """
    # Outer layout: the examples first, then the contract under test in a code fence.
    BENCHMARK_PROMPT = """
    {ICLs}

    ```solidity
    {problem}
    ```
    """
    # Few-shot examples; each pairs a contract with the expected answer format
    # ([Output] / [<category>] / [Function/Line] / [Explanation]).
    ICL = """
    ## Reentrancy

    ### Example 1

    ```solidity
    pragma solidity ^0.4.24;

    contract ReentrancyExample {
        mapping(address => uint) public balances;

        function deposit() public payable {
            balances[msg.sender] += msg.value;
        }

        function withdraw(uint _amount) public {
            require(balances[msg.sender] >= _amount, "Insufficient balance");
            msg.sender.call.value(_amount)("");
            balances[msg.sender] -= _amount;
        }
    }
    ```

    [Output]:
    [Reentrancy]:
    [Function/Line]:
    ```solidity
    msg.sender.call.value(_amount)("");
    ```
    [Explanation]:
    The balance is updated after the transfer, allowing potential reentrancy attacks.

    ### Example 2

    ```solidity
    pragma solidity ^0.4.24;

    contract ReentrancyAttack {
        mapping(address => uint256) public userBalances;

        function depositFunds() public payable {
            userBalances[msg.sender] += msg.value;
        }

        function withdrawFunds(uint256 _amount) public {
            require(userBalances[msg.sender] >= _amount, "Insufficient funds");
            msg.sender.call.value(_amount)("");
            userBalances[msg.sender] -= _amount;
        }
    }
    ```

    [Output]:
    [Reentrancy]:
    [Function/Line]:
    ```solidity
    msg.sender.call.value(_amount)("");
    ```
    [Explanation]:
    The transfer happens before updating the balance, allowing reentrant functions to exploit.

    ## Time Manipulation

    ### Example 1

    ```solidity
    pragma solidity ^0.4.24;

    contract TimeManipulationExample {
        uint public deadline = now + 1 days;

        function extendDeadline(uint _extraTime) public {
            if (now < deadline) {
                deadline += _extraTime;
            }
        }
    }
    ```

    [Output]:
    [Time Manipulation]:
    [Function/Line]:
    ```solidity
    deadline += _extraTime;
    ```
    [Explanation]:
    Miners can manipulate the timestamp to extend the deadline arbitrarily.

    ### Example 2

    ```solidity
    pragma solidity ^0.4.24;

    contract TimeBoost {
        uint public saleEndTime = now + 5 days;

        function extendSale(uint _days) public {
            if (now < saleEndTime) {
                saleEndTime += _days * 1 days;
            }
        }
    }
    ```

    [Output]:
    [Time Manipulation]:
    [Function/Line]:
    ```solidity
    saleEndTime += _days * 1 days;
    ```
    [Explanation]:
    Use of now allows arbitrary extension due to miner-controlled timestamps.

    ## Timestamp Dependence

    ### Example 1

    ```solidity
    pragma solidity ^0.4.24;

    contract TimestampGame {
        uint public result;

        function play() public {
            if (now % 2 == 0) {
                result = 1;
            } else {
                result = 0;
            }
        }
    }
    ```

    [Output]:
    [Timestamp Dependence]:
    [Function/Line]:
    ```solidity
    if (now % 2 == 0) {
    ```
    [Explanation]:
    Decisions based on timestamps can be influenced by miners for predictable outcomes.

    ### Example 2

    ```solidity
    pragma solidity ^0.4.24;

    contract PredictableOutcome {
        uint public outcome;

        function determineOutcome() public {
            if (now % 5 == 0) {
                outcome = 100;
            } else {
                outcome = 50;
            }
        }
    }
    ```

    [Output]:
    [Timestamp Dependence]:
    [Function/Line]:
    ```solidity
    if (now % 5 == 0) {
    ```
    [Explanation]:
    Miner-controlled timestamps create predictable outcomes, reducing randomness.

    ## Authorization

    ### Example 1

    ```solidity
    pragma solidity ^0.4.24;

    contract AuthorizationProblem {
        address public owner;

        function changeOwner(address _newOwner) public {
            owner = _newOwner;
        }
    }
    ```

    [Output]:
    [Authorization]:
    [Function/Line]:
    ```solidity
    owner = _newOwner;
    ```
    [Explanation]:
    No access control exists, allowing any user to change the contract owner.

    ### Example 2

    ```solidity
    pragma solidity ^0.4.24;

    contract AdminControl {
        address public admin;

        function setAdmin(address _newAdmin) public {
            admin = _newAdmin;
        }
    }
    ```

    [Output]:
    [Authorization]:
    [Function/Line]:
    ```solidity
    admin = _newAdmin;
    ```
    [Explanation]:
    Lack of restrictions allows anyone to become the admin, opening the contract to misuse.

    ## Unhandled Exception

    ### Example 1

    ```solidity
    pragma solidity ^0.4.24;

    contract UncheckedSend {
        function sendPayment(address recipient, uint amount) public {
            recipient.send(amount);
        }
    }
    ```

    [Output]:
    [Unhandled Exception]:
    [Function/Line]:
    ```solidity
    recipient.send(amount);
    ```
    [Explanation]:
    The send method fails silently, potentially leading to lost ether without corrective action.

    ### Example 2

    ```solidity
    pragma solidity ^0.4.24;

    contract MissingRevert {
        function executeTransfer(address recipient, uint amount) public {
            recipient.send(amount);
        }
    }
    ```

    [Output]:
    [Unhandled Exception]:
    [Function/Line]:
    ```solidity
    recipient.send(amount);
    ```
    [Explanation]:
    Usage of send without checking for success means errors go unnoticed, risking ether loss.

    ## Denial of Service

    ### Example 1

    ```solidity
    pragma solidity ^0.4.24;

    contract DOSWithRevert {
        mapping(address => uint) public balances;
        address[] public users;

        function deposit() public payable {
            balances[msg.sender] += msg.value;
            users.push(msg.sender);
        }

        function withdraw() public {
            require(balances[msg.sender] > 0, "Insufficient balance");
            for (uint i = 0; i < users.length; i++) {
                require(users[i] != msg.sender, "Cannot process withdrawal");
            }
            uint amount = balances[msg.sender];
            balances[msg.sender] = 0;
            msg.sender.transfer(amount);
        }
    }
    ```

    [Output]:
    [Denial of Service]:
    [Function/Line]:
    ```solidity
    require(users[i] != msg.sender, "Cannot process withdrawal");
    ```
    [Explanation]:
    The loop calling `require` on each user can cause a revert if many users exist, blocking legitimate withdrawals.

    ### Example 2

    ```solidity
    pragma solidity ^0.4.24;

    contract UnboundedGasConsumption {
        mapping(address => uint) public points;
        address[] public players;

        function addPlayer(address _player) public {
            players.push(_player);
        }

        function rewardPoints() public {
            for (uint i = 0; i < players.length; i++) {
                points[players[i]] += 1;
            }
        }
    }
    ```

    [Output]:
    [Denial of Service]:
    [Function/Line]:
    ```solidity
    for (uint i = 0; i < players.length; i++) {
    ```
    [Explanation]:
    As more players are added, the loop inside `rewardPoints` can consume too much gas, causing the function to fail.
    """
    # ICL and text are format *arguments*, so the braces in the Solidity code
    # are inserted verbatim and never parsed as placeholders.
    formatted_text = BENCHMARK_PROMPT.format(ICLs = ICL,problem=text)
    return Prompter.generate_prompt(instruction=formatted_text)

# Load the benchmark; the context manager closes the file handle promptly.
with open('benchmark_SB_CURATED.json', 'r', encoding='utf-8') as benchmark_file:
    benchmark = json.load(benchmark_file)

model.to("cuda")
# The EOS id does not change between samples, so resolve it once.
eos_id = tokenizer.convert_tokens_to_ids(tokenizer.eos_token)

# Generate one answer per benchmark entry and store it under the 'output' key.
for sample in benchmark:
    inputs = tokenizer(
        generate_benchmark_prompt(sample['Code']),
        truncation=True,
        max_length=8196,
        padding=False,
        return_tensors="pt",
    )
    # Number of prompt tokens. Read from the tensor instead of `inputs[0]`,
    # which is only available with fast tokenizers.
    prompt_len = inputs['input_ids'].shape[1]
    generated_ids = model.generate(
        input_ids=inputs['input_ids'].cuda(),
        attention_mask=inputs['attention_mask'].cuda(),
        max_new_tokens=500,
        num_beams=10,
        num_return_sequences=1,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=eos_id,
    )

    # `generate` returns prompt + continuation; keep only the continuation.
    sample['output'] = [
        tokenizer.decode(
            sequence[prompt_len:],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
        for sequence in generated_ids
    ]

# with open(f'codellama_base_smSBCURATED.json', "w") as f:
#     json.dump(benchmark, f, indent=2)




### CoT

In [None]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
import json
import time
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
from peft import PeftModel,PeftConfig
from datetime import datetime
import re
# from humaneval_patch_validate import validate_humaneval
# from quixbugs_patch_validate import validate_quixbugs
# from  defects4j_patch_validate import validate_defects4j
# from mbjp_apr_patch_validate import validate_mbjp_apr
# from result_look import cal_result

# Module-level prompt template for a code-repair task. Nothing visible in this
# cell reads it: generate_benchmark_prompt() binds its own local BENCHMARK_PROMPT.
BENCHMARK_PROMPT = """Write a solution to the following coding problem:
The input is buggy code, which bug lines start from '// buggy lines start' and end at '// buggy lines end'. Please fix the follwing code.
{problem}"""

def create_model_and_tokenizer(model_name_or_path, model_type, load_in_4bit=True):
    """Load a causal language model and its tokenizer.

    Args:
        model_name_or_path: Checkpoint id or local path given to
            ``from_pretrained`` for both the model and the tokenizer.
        model_type: Short model name; selects the pad token id and is
            echoed in the log line.
        load_in_4bit: When true, the model is loaded with 4-bit
            quantization.

    Returns:
        A ``(model, tokenizer)`` tuple. The tokenizer pads on the right.
    """
    # Imported here so the function brings its own dependency into scope.
    from transformers import BitsAndBytesConfig

    # Passing `load_in_4bit` straight to `from_pretrained` is deprecated;
    # the supported route is a BitsAndBytesConfig via `quantization_config`.
    quantization_config = BitsAndBytesConfig(load_in_4bit=True) if load_in_4bit else None
    model = AutoModelForCausalLM.from_pretrained(
        pretrained_model_name_or_path=model_name_or_path,
        torch_dtype=torch.float16,
        quantization_config=quantization_config,
        device_map="auto",
    )
    # model = prepare_model_for_kbit_training(model)
    tokenizer = AutoTokenizer.from_pretrained(
        pretrained_model_name_or_path=model_name_or_path
    )
    if model_type == 'deepseek-coder-6.7b-base':
        tokenizer.pad_token_id = 32018  # "<pad>"
    else:
        tokenizer.pad_token_id = 0  # unk. we want this to be different from the eos token
    tokenizer.padding_side = "right"
    print(model_type + f' pad token id is {tokenizer.pad_token_id}')
    return model, tokenizer

# Device the model is loaded onto (passed as `device_map` below).
device = "cuda"
# Checkpoint id used for both the tokenizer and the model in this cell.
model_name_or_path = "codellama/CodeLlama-7b-Instruct-hf"
# Not referenced below; the tokenizer is loaded from model_name_or_path.
tokenizer_name_or_path = "codellama/CodeLlama-7b-hf"

# creating model
# peft_config = LoraConfig(task_type=TaskType.SEQ_2_SEQ_LM, inference_mode=False, r=8, lora_alpha=32, lora_dropout=0.1)
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
import torch


# Alternative (disabled): load the base model in 4-bit and wrap it with PEFT.
# model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map="auto",quantization_config={"load_in_4bit": True, "bnb_4bit_compute_dtype": torch.float16})
# model = get_peft_model(model, peft_config)
# model.print_trainable_parameters()
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
tokenizer.pad_token_id = 0 # unk. we want this to be different from the eos token
tokenizer.padding_side = "right"

# Only referenced by the disabled adapter-loading line below.
ADAPTER_PATH = "my_model_qlora"
# Alternative (disabled): load a fine-tuned adapter from ADAPTER_PATH in 4-bit.
# model = AutoPeftModelForCausalLM.from_pretrained(ADAPTER_PATH,device_map="auto",quantization_config={"load_in_4bit": True, "bnb_4bit_compute_dtype": torch.bfloat16})
# Active path: the plain checkpoint in bf16, without any adapter.
model = AutoModelForCausalLM.from_pretrained(model_name_or_path,device_map=device,torch_dtype=torch.bfloat16)

# Print the names of all entries in the model's state dict.
print(model.state_dict().keys())
class Prompter:
    """Builds the model prompt and extracts the answer from model output."""

    # Auditor preamble followed by the caller-supplied instruction text.
    PROMPT_TEMPLATE = """You are an expert Solidity auditor specializing in smart contract security. Your task is to analyze Solidity smart contracts for vulnerabilities.

{instruction}

"""

    @staticmethod
    def generate_prompt(instruction: str) -> str:
        """Return ``PROMPT_TEMPLATE`` with ``instruction`` filled in.

        ``instruction`` is inserted verbatim; braces inside it are not
        treated as format placeholders.
        """
        fields = {"instruction": instruction}
        return Prompter.PROMPT_TEMPLATE.format_map(fields)

    @staticmethod
    def get_response(output: str) -> str:
        """Return the stripped text following the first ``@@ Response`` line.

        The text stops at the next marker if the marker occurs again.
        Returns an empty string when the marker is absent.
        """
        marker = "@@ Response\n"
        if marker not in output:
            return ""
        segments = output.split(marker)
        return segments[1].strip()

def generate_benchmark_prompt(text: str) -> str:
    """Build the few-shot chain-of-thought prompt for one contract.

    The prompt consists of the worked examples in ``ICL`` (two per category:
    Reentrancy, Time Manipulation, Timestamp Dependence, Authorization,
    Unhandled Exception, Denial of Service), each with a numbered
    step-by-step explanation, followed by ``text`` inside a ```solidity
    fence. The result is passed through ``Prompter.generate_prompt`` as the
    instruction.

    Args:
        text: Source code of the contract to analyse.

    Returns:
        The complete prompt string.
    """
    # Outer layout: the examples first, then the contract under test in a code fence.
    BENCHMARK_PROMPT = """
    {ICLs}

    ```solidity
    {problem}
    ```
    """
    # Few-shot examples; each pairs a contract with the expected answer format
    # ([Output] / [<category>] / [Function/Line] / [Step-by-step Explanation]).
    ICL = """
    ## Reentrancy

    ### Example 1

    ```solidity
    pragma solidity ^0.4.24;

    contract ReentrancyExample {
        mapping(address => uint) public balances;

        function deposit() public payable {
            balances[msg.sender] += msg.value;
        }

        function withdraw(uint _amount) public {
            require(balances[msg.sender] >= _amount, "Insufficient balance");
            msg.sender.call.value(_amount)("");
            balances[msg.sender] -= _amount;
        }
    }
    ```

    [Output]:
    [Reentrancy]:
    [Function/Line]:
    ```solidity
    msg.sender.call.value(_amount)("");
    ```
    [Step-by-step Explanation]:
    1. The `withdraw` function first checks if the sender has enough balance.
    2. It then sends the specified `_amount` to `msg.sender` using `call`.
    3. Since the balance reduction occurs after the call, an attacker can reenter the function before the balance is updated.
    4. This allows repeated withdrawals, draining the contract.

    ### Example 2

    ```solidity
    pragma solidity ^0.4.24;

    contract ReentrancyAttack {
        mapping(address => uint256) public userBalances;

        function depositFunds() public payable {
            userBalances[msg.sender] += msg.value;
        }

        function withdrawFunds(uint256 _amount) public {
            require(userBalances[msg.sender] >= _amount, "Insufficient funds");
            msg.sender.call.value(_amount)("");
            userBalances[msg.sender] -= _amount;
        }
    }
    ```

    [Output]:
    [Reentrancy]:
    [Function/Line]:
    ```solidity
    msg.sender.call.value(_amount)("");
    ```
    [Step-by-step Explanation]:
    1. The `withdrawFunds` function checks if the user has sufficient funds.
    2. It sends the withdrawal amount using `call`.
    3. Before updating `userBalances`, the vulnerable call enables repeated calls to `withdrawFunds` via reentrancy.
    4. This could lead to an attacker draining funds beyond their balance.

    ## Time Manipulation

    ### Example 1

    ```solidity
    pragma solidity ^0.4.24;

    contract TimeManipulationExample {
        uint public deadline = now + 1 days;

        function extendDeadline(uint _extraTime) public {
            if (now < deadline) {
                deadline += _extraTime;
            }
        }
    }
    ```

    [Output]:
    [Time Manipulation]:
    [Function/Line]:
    ```solidity
    deadline += _extraTime;
    ```
    [Step-by-step Explanation]:
    1. The contract uses `now` to set a deadline.
    2. The `extendDeadline` function allows adding extra time if the current `now` is less than `deadline`.
    3. Miners can adjust `now` values slightly, extending the deadline.
    4. This capability leads to arbitrary and unauthorized deadline extensions.

    ### Example 2

    ```solidity
    pragma solidity ^0.4.24;

    contract TimeBoost {
        uint public saleEndTime = now + 5 days;

        function extendSale(uint _days) public {
            if (now < saleEndTime) {
                saleEndTime += _days * 1 days;
            }
        }
    }
    ```

    [Output]:
    [Time Manipulation]:
    [Function/Line]:
    ```solidity
    saleEndTime += _days * 1 days;
    ```
    [Step-by-step Explanation]:
    1. The sale period is set to expire in 5 days using `now`.
    2. The `extendSale` function permits adding days to `saleEndTime`.
    3. Since miners have control over `now`, they may manipulate timing to their advantage.
    4. This can result in extensions beyond intended limits.

    ## Timestamp Dependence

    ### Example 1

    ```solidity
    pragma solidity ^0.4.24;

    contract TimestampGame {
        uint public result;

        function play() public {
            if (now % 2 == 0) {
                result = 1;
            } else {
                result = 0;
            }
        }
    }
    ```

    [Output]:
    [Timestamp Dependence]:
    [Function/Line]:
    ```solidity
    if (now % 2 == 0) {
    ```
    [Step-by-step Explanation]:
    1. The function uses `now` to determine the game's result.
    2. Depending on whether `now` is even or odd, different outcomes are produced.
    3. Miners can set timestamps within reasonable limits to influence the game’s outcome.
    4. This behavior breaks randomness and can predictably alter results.

    ### Example 2

    ```solidity
    pragma solidity ^0.4.24;

    contract PredictableOutcome {
        uint public outcome;

        function determineOutcome() public {
            if (now % 5 == 0) {
                outcome = 100;
            } else {
                outcome = 50;
            }
        }
    }
    ```

    [Output]:
    [Timestamp Dependence]:
    [Function/Line]:
    ```solidity
    if (now % 5 == 0) {
    ```
    [Step-by-step Explanation]:
    1. The contract uses the remainder from `now % 5` to set `outcome`.
    2. An exact time-derived condition determines the result.
    3. Miners can craft block timestamps fitting the criteria, achieving specific outcomes.
    4. This reduces the unpredictability, harming randomness and fairness.

    ## Authorization

    ### Example 1

    ```solidity
    pragma solidity ^0.4.24;

    contract AuthorizationProblem {
        address public owner;

        function changeOwner(address _newOwner) public {
            owner = _newOwner;
        }
    }
    ```

    [Output]:
    [Authorization]:
    [Function/Line]:
    ```solidity
    owner = _newOwner;
    ```
    [Step-by-step Explanation]:
    1. The contract’s `owner` can be changed via `changeOwner`.
    2. There are no checks on the caller of this function.
    3. Any user can call `changeOwner` and overwrite the `owner` address.
    4. This leads to complete loss of control over ownership.

    ### Example 2

    ```solidity
    pragma solidity ^0.4.24;

    contract AdminControl {
        address public admin;

        function setAdmin(address _newAdmin) public {
            admin = _newAdmin;
        }
    }
    ```

    [Output]:
    [Authorization]:
    [Function/Line]:
    ```solidity
    admin = _newAdmin;
    ```
    [Step-by-step Explanation]:
    1. `setAdmin` changes the `admin` address.
    2. Without access restrictions, anyone can invoke this function.
    3. Unauthorized calls can replace the admin, making the contract susceptible to misuse.
    4. This lack of security results in a compromised administrative role.

    ## Unhandled Exception

    ### Example 1

    ```solidity
    pragma solidity ^0.4.24;

    contract UncheckedSend {
        function sendPayment(address recipient, uint amount) public {
            recipient.send(amount);
        }
    }
    ```

    [Output]:
    [Unhandled Exception]:
    [Function/Line]:
    ```solidity
    recipient.send(amount);
    ```
    [Step-by-step Explanation]:
    1. The `sendPayment` function tries to send a specified amount.
    2. The `send` method sends ether and returns a boolean based on success/failure.
    3. Here, the return value isn't checked; thus, sending failures go unnoticed.
    4. Missing failure handling can lead to undetected ether loss, impacting the sender.

    ### Example 2

    ```solidity
    pragma solidity ^0.4.24;

    contract MissingRevert {
        function executeTransfer(address recipient, uint amount) public {
            recipient.send(amount);
        }
    }
    ```

    [Output]:
    [Unhandled Exception]:
    [Function/Line]:
    ```solidity
    recipient.send(amount);
    ```
    [Step-by-step Explanation]:
    1. `executeTransfer` calls the `send` method, transferring ether.
    2. The call’s outcome is neglected as its result isn’t evaluated.
    3. Without checking, failed sends might not trigger corrective actions.
    4. This oversight can result in ether losses, leading to financial discrepancies.

    ## Denial of Service

    ### Example 1

    ```solidity
    pragma solidity ^0.4.24;

    contract DOSWithRevert {
        mapping(address => uint) public balances;
        address[] public users;

        function deposit() public payable {
            balances[msg.sender] += msg.value;
            users.push(msg.sender);
        }

        function withdraw() public {
            require(balances[msg.sender] > 0, "Insufficient balance");
            for (uint i = 0; i < users.length; i++) {
                require(users[i] != msg.sender, "Cannot process withdrawal");
            }
            uint amount = balances[msg.sender];
            balances[msg.sender] = 0;
            msg.sender.transfer(amount);
        }
    }
    ```

    [Output]:
    [Denial of Service]:
    [Function/Line]:
    ```solidity
    require(users[i] != msg.sender, "Cannot process withdrawal");
    ```
    [Step-by-step Explanation]:
    1. The `withdraw` function aims to prevent duplicate withdrawals by iterating over `users`.
    2. It checks if the user already exists before allowing withdrawal.
    3. As the number of users increases, the loop may run out of gas.
    4. This excessive gas use can lead to a revert, blocking any withdrawals.
    5. Consequently, legitimate users cannot withdraw if the user list gets too large.

    ### Example 2

    ```solidity
    pragma solidity ^0.4.24;

    contract UnboundedGasConsumption {
        mapping(address => uint) public points;
        address[] public players;

        function addPlayer(address _player) public {
            players.push(_player);
        }

        function rewardPoints() public {
            for (uint i = 0; i < players.length; i++) {
                points[players[i]] += 1;
            }
        }
    }
    ```

    [Output]:
    [Denial of Service]:
    [Function/Line]:
    ```solidity
    for (uint i = 0; i < players.length; i++) {
    ```
    [Step-by-step Explanation]:
    1. The `rewardPoints` function increments points for each player.
    2. It uses a loop iterating over all `players`.
    3. As the number of players grows, the loop consumes an increasing amount of gas.
    4. Once the gas limit is reached, the function calls will fail.
    5. This failure effectively creates a denial of service, as updates to points cannot occur.
    """
    # ICL and text are format *arguments*, so the braces in the Solidity code
    # are inserted verbatim and never parsed as placeholders.
    formatted_text = BENCHMARK_PROMPT.format(ICLs = ICL,problem=text)
    return Prompter.generate_prompt(instruction=formatted_text)

# Load the benchmark; the context manager closes the file handle promptly.
with open('benchmark_SB_CURATED.json', 'r', encoding='utf-8') as benchmark_file:
    benchmark = json.load(benchmark_file)

model.to("cuda")
# The EOS id does not change between samples, so resolve it once.
eos_id = tokenizer.convert_tokens_to_ids(tokenizer.eos_token)

# Generate one answer per benchmark entry and store it under the 'output' key.
for sample in benchmark:
    inputs = tokenizer(
        generate_benchmark_prompt(sample['Code']),
        truncation=True,
        max_length=8196,
        padding=False,
        return_tensors="pt",
    )
    # Number of prompt tokens. Read from the tensor instead of `inputs[0]`,
    # which is only available with fast tokenizers.
    prompt_len = inputs['input_ids'].shape[1]
    generated_ids = model.generate(
        input_ids=inputs['input_ids'].cuda(),
        attention_mask=inputs['attention_mask'].cuda(),
        max_new_tokens=500,
        num_beams=10,
        num_return_sequences=1,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=eos_id,
    )

    # `generate` returns prompt + continuation; keep only the continuation.
    sample['output'] = [
        tokenizer.decode(
            sequence[prompt_len:],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
        for sequence in generated_ids
    ]

# with open(f'codellama_base_smSBCURATED.json', "w") as f:
#     json.dump(benchmark, f, indent=2)


