# Setup for Running Experiments on Google Colab

#### Step 1: Start by uploading the .env file

Ensure that the fields "MONGODB_URI", "GOOGLE_COLAB_HUGGINGFACE_TOKEN", "GITHUB_USERNAME", "GITHUB_BRANCH_NAME" and "GITHUB_PAT" are all filled in before uploading the file.

In [None]:
from google.colab import files

# Opens Colab's file picker so the .env file can be uploaded into the current
# working directory.
# The result is stored in a variable on purpose: files.upload() returns the
# uploaded files' contents, and leaving the call as the last expression of the
# cell would echo those contents (the secrets in .env) into the cell output.
uploaded = files.upload()

# Report only the file names, never the contents.
print("Uploaded file(s):", ", ".join(uploaded))

#### Step 2: Install python-dotenv package and load the dotenv

In [None]:
# Install python-dotenv, the package that provides the `dotenv` module
# imported in the next cell.
! pip install python-dotenv

In [None]:
import os

from dotenv import load_dotenv

# Variables that Step 1 asks you to fill in inside the .env file.
REQUIRED_ENV_VARS = (
    "MONGODB_URI",
    "GOOGLE_COLAB_HUGGINGFACE_TOKEN",
    "GITHUB_USERNAME",
    "GITHUB_BRANCH_NAME",
    "GITHUB_PAT",
)

# load_dotenv() reports whether it set anything; surface that instead of
# silently carrying on with an empty environment.
if not load_dotenv():
    print("Warning: load_dotenv() did not load any values. Was the .env file uploaded?")

# Fail here, with a clear message, rather than in a later step where a missing
# value would show up as None.
missing_env_vars = [name for name in REQUIRED_ENV_VARS if not os.getenv(name)]
if missing_env_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): " + ", ".join(missing_env_vars)
    )

#### Step 3: Cloning the repository

In [None]:
# 1) Read the GitHub credentials from the environment (loaded from .env in
#    Step 2) so the PAT never has to be typed into the notebook.
import os
import subprocess

GITHUB_USER = os.getenv('GITHUB_USERNAME')
GITHUB_BRANCH_NAME = os.getenv("GITHUB_BRANCH_NAME")

GH_TOKEN = os.getenv("GITHUB_PAT")

# A missing value would otherwise be interpolated into the URL/command as the
# literal string "None" and fail with a confusing git error.
missing_clone_vars = [
    name
    for name, value in (
        ("GITHUB_USERNAME", GITHUB_USER),
        ("GITHUB_BRANCH_NAME", GITHUB_BRANCH_NAME),
        ("GITHUB_PAT", GH_TOKEN),
    )
    if not value
]
if missing_clone_vars:
    raise ValueError(
        "Missing environment variable(s): " + ", ".join(missing_clone_vars)
    )

# 2) Clone the specific branch without ever printing the token.
url = f"https://{GITHUB_USER}:{GH_TOKEN}@github.com/your-org/your-repo.git"
cmd = ["git", "clone", "-b", GITHUB_BRANCH_NAME, "--single-branch", "--depth", "1", url]

# The result is assigned (not left as the cell's last expression) because the
# repr of a CompletedProcess includes the command, and therefore the token.
# check=False for the same reason: CalledProcessError embeds the command in
# its message, which would put the token into the traceback.
clone_result = subprocess.run(
    cmd,
    check=False,
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    text=True,
)
if clone_result.returncode != 0:
    # Scrub the token from git's own output before showing it.
    sanitized_output = clone_result.stdout.replace(GH_TOKEN, "***")
    raise RuntimeError(
        f"git clone failed with exit status {clone_result.returncode}:\n{sanitized_output}"
    )

# 3) (Optional) Remove the token from the saved remote to avoid accidental
#    leaks. Uncomment only if this session does not need the stored
#    credentials afterwards (e.g. for pushing).
# subprocess.run(["git", "-C", "your-repo", "remote", "set-url", "origin",
#                 "https://github.com/your-org/your-repo.git"], check=True)

#### Step 4: Change directory to the cloned GitHub repo

In [None]:
# IPython expands the {...} expression before running the magic, so this
# changes the working directory to "your-repo" (the folder produced by the
# clone in Step 3).
%cd {"your-repo"}

#### Step 5: Pip install the necessary packages from requirements-colab.txt

In [None]:
# Install the dependencies listed in requirements-colab.txt; the relative path
# is resolved against the current working directory set in Step 4.
! pip install -r requirements-colab.txt

#### Step 6: Log in to Hugging Face

In [None]:
import os

from huggingface_hub import login

# The access token comes from the environment populated from .env in Step 2;
# it is None when the variable is not set.
hf_token = os.environ.get("GOOGLE_COLAB_HUGGINGFACE_TOKEN")

# Authenticate this session with the Hugging Face Hub.
login(token=hf_token)

#### Step 7: Downloading the desired model.

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
# Hub identifier of the checkpoint to fetch; the tokenizer and the model are
# both loaded from this same name so they stay matched.
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# NOTE(review): no dtype/device arguments are passed, so library defaults
# apply — confirm the weights fit in the memory of the Colab runtime in use.
model = AutoModelForCausalLM.from_pretrained(model_name)

#### Step 8: Ensuring that the model works

In [None]:
import torch

# Smoke test: generate a short greedy completion to confirm the downloaded
# model and tokenizer work together.

# Use the GPU when the runtime has one; otherwise the model stays where it is.
model = model.to("cuda") if torch.cuda.is_available() else model

# test prompt
prompt = "The capital of France is"

# Tokenize and place the tensors on whichever device the model ended up on.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# do_sample=False keeps decoding deterministic; at most 20 new tokens.
outputs = model.generate(**inputs, max_new_tokens=20, do_sample=False)

decoded_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(decoded_text)

# Drop the notebook's reference to the model now that the check is done.
del model

## FROM THIS STEP ON, COPY AND PASTE WHATEVER EXPERIMENT CELLS YOU NEED.

Remember to complete this step (copying in the experiment cells you need) before uploading the notebook to Google Colab.

## Mutation Testing on Mistral LLM

Using the code generation database stored in MongoDB, this notebook runs **zero-shot, one-shot** and **few-shot prompts** on a Mistral LLM. Each prompt technique is run on **unmutated, sequentially mutated** and **randomly mutated** programs. In total, 9 experiments are run through this notebook. All logs are stored in CSV files automatically for your analysis.

In [None]:
import os
import sys

In [None]:
def find_project_root(start_dir, markers=("code_generation", "llm_models", "utility")):
    """Return the directory that holds the project's top-level packages.

    Walks upwards from ``start_dir`` and returns the first directory that
    contains every name in ``markers`` as a sub-directory. The default markers
    are the packages this notebook imports from. This works both when the
    notebook runs from a nested folder inside the repository and when the
    working directory already is the repository root (as after Step 4 on
    Colab, where "two levels up" would be wrong).

    Args:
        start_dir: Directory to start searching from.
        markers: Sub-directory names that must all exist in the project root.

    Returns:
        The matching directory, or, when no ancestor matches, the directory
        two levels above ``start_dir`` (the previous hard-coded behaviour).
    """
    candidate = os.path.abspath(start_dir)
    while True:
        if all(os.path.isdir(os.path.join(candidate, marker)) for marker in markers):
            return candidate
        parent = os.path.dirname(candidate)
        if parent == candidate:
            # Reached the filesystem root without finding the project.
            break
        candidate = parent
    return os.path.dirname(os.path.dirname(start_dir))


curr_dir = os.getcwd()
parent_dir = os.path.dirname(curr_dir)
proj_dir = find_project_root(curr_dir)

# Make the project's packages importable; skip the append when the cell is
# re-run so sys.path does not accumulate duplicates.
if proj_dir not in sys.path:
    sys.path.append(proj_dir)

In [None]:
from code_generation.code_generation_tester import CodeGenerationTester
from llm_models.code_llms import Mistral
from code_generation.prompt_templates.prompt_template import OpenEndedPromptTemplate
from utility.constants import Tasks, PromptTypes, LexicalMutations, SyntacticMutations, LogicalMutations

In [None]:
# Short aliases for the project constants used by the experiment cells.

# --- Task types ---
OUTPUT_PREDICTION = Tasks.OutputPrediction.NAME
INPUT_PREDICTION = Tasks.InputPrediction.NAME

# --- Prompt types ---
ZERO_SHOT = PromptTypes.ZERO_SHOT
ONE_SHOT = PromptTypes.ONE_SHOT
FEW_SHOT = PromptTypes.FEW_SHOT

# --- Lexical mutations ---
# RANDOM / SEQUENTIAL are the same values as RANDOM_MUTATION /
# SEQUENTIAL_MUTATION; both spellings are defined.
RANDOM = LexicalMutations.RANDOM
SEQUENTIAL = LexicalMutations.SEQUENTIAL
RANDOM_MUTATION = LexicalMutations.RANDOM
SEQUENTIAL_MUTATION = LexicalMutations.SEQUENTIAL
LITERAL_FORMAT = LexicalMutations.LITERAL_FORMAT

# --- Syntactic mutations ---
FOR2WHILE = SyntacticMutations.FOR2WHILE
FOR2ENUMERATE = SyntacticMutations.FOR2ENUMERATE

# --- Logical mutations ---
BOOLEAN_LITERAL = LogicalMutations.BOOLEAN_LITERAL
DEMORGAN = LogicalMutations.DEMORGAN
COMMUTATIVE_REORDER = LogicalMutations.COMMUTATIVE_REORDER
CONSTANT_UNFOLD = LogicalMutations.CONSTANT_UNFOLD
CONSTANT_UNFOLD_ADD = LogicalMutations.CONSTANT_UNFOLD_ADD
CONSTANT_UNFOLD_MULT = LogicalMutations.CONSTANT_UNFOLD_MULT

In [None]:
# Benchmark whose "<task_set>_Code_Generation" collection the tester is built on.
task_set = "HumanEval"

try:
    llmtester = CodeGenerationTester(f"{task_set}_Code_Generation")
except Exception as e:
    # An unknown benchmark name is the most actionable explanation, so report
    # it as such while keeping the original error attached as the cause.
    if task_set not in Tasks.CodeGeneration.BENCHMARKS:
        raise ValueError(
            f"An invalid task set was used. Only {Tasks.CodeGeneration.BENCHMARKS} are valid."
        ) from e
    print(f'llmtester could not launch due to the following error: {e}')
    # Re-raise: `llmtester` was never assigned, so continuing would only fail
    # in the next cell with an unrelated-looking NameError.
    raise

In [None]:
# Run every question in the tester's database: an empty filter counts all documents.
num_tests = llmtester.question_database.count_documents({})
llm = Mistral()
# Used in the names of the result CSV files written by the experiment cells.
model_name = "mistral-small-2506"
# Pass the path components separately so os.path.join actually joins them
# (previously it received a single pre-concatenated string).
mistral_results = os.path.join(proj_dir, 'results', 'code_generation', 'mistral')
os.makedirs(mistral_results, exist_ok=True)

In [None]:
# Print each entry of Tasks.CodeGeneration.MUTATIONS with its index, as a
# reference for filling in the `mutations` lists of the experiment cells.
valid_mutations = Tasks.CodeGeneration.MUTATIONS
print("These are the valid mutation names:")
for idx, mutation in enumerate(valid_mutations):
    print(idx, mutation)

# Zero Shot Prompt Testing

In [None]:
# %%script false --no-raise-error
# Zero-shot baseline: the programs are left unmutated.
mutations = []
prompt_type = ZERO_SHOT
# Tag used in the CSV file name; an empty mutation list is labelled explicitly.
mutation_str = "_".join(mutations) if mutations else "no_mutation"
output_csv = f"{mistral_results}/{task_set}_{model_name}_{prompt_type}_{mutation_str}.csv"

# For a quick trial run, pass num_tests=10 instead of the full num_tests.
pass_count = llmtester.run_code_generation_test(
    prompt_helper=OpenEndedPromptTemplate.zero_shot_prompt,
    num_tests=num_tests,
    mutations=mutations,
    prompt_type=prompt_type,
    output_file_path=output_csv,
    task_set=task_set,
)

print(f"For no mutations, {pass_count} number of test cases passed.")


In [None]:
# %%script false --no-raise-error
mutations = [RANDOM_MUTATION]
mutation_str = "_".join(mutations) if len(mutations) > 0 else "no_mutation"
prompt_type = ZERO_SHOT

pass_count = llmtester.run_code_generation_test(
    prompt_helper = OpenEndedPromptTemplate.zero_shot_prompt,
    num_tests=num_tests,
    # num_tests=10,
    mutations = mutations,
    prompt_type= prompt_type,
    output_file_path=f"{mistral_results}/{task_set}_{model_name}_{prompt_type}_{mutation_str}.csv",
    task_set = task_set,
)

print(f"For no mutations, {pass_count} number of test cases passed.")


In [None]:
# %%script false --no-raise-error
mutations = [SEQUENTIAL_MUTATION]
mutation_str = "_".join(mutations) if len(mutations) > 0 else "no_mutation"
prompt_type = ZERO_SHOT

pass_count = llmtester.run_code_generation_test(
    prompt_helper = OpenEndedPromptTemplate.zero_shot_prompt,
    num_tests=num_tests,
    # num_tests=10,
    mutations = mutations,
    prompt_type= prompt_type,
    output_file_path=f"{mistral_results}/{task_set}_{model_name}_{prompt_type}_{mutation_str}.csv",
    task_set = task_set,
)

print(f"For no mutations, {pass_count} number of test cases passed.")
