# Set Up for running experiments on Google Colab

#### Step 1: Start by importing the .env file

Ensure that the "mongoDB_uri", "collab_token", "GITHUB_USERNAME", "GITHUB_BRANCH_NAME" and "GITHUB_PAT" fields in your .env file are filled in.

In [None]:
from google.colab import files

# files.upload() returns a {filename: file_bytes} dict. Bind it to a name
# instead of leaving the call as the cell's last expression; otherwise the raw
# contents of the uploaded .env (tokens, DB URI) are rendered into the cell
# output and saved with the notebook.
uploaded = files.upload()
print("Uploaded:", ", ".join(uploaded))

#### Step 2: Install python-dotenv package and load the dotenv

In [None]:
! pip install python-dotenv

In [None]:
from dotenv import load_dotenv

# load_dotenv() returns False when it found nothing to load. Fail here with a
# clear message instead of letting later cells run with missing credentials.
if not load_dotenv():
    raise RuntimeError(
        "No variables were loaded from .env - check that the file was uploaded "
        "to the current working directory and is not empty."
    )
print("Environment variables loaded from .env")

#### Step 3: Cloning the repository

In [None]:
# Clone the experiment branch using the GitHub credentials loaded from .env.
import os, subprocess

# 1) Make sure every credential needed for the clone is present, so a missing
#    value fails with a readable message rather than an opaque TypeError.
_missing = [
    name
    for name in ("GITHUB_USERNAME", "GITHUB_BRANCH_NAME", "GITHUB_PAT")
    if not os.getenv(name)
]
if _missing:
    raise RuntimeError(
        "Missing environment variable(s): " + ", ".join(_missing)
        + ". Fill them in your .env file and re-run the load_dotenv cell."
    )

GITHUB_USER = os.getenv('GITHUB_USERNAME')
GITHUB_BRANCH_NAME = os.getenv("GITHUB_BRANCH_NAME")

os.environ["GH_TOKEN"] = os.environ["GITHUB_PAT"]

# 2) Clone the specific branch. The result is bound to a name (never left as
#    the cell's last expression) because the CompletedProcess repr contains the
#    full command line, including the token-bearing URL. check=False is used
#    for the same reason: CalledProcessError would print that command line.
url = f"https://{GITHUB_USER}:{os.environ['GH_TOKEN']}@github.com/your-org/your-repo.git"
cmd = ["git","clone","-b", GITHUB_BRANCH_NAME, "--single-branch", "--depth","1", url]
_clone = subprocess.run(
    cmd, check=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
)
if _clone.returncode != 0:
    # Surface git's own message, with the token scrubbed in case it is echoed.
    _git_output = _clone.stdout.replace(os.environ["GH_TOKEN"], "***")
    raise RuntimeError(
        f"git clone failed (exit code {_clone.returncode}):\n{_git_output}"
    )

# 3) Remove the token from the saved remote URL so it is not left behind in
#    the clone's .git/config.
subprocess.run(
    ["git", "-C", "your-repo", "remote", "set-url", "origin",
     "https://github.com/your-org/your-repo.git"],
    check=True,
)
print("Repository cloned.")

#### Step 4: Change directory to the cloned Github Repo

In [None]:
%cd {"your-repo"}

#### Step 5: Pip install the necessary packages from requirements-colab.txt

In [None]:
! pip install -r requirements-colab.txt

#### Step 6: Log in to Hugging Face

In [None]:
import os
from huggingface_hub import login

# Authenticate this runtime with the Hugging Face Hub. The access token is
# read from the `collab_token` environment variable.
hf_token = os.environ.get("collab_token")
login(token=hf_token)

#### Step 7: Downloading the desired model.

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen2.5-Coder-14B-Instruct"

# Download (and cache) the tokenizer and weights for the chosen checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# torch_dtype="auto" loads the weights in the dtype recorded in the
# checkpoint's config instead of the float32 default, which avoids inflating
# the memory footprint of a half-precision checkpoint.
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto")

#### Step 8: Ensuring that the model works

In [None]:
import gc

import torch

# Use the GPU when one is attached to the runtime.
if torch.cuda.is_available():
    model = model.to("cuda")

# test prompt
prompt = "The capital of France is"

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Greedy decoding (do_sample=False) keeps the sanity check deterministic.
outputs = model.generate(
    **inputs,
    max_new_tokens=20,
    do_sample=False,
)

decoded_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(decoded_text)

# Release everything the smoke test put on the device. `del model` alone leaves
# the input/output tensors alive and keeps freed blocks in PyTorch's CUDA
# cache, which reduces the memory available to later cells.
del model, inputs, outputs
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

## FROM THIS STEP ON, COPY AND PASTE WHATEVER EXPERIMENT CELLS YOU NEED.

Remember to complete this step before uploading the notebook to Google Colab.

## LLM Consistency Testing with Mistral LLM

This notebook contains code for testing code inconsistency in Mistral LLM

In [None]:
import os
import sys

In [None]:
# Put the directory two levels above the current working directory on
# sys.path so that project packages can be imported from there.
# NOTE(review): if the working directory is the repository root (as it is after
# the `%cd` in Step 4), `proj_dir` resolves to two levels *above* the repo -
# confirm this matches the intended layout.
curr_dir = os.getcwd()
parent_dir = os.path.dirname(curr_dir)
proj_dir = os.path.dirname(parent_dir)
# Guard against duplicate entries when the cell is re-run.
if proj_dir not in sys.path:
    sys.path.append(proj_dir)

In [None]:
from mcq_inconsistency.mcq_inconsistency_tester import LLMMCQInconsistencyTester
from mcq_inconsistency.prompt_templates.prompt_template import MCQInconsistencyPromptTemplate
from utility.constants import CodeMMLU, LexicalMutations, SyntacticMutations, LogicalMutations, PromptTypes, ReasoningModels, NonReasoningModels

# Declaring constants

In [None]:
## Short aliases for the enum members used by the experiment cells.

# Prompt types
ZERO_SHOT, ONE_SHOT, FEW_SHOT = (
    PromptTypes.ZERO_SHOT,
    PromptTypes.ONE_SHOT,
    PromptTypes.FEW_SHOT,
)

# Syntactic mutations
FOR2WHILE, FOR2ENUMERATE = (
    SyntacticMutations.FOR2WHILE,
    SyntacticMutations.FOR2ENUMERATE,
)

# Lexical mutations
RANDOM_MUTATION, SEQUENTIAL_MUTATION, LITERAL_FORMAT = (
    LexicalMutations.RANDOM,
    LexicalMutations.SEQUENTIAL,
    LexicalMutations.LITERAL_FORMAT,
)

# Logical mutations
BOOLEAN_LITERAL, DEMORGAN, COMMUTATIVE_REORDER = (
    LogicalMutations.BOOLEAN_LITERAL,
    LogicalMutations.DEMORGAN,
    LogicalMutations.COMMUTATIVE_REORDER,
)
CONSTANT_UNFOLD, CONSTANT_UNFOLD_ADD, CONSTANT_UNFOLD_MULT = (
    LogicalMutations.CONSTANT_UNFOLD,
    LogicalMutations.CONSTANT_UNFOLD_ADD,
    LogicalMutations.CONSTANT_UNFOLD_MULT,
)

In [None]:
import os
import shutil
import subprocess

# Runs the MCQ inconsistency test once per mutation configuration, writes each
# run's CSV under <proj_dir>/results/..., and copies it to Google Drive.

task_set = "CodeMMLU_MCQ_code_completion"
llmtester = LLMMCQInconsistencyTester(task_set)

prompt_type = FEW_SHOT
model_name = "Qwen/Qwen2.5-Coder-14B-Instruct"

task_type = CodeMMLU.Tasks.CODE_COMPLETION

# model_name contains a "/", so this yields a nested <org>/<model> directory.
results_base_dir = os.path.join(proj_dir, "results", task_type, model_name)
os.makedirs(results_base_dir, exist_ok=True)

# Make sure Google Drive is actually mounted before writing under
# /content/drive. Without a mount, os.makedirs below would just create a local
# folder on the runtime's disk and the "saved to Drive" copies would be lost
# when the runtime is recycled.
drive_mount_point = "/content/drive"
if not os.path.ismount(drive_mount_point):
    from google.colab import drive
    drive.mount(drive_mount_point)

# The Drive destination does not depend on the mutation, so create it once.
drive_dst_dir = os.path.join("/content/drive/MyDrive/your-repo/", task_type, model_name)
os.makedirs(drive_dst_dir, exist_ok=True)

# One experiment run per entry; the empty list is the un-mutated baseline.
mutation_configs = [
    [],
    [RANDOM_MUTATION],
    [SEQUENTIAL_MUTATION],
    [FOR2WHILE],
    [FOR2ENUMERATE],
    [DEMORGAN],
    [LITERAL_FORMAT],
    [BOOLEAN_LITERAL],
    [COMMUTATIVE_REORDER],
    [CONSTANT_UNFOLD],
    [CONSTANT_UNFOLD_ADD],
    [CONSTANT_UNFOLD_MULT]
]

for mutations in mutation_configs:
    mutation_str = "_".join(mutations) if mutations else "no_mutation"

    # Same file name locally and on Drive.
    result_filename = f"{task_set}_{prompt_type}_{mutation_str}.csv"
    output_file_path = os.path.join(results_base_dir, result_filename)
    drive_dst = os.path.join(drive_dst_dir, result_filename)

    # Run experiment
    # NOTE(review): task_set is passed as the literal "CodeMMLU" here, not the
    # `task_set` variable defined above - confirm this is intended.
    llmtester.run_mcq_inconsistency_test(
        prompt_helper=MCQInconsistencyPromptTemplate.return_model_appropriate_prompt(prompt_type, model_name),
        num_tests=llmtester.question_database.count_documents({}),
        prompt_type=prompt_type,
        mutations=mutations,
        output_file_path=output_file_path,
        task_type=task_type,
        task_set="CodeMMLU",
        model_name=model_name,
    )

    # Overwrite results on Drive
    shutil.copy(output_file_path, drive_dst)
    print(f"Saved {mutation_str} results to Drive")