# Set Up for running experiments on Google Colab

#### Step 1: Start by importing the .env file

Ensure that the fields "mongoDB_uri", "collab_token", "GITHUB_USERNAME", "GITHUB_BRANCH_NAME" and "GITHUB_PAT" are all filled in.

In [None]:
from google.colab import files

# files.upload() returns a {filename: file_bytes} dict. If the call is left as
# the last expression of the cell, that dict -- i.e. the raw contents of the
# .env file, secrets included -- is echoed into the saved cell output.
# Bind the result instead and report only the file names.
uploaded_files = files.upload()
print(f"Uploaded: {', '.join(uploaded_files) or 'nothing'}")

#### Step 2: Install python-dotenv package and load the dotenv

In [None]:
! pip install python-dotenv

In [None]:
from dotenv import load_dotenv

# load_dotenv() returns False when no .env file was found or the file defined
# no variables. Every later step reads its credentials from the environment,
# so stop here with a clear message instead of failing further down.
if not load_dotenv():
    raise FileNotFoundError(
        "No usable .env file was loaded. Upload the .env file in Step 1 and "
        "make sure its fields are filled in."
    )
print(".env loaded")

#### Step 3: Cloning the repository

In [None]:
# Clone the experiment branch using the GitHub credentials loaded from the .env file.
import os, subprocess

GITHUB_USER = os.getenv('GITHUB_USERNAME')
GITHUB_BRANCH_NAME = os.getenv("GITHUB_BRANCH_NAME")
github_pat = os.getenv("GITHUB_PAT")

# 1) Fail early, naming the missing field, instead of hitting a TypeError when
#    None is written into os.environ or when git receives a None argument.
required_settings = {
    "GITHUB_USERNAME": GITHUB_USER,
    "GITHUB_BRANCH_NAME": GITHUB_BRANCH_NAME,
    "GITHUB_PAT": github_pat,
}
missing_settings = [name for name, value in required_settings.items() if not value]
if missing_settings:
    raise EnvironmentError(
        f"Missing required .env entries: {', '.join(missing_settings)}"
    )

os.environ["GH_TOKEN"] = github_pat

# 2) Clone the specific branch without ever printing the token.
url = f"https://{GITHUB_USER}:{os.environ['GH_TOKEN']}@github.com/your-org/your-repo.git"
cmd = ["git","clone","-b", GITHUB_BRANCH_NAME, "--single-branch", "--depth","1", url]

# The result is bound to a name on purpose: a bare subprocess.run(...) as the
# cell's last expression would echo CompletedProcess(args=[..., url]) and with
# it the token. check=False for the same reason: CalledProcessError built by
# check=True embeds the unredacted command line in its message.
clone_result = subprocess.run(
    cmd,
    check=False,
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    text=True,
)
if clone_result.returncode != 0:
    redacted_cmd = [arg.replace(github_pat, "***") for arg in cmd]
    redacted_log = (clone_result.stdout or "").replace(github_pat, "***")
    print(redacted_log)
    raise subprocess.CalledProcessError(
        clone_result.returncode, redacted_cmd, output=redacted_log
    )

# 3) (Optional) Remove the token from the saved remote to avoid accidental leaks.
#    Off by default, which keeps the clone's remote exactly as git created it.
SCRUB_TOKEN_FROM_REMOTE = False
if SCRUB_TOKEN_FROM_REMOTE:
    subprocess.run(
        ["git", "-C", "your-repo", "remote", "set-url", "origin",
         "https://github.com/your-org/your-repo.git"],
        check=True,
    )

#### Step 4: Change directory to the cloned GitHub repo

In [None]:
%cd {"your-repo"}

#### Step 5: Pip install the necessary packages from requirements-colab.txt

In [None]:
! pip install -r requirements-colab.txt

#### Step 6: Log in to Hugging Face

In [None]:
import os
from huggingface_hub import login

# The Hugging Face access token is read from the "collab_token" environment
# variable (one of the fields of the uploaded .env file).
hf_token = os.environ.get('collab_token')

# Authenticate this session against the Hugging Face Hub.
login(token=hf_token)

#### Step 7: Downloading the desired model.

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Without an explicit dtype the weights are materialised in float32, which for
# an 8B-parameter model is roughly 32 GB. torch_dtype="auto" loads the weights
# in the dtype recorded in the checkpoint instead.
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto")

#### Step 8: Ensuring that the model works

In [None]:
import gc

import torch

# Run on the GPU when one is attached; otherwise stay on the CPU.
if torch.cuda.is_available():
    model = model.to("cuda")

# test prompt
prompt = "The capital of France is"

# Tokenise and place the input tensors on the same device as the model.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Greedy decoding (do_sample=False) of a short continuation.
outputs = model.generate(
    **inputs,
    max_new_tokens=20,
    do_sample=False,
)

decoded_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(decoded_text)

# `del` only removes the name. Collect the now-unreachable model and then
# return the allocator's cached blocks so the experiment cells start with the
# GPU memory actually free.
del model
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

## FROM THIS STEP ON, COPY AND PASTE WHATEVER EXPERIMENT CELLS YOU NEED.

Remember to complete this step (copying in the experiment cells you need) before uploading the notebook to Google Colab.

## LLM Consistency Testing with Llama 3.1 8B Instruct

This notebook contains code for testing code-generation inconsistency in the Llama 3.1 8B Instruct LLM.

In [None]:
import os
import sys

In [None]:
def find_project_root(start_dir):
    """Locate the project root starting from ``start_dir``.

    Walks from ``start_dir`` up to the filesystem root and returns the first
    directory that contains both a ``code_generation`` and a ``utility``
    sub-directory (the two top-level packages imported by this notebook).

    If no such directory exists, falls back to the directory two levels above
    ``start_dir``, which is the layout this notebook previously hard-coded.

    Args:
        start_dir: Directory to start searching from.

    Returns:
        The project root path as a string.
    """
    candidate = os.path.abspath(start_dir)
    while True:
        has_packages = all(
            os.path.isdir(os.path.join(candidate, package))
            for package in ("code_generation", "utility")
        )
        if has_packages:
            return candidate
        parent = os.path.dirname(candidate)
        if parent == candidate:  # reached the filesystem root
            break
        candidate = parent
    return os.path.dirname(os.path.dirname(start_dir))


curr_dir = os.getcwd()
parent_dir = os.path.dirname(curr_dir)
# On Colab the working directory is the repository root itself (Step 4), so a
# fixed "two levels up" would resolve to "/"; search for the root instead.
proj_dir = find_project_root(curr_dir)
# Guard against piling up duplicate entries when the cell is re-run.
if proj_dir not in sys.path:
    sys.path.append(proj_dir)

In [None]:
from code_generation.code_generation_tester import CodeGenerationTester
from code_generation.prompt_templates.prompt_template import OpenEndedPromptTemplate
from utility.constants import BigCodeBench, HumanEval, LexicalMutations, SyntacticMutations, LogicalMutations, PromptTypes, CodeGeneration, ReasoningModels, NonReasoningModels

# Declaring constants

In [None]:
## Declaring Prompt Type Constants
ZERO_SHOT = PromptTypes.ZERO_SHOT
ONE_SHOT = PromptTypes.ONE_SHOT
FEW_SHOT = PromptTypes.FEW_SHOT

## Declaring Mutation Constants
RANDOM_MUTATION = LexicalMutations.RANDOM
SEQUENTIAL_MUTATION = LexicalMutations.SEQUENTIAL
LITERAL_FORMAT = LexicalMutations.LITERAL_FORMAT

## Declaring Benchmark Name Constants
BIGCODEBENCH = BigCodeBench.NAME
HUMANEVAL = HumanEval.NAME

In [None]:
import torch
import gc

# Clear GPU memory before running the test.
# Collect unreachable Python objects first so the tensors they own are
# released; only then can empty_cache() hand those blocks back.
gc.collect()

if torch.cuda.is_available():
    torch.cuda.empty_cache()

    # Check available memory: device total minus what this process has allocated.
    total_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"Available GPU memory: {(total_bytes - torch.cuda.memory_allocated()) / 1024**3:.2f} GB")
else:
    # Querying device 0 without a GPU raises; report the situation instead.
    print("No CUDA device available; skipping the GPU memory report.")

In [None]:
# Mount Google Drive at /content/drive; the experiment cell copies its result
# files into a folder under this mount point.
from google.colab import drive
drive.mount("/content/drive")

In [None]:
import os
import shutil
import subprocess

# Experiment configuration.
prompt_type = FEW_SHOT
model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"

task_set = HUMANEVAL

try:
    llmtester = CodeGenerationTester(f"{task_set}_Code_Generation")
except Exception as e:
    print(f'llmtester could not launch due to the following error: {e}')
    # Re-raise: everything below needs `llmtester`. Continuing would either hit
    # a NameError or silently reuse a stale tester left over from an earlier run.
    raise


# model_name contains a "/", so this resolves to a nested directory.
results_base_dir = os.path.join(proj_dir, f'results/code_generation/{model_name}')
os.makedirs(results_base_dir, exist_ok=True)

# One run per entry: no mutation, random mutation, sequential mutation.
mutation_configs = [
    [],
    [RANDOM_MUTATION],
    [SEQUENTIAL_MUTATION],
]

for mutations in mutation_configs:
    mutation_str = "_".join(mutations) if mutations else "no_mutation"

    output_file_path = os.path.join(results_base_dir, f"Llama_Final_Runs_{task_set}_{prompt_type}_{mutation_str}.csv")
    drive_dst_dir = os.path.join("/content/drive/MyDrive/your-repo/code_generation/", model_name)
    drive_dst = os.path.join(drive_dst_dir, f"Llama_Final_Runs_{task_set}_{prompt_type}_{mutation_str}.csv")

    # Ensure Drive folder exists
    os.makedirs(drive_dst_dir, exist_ok=True)

    # Run experiment over every document in the question database.
    # NOTE(review): presumably this writes its results to output_file_path,
    # since that file is copied right after -- confirm in CodeGenerationTester.
    llmtester.run_code_generation_test(
        prompt_helper=OpenEndedPromptTemplate.return_model_appropriate_prompt(prompt_type, model_name),
        num_tests=llmtester.question_database.count_documents({}),
        mutations=mutations,
        prompt_type=prompt_type,
        output_file_path=output_file_path,
        task_set=task_set,
    )

    # Overwrite results on Drive
    shutil.copy(output_file_path, drive_dst)
    print(f"Saved {mutation_str} results to Drive")