# FortiQA LM-Eval-Harness Experiments

In [1]:
# This doesn't save? pip install -r requirements.txt # save in one file
# Install LM-Eval
!pip install git+https://github.com/EleutherAI/lm-evaluation-harness.git
# Install HuggingFace Dataset Loader
!pip install datasets # produces an error and does not allow dataset to load
# pip show datasets huggingface_hub # shows outdated version 0.23.2
!pip install --upgrade huggingface_hub
!pip install --upgrade datasets

Collecting git+https://github.com/EleutherAI/lm-evaluation-harness.git
  Cloning https://github.com/EleutherAI/lm-evaluation-harness.git to /tmp/pip-req-build-76871vin
  Running command git clone --filter=blob:none --quiet https://github.com/EleutherAI/lm-evaluation-harness.git /tmp/pip-req-build-76871vin
  Resolved https://github.com/EleutherAI/lm-evaluation-harness.git to commit a9a0e3caaeecf3fb479c7c224fffd0af30a6ed96
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone


## Set up SecQA Task

In [2]:
from datasets import load_dataset

# Fetch the "secqa_v1" configuration of SecQA from the Hugging Face Hub and
# show its splits and columns.
dataset = load_dataset(path="zefang-liu/secqa", name="secqa_v1")
print(dataset)

DatasetDict({
    dev: Dataset({
        features: ['Question', 'A', 'B', 'C', 'D', 'Answer', 'Explanation'],
        num_rows: 5
    })
    val: Dataset({
        features: ['Question', 'A', 'B', 'C', 'D', 'Answer', 'Explanation'],
        num_rows: 12
    })
    test: Dataset({
        features: ['Question', 'A', 'B', 'C', 'D', 'Answer', 'Explanation'],
        num_rows: 110
    })
})


In [3]:
YAML_secqa_string = """
task: secqa
dataset_path: zefang-liu/secqa
dataset_name: secqa_v1
output_type: multiple_choice
training_split: dev
validation_split: val
test_split: test
doc_to_text: "{{Question}}\nA) {{A}}\nB) {{B}}\nC) {{C}}\nD) {{D}}\nWhat is the correct answer? Use only the letter."
doc_to_target: Answer
doc_to_choice: ["A","B","C","D"]
should_decontaminate: true
doc_to_decontamination_query: passage
output_type: multiple_choice
metric_list:
  - metric: acc
"""
with open("secqa.yaml", "w") as f:
    f.write(YAML_secqa_string)

# In terminal
!mkdir -p /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/secqa
!cp /home/jovyan/secqa.yaml /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/secqa/


!export GIT_DISCOVERY_ACROSS_FILESYSTEM=1

# git config --global credential.helper store
# huggingface-cli login
# ^ Run the two commands above in a terminal to access restricted (gated) models.
#   Next time, try the in-notebook login further below to avoid the terminal.

## TinyLlama-1.1B-Chat-v1.0

In [8]:
#--model_args pretrained= ./llama-3.2-1b-instruct-q8_0.gguf

# EleutherAI model  # takes a long time to run

# --limit = number of samples (questions) to run for the evaluation

#    --output_path ./results #only include when ready to log, not sure how to delete files

In [12]:
# TinyLlama-1.1B-Chat-v1.0 successful experiment #takes several minutes
!lm_eval \
    --model hf \
    --include_path /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/ \
    --model_args pretrained=TinyLlama/TinyLlama-1.1B-Chat-v1.0 \
    --tasks secqa \
    --limit 120 \
    --verbosity DEBUG

2025-02-23 18:52:46.591286: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-02-23 18:52:46.638088: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-02-23:18:52:49,249 INFO     [lm_eval.__main__:307] Including path: /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/
2025-02-23:18:52:50,232 INFO     [lm_eval.tasks:460] The tag 'kobest' is already registered as a group, this tag will not be registered. This may affect tasks you want to call.
2025-02-23:18:52:50,233 INFO     [lm_eval.tasks:460] The tag 'kobest' 

## Mistral-7B

In [4]:
import os
from getpass import getpass

from huggingface_hub import login

# Never paste the access token into the notebook: it ends up in version
# control and in every shared copy. Read it from the HF_TOKEN environment
# variable, or prompt for it (getpass does not echo the input).
# SECURITY: the token that used to be hard-coded in this cell has been exposed
# and must be revoked at https://huggingface.co/settings/tokens.
login(token=os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: "))

In [43]:
# Failed attempt, kept for reference. The access token has been redacted:
# export HF_TOKEN in the environment instead of pasting the token here.
# !export HUGGING_FACE_HUB_TOKEN=$HF_TOKEN
# !HUGGING_FACE_HUB_TOKEN=$HF_TOKEN lm_eval \
#     --model hf \
#     --include_path /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/ \
#     --model_args pretrained=mistralai/Mistral-7B-Instruct,token=$HF_TOKEN \
#     --tasks secqa \
#     --limit 10 \
#     --verbosity DEBUG

# raise EnvironmentError(
# OSError: mistralai/Mistral-7B-Instruct is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
# If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`
#
# Likely cause: the repo id is missing its version suffix. The id
# `mistralai/Mistral-7B-Instruct-v0.2`, used further down, loads without this error.

In [1]:
# from huggingface_hub import HfApi
# api = HfApi()
# api.model_info("mistralai/Mistral-7B-Instruct", token=os.environ["HF_TOKEN"])  # token redacted; read it from the environment

# RepositoryNotFoundError: 404 Client Error. (Request ID: Root=1-67aaf6e2-3aebb68e4c780c423c471c29;59cfe869-d2f1-4585-b1e2-bb5dbece4f59)

# Repository Not Found for url: https://huggingface.co/api/models/mistralai/Mistral-7B-Instruct.
# Please make sure you specified the correct `repo_id` and `repo_type`.
# If you are trying to access a private or gated repo, make sure you are authenticated.

In [5]:
from huggingface_hub import whoami

# Check which account and permissions the current credential maps to.
# No token is passed explicitly: whoami() falls back to the locally stored
# credential (the one saved by login()), so the secret never appears in the
# notebook source.
# NOTE: the response contains account details; clear this cell's output
# before sharing or committing the notebook.
print(whoami())

{'type': 'user', 'id': '67a813c39cf69ce2315ea2b7', 'name': 'mgaddi', 'fullname': 'Matilda Gaddi', 'isPro': False, 'avatarUrl': 'https://cdn-avatars.huggingface.co/v1/production/uploads/no-auth/fpVpRk3UYfML7boNVQPWd.png', 'orgs': [], 'auth': {'type': 'access_token', 'accessToken': {'displayName': 'token1', 'role': 'fineGrained', 'createdAt': '2025-02-09T02:59:01.120Z', 'fineGrained': {'canReadGatedRepos': True, 'global': ['inference.serverless.write'], 'scoped': [{'entity': {'_id': '67a813c39cf69ce2315ea2b7', 'type': 'user', 'name': 'mgaddi'}, 'permissions': ['repo.content.read', 'inference.endpoints.infer.write', 'user.webhooks.read', 'collection.read']}]}}}}


In [17]:
from huggingface_hub import HfFolder

# Confirm that a token is stored locally WITHOUT echoing it: printing the
# token itself writes the secret into the notebook's saved output.
print("Hugging Face token is stored" if HfFolder.get_token() else "No Hugging Face token stored")

hf_********  (redacted: a live access token was printed here and must be revoked)


In [22]:
# Pull the Mistral instruct checkpoint and its tokenizer directly from the Hub.
# Takes roughly 4 minutes.
from transformers import AutoModelForCausalLM, AutoTokenizer

MISTRAL_REPO_ID = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(MISTRAL_REPO_ID)
model = AutoModelForCausalLM.from_pretrained(MISTRAL_REPO_ID)

In [27]:
# Write the loaded model and tokenizer to a local directory (relative to the
# current working directory). Takes about 4 minutes.
mistral_local_dir = "./mistralai/Mistral-7B-Instruct-v0.2"
model.save_pretrained(mistral_local_dir)
tokenizer.save_pretrained(mistral_local_dir)

('./mistralai/Mistral-7B-Instruct-v0.2/tokenizer_config.json',
 './mistralai/Mistral-7B-Instruct-v0.2/special_tokens_map.json',
 './mistralai/Mistral-7B-Instruct-v0.2/tokenizer.model',
 './mistralai/Mistral-7B-Instruct-v0.2/added_tokens.json',
 './mistralai/Mistral-7B-Instruct-v0.2/tokenizer.json')

In [None]:
# with model locally saved 
# needs whole path to model
# not getting past Loading checkpoint shards:   0%|                          | 0/6 [00:00<?, ?it/s]^C
!lm_eval \
    --model hf \
    --include_path /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/ \
    --model_args pretrained=/home/jovyan/mistralai/Mistral-7B-Instruct-v0.2 \
    --tasks secqa \
    --limit 10 \
    --verbosity DEBUG

2025-02-11:08:04:33,400 INFO     [__main__.py:284] Verbosity set to DEBUG
2025-02-11:08:04:33,400 INFO     [__main__.py:308] Including path: /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/
2025-02-11:08:04:34,838 INFO     [__init__.py:459] The tag 'kobest' is already registered as a group, this tag will not be registered. This may affect tasks you want to call.
2025-02-11:08:04:45,011 INFO     [__init__.py:459] The tag 'kobest' is already registered as a group, this tag will not be registered. This may affect tasks you want to call.
2025-02-11:08:04:53,763 INFO     [__main__.py:381] Selected Tasks: ['secqa']
2025-02-11:08:04:53,765 INFO     [evaluator.py:165] Setting random seed to 0 | Setting numpy seed to 1234 | Setting torch manual seed to 1234 | Setting fewshot manual seed to 1234
2025-02-11:08:04:53,765 INFO     [evaluator.py:202] Initializing hf model, with arguments: {'pretrained': '/home/jovyan/mistralai/Mistral-7B-Instruct-v0.2'}
2025-02-11:08:04:53,852 INFO     [huggin

## LLaMa-7B

In [None]:
# Stuck for over 10 min at: Loading checkpoint shards:   0%|                          | 0/2 [00:00<?, ?it/s]

In [4]:
#!pip install --upgrade pandas
##!pip install --upgrade numpy  # do NOT run: not compatible with the installed packages; this might have killed the server previously
#!pip install --upgrade torch

Collecting numpy
  Using cached numpy-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
Using cached numpy-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.4 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.26.4
    Uninstalling numpy-1.26.4:
      Successfully uninstalled numpy-1.26.4
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cupy-cuda11x 13.1.0 requires numpy<1.29,>=1.22, but you have numpy 2.2.2 which is incompatible.
cuquantum-python-cu11 24.3.0.post1 requires numpy~=1.21, but you have numpy 2.2.2 which is incompatible.
langchain 0.1.20 requires numpy<2,>=1, but you have numpy 2.2.2 which is incompatible.
langchain-community 0.0.38 requires numpy<2,>=1, but you have numpy 2.2.2 which is incompatible.
tensorflow 2.16.1 requires numpy<

In [5]:
!lm_eval \
    --model hf \
    --include_path /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/ \
    --model_args pretrained=meta-llama/Llama-2-7b-chat-hf \
    --tasks secqa \
    --limit 10 \
    --verbosity DEBUG


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/opt/conda/bin/lm_eval", line 5, in <module>
    from lm_eval.__main__ import cli_evaluate
  File "/opt/conda/lib/python3.11/site-packages/lm_eval/__init__.py", line 1, in <module>
    from .evaluator import evaluate, simple_evaluate
  File "/opt/conda/lib/python3.11/site-packages/lm_eval/evaluator.py", line 12, in <module>
    import lm_eval.api.metrics
  File "/opt/conda/lib/python3.11/site-packages/lm_eval/api/metrics.py", line 12, in <module>
    from lm_eval.api.registry import register_aggregation, register_m

## CyberBench

In [15]:
import nltk.data

# List of directories NLTK searches when it looks up a resource.
nltk_search_dirs = nltk.data.path
print(nltk_search_dirs)

LookupError: 
**********************************************************************
  Resource [93mwordnet[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('wordnet')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mcorpora/wordnet[0m

  Searched in:
    - '/home/jovyan/nltk_data'
    - '/opt/conda/nltk_data'
    - '/opt/conda/share/nltk_data'
    - '/opt/conda/lib/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
**********************************************************************


In [10]:
from nltk.corpus import wordnet

In [19]:
import nltk

ModuleNotFoundError: No module named 'nltk'

In [23]:
!pip install nltk



In [24]:
import nltk

ModuleNotFoundError: No module named 'nltk'

In [20]:
!pip list | grep nltk

nltk                          3.9


In [11]:
#!pip install --upgrade numpy

## GPTQModel

In [11]:
## clone repo
#!git clone https://github.com/ModelCloud/GPTQModel.git && cd GPTQModel
import os

# CUDA_HOME has to be defined explicitly for the gptqmodel install to work.
CUDA_HOME = '/usr/local/cuda-12.4'


def _prepend_env_path(var, entry):
    """Put `entry` at the front of the path-list environment variable `var`.

    Empty entries are dropped (an empty entry in PATH / LD_LIBRARY_PATH means
    "current directory", which is what a trailing ':' used to add when the
    variable was previously unset), and an existing copy of `entry` is removed
    first so that re-running the cell does not keep growing the variable.
    """
    kept = [p for p in os.environ.get(var, '').split(os.pathsep) if p and p != entry]
    os.environ[var] = os.pathsep.join([entry, *kept])


os.environ['CUDA_HOME'] = CUDA_HOME
_prepend_env_path('PATH', f"{CUDA_HOME}/bin")
_prepend_env_path('LD_LIBRARY_PATH', f"{CUDA_HOME}/lib64")

# Show the value that child processes (`!` commands, pip builds) will inherit.
print(os.environ['CUDA_HOME'])

/usr/local/cuda-12.4


In [12]:
!pip install -r /home/jovyan/GPTQModel/requirements.txt

Collecting numpy>=2.2.2 (from -r /home/jovyan/GPTQModel/requirements.txt (line 3))
  Using cached numpy-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
Using cached numpy-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.4 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.24.3
    Uninstalling numpy-1.24.3:
      Successfully uninstalled numpy-1.24.3
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cupy-cuda11x 13.1.0 requires numpy<1.29,>=1.22, but you have numpy 2.2.3 which is incompatible.
cuquantum-python-cu11 24.3.0.post1 requires numpy~=1.21, but you have numpy 2.2.3 which is incompatible.
langchain 0.1.20 requires numpy<2,>=1, but you have numpy 2.2.3 which is incompatible.
langchain-community 0.0.38 requires numpy<2,>=1, but you have nu

In [13]:
!pip install gptqmodel --no-build-isolation

Collecting gptqmodel
  Using cached gptqmodel-1.9.0+cu121torch2.3-cp311-cp311-linux_x86_64.whl
Discarding [4;34mhttps://files.pythonhosted.org/packages/db/b6/348569757042bb3a674935fe20ae059e56866b4c544e0fa354daf47db14f/gptqmodel-1.9.0.tar.gz (from https://pypi.org/simple/gptqmodel/) (requires-python:>=3.9.0)[0m: [33mRequested gptqmodel from file:///home/jovyan/.cache/pip/wheels/75/dc/a8/85f4e3aafe265c6f4df9f372b22be86b20de8d1b65b0680268/gptqmodel-1.9.0%2Bcu121torch2.3-cp311-cp311-linux_x86_64.whl has inconsistent version: expected '1.9.0', but metadata has '1.9.0+cu121torch2.3'[0m
  Using cached gptqmodel-1.8.1+cu121torch2.3-cp311-cp311-linux_x86_64.whl
Discarding [4;34mhttps://files.pythonhosted.org/packages/46/30/cb98de206a29807a59e63c10238eb90570143229df9b67a9a7f2342672bd/gptqmodel-1.8.1.tar.gz (from https://pypi.org/simple/gptqmodel/) (requires-python:>=3.9.0)[0m: [33mRequested gptqmodel from file:///home/jovyan/.cache/pip/wheels/64/3e/c4/a4d5f7b326cc1970ddee6324a2d6652b2cd2

In [8]:
import numpy

# Version string of the numpy the kernel has actually imported.
numpy.__version__

'1.26.4'

In [9]:
!pip uninstall -y numpy
!pip install numpy==1.24.3 # to work with multiarray module used in gptqmodel

Found existing installation: numpy 2.2.3
Uninstalling numpy-2.2.3:
  Successfully uninstalled numpy-2.2.3
Collecting numpy==1.24.3
  Using cached numpy-1.24.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)
Using cached numpy-1.24.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)
Installing collected packages: numpy
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
together 1.2.0 requires pillow<11.0.0,>=10.3.0, but you have pillow 11.1.0 which is incompatible.
tensorflow 2.16.1 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 5.29.3 which is incompatible.[0m[31m
[0mSuccessfully installed numpy-1.24.3


In [10]:
# !export CUDA_HOME=/opt/conda && echo $CUDA_HOME
# # !export CUDA_HOME=/home/jovyan/cuda-12.4 not working but right version
# #!pip install gptqmodel
# #!pip install lm-eval>=0.4.7
# ## !pip install -v gptqmodel --no-build-isolation

# # pip: compile and install
# # You can install optional modules like autoround, ipex, vllm, sglang, bitblas, and ipex.
# # Example: pip install -v --no-build-isolation .[vllm,sglang,bitblas,ipex,auto_round]

# #in terminal:
# cd GPTQModel
# pip install -v . --no-build-isolation

In [16]:
!pip install gptqmodel[evalplus]

[0m

In [7]:
#!uv pip install -v gptqmodel --no-build-isolation
from gptqmodel import GPTQModel
from gptqmodel.utils.eval import EVAL

# model_id = "ModelCloud/Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1"

# # Use `lm-eval` as framework to evaluate the model
# lm_eval_results = GPTQModel.eval(model_id, framework=EVAL.LM_EVAL, tasks=[EVAL.LM_EVAL.ARC_CHALLENGE], output_file='lm-eval_result.json')

# # Use `evalplus` as framework to evaluate the model
# evalplus_results = GPTQModel.eval(model_id, framework=EVAL.EVALPLUS, tasks=[EVAL.EVALPLUS.HUMAN], output_file='evalplus_result.json')

ModuleNotFoundError: No module named 'gptqmodel'

In [13]:
#setup
!pip install -r FortiQAGPTQrequirements.txt

1039.17s - pydevd: Sending message related to process being replaced timed-out after 5 seconds
Collecting git+https://github.com/EleutherAI/lm-evaluation-harness.git (from -r FortiQAGPTQrequirements.txt (line 1))
  Cloning https://github.com/EleutherAI/lm-evaluation-harness.git to /tmp/pip-req-build-87usys_i
  Running command git clone --filter=blob:none --quiet https://github.com/EleutherAI/lm-evaluation-harness.git /tmp/pip-req-build-87usys_i
  Resolved https://github.com/EleutherAI/lm-evaluation-harness.git to commit 5e0b6f16cc5121eb889d73ed3c7418def096c3f9
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting beautifulsoup4==4.10.0 (from -r FortiQAGPTQrequirements.txt (line 6))
  Using cached beautifulsoup4-4.10.0-py3-none-any.whl.metadata (3.5 kB)
Collecting evaluate==0.4.0 (from -r FortiQAGPTQrequirements.txt (line 7))
  Using cached evaluate-0.4.0-py3-none-an

In [10]:
!pip install gptqmodel --no-build-isolation

Collecting gptqmodel
  Using cached gptqmodel-1.9.0+cu121torch2.3-cp311-cp311-linux_x86_64.whl
Discarding [4;34mhttps://files.pythonhosted.org/packages/db/b6/348569757042bb3a674935fe20ae059e56866b4c544e0fa354daf47db14f/gptqmodel-1.9.0.tar.gz (from https://pypi.org/simple/gptqmodel/) (requires-python:>=3.9.0)[0m: [33mRequested gptqmodel from file:///home/jovyan/.cache/pip/wheels/75/dc/a8/85f4e3aafe265c6f4df9f372b22be86b20de8d1b65b0680268/gptqmodel-1.9.0%2Bcu121torch2.3-cp311-cp311-linux_x86_64.whl has inconsistent version: expected '1.9.0', but metadata has '1.9.0+cu121torch2.3'[0m
  Using cached gptqmodel-1.8.1+cu121torch2.3-cp311-cp311-linux_x86_64.whl
Discarding [4;34mhttps://files.pythonhosted.org/packages/46/30/cb98de206a29807a59e63c10238eb90570143229df9b67a9a7f2342672bd/gptqmodel-1.8.1.tar.gz (from https://pypi.org/simple/gptqmodel/) (requires-python:>=3.9.0)[0m: [33mRequested gptqmodel from file:///home/jovyan/.cache/pip/wheels/64/3e/c4/a4d5f7b326cc1970ddee6324a2d6652b2cd2

In [7]:
YAML_secqa_string = """
task: secqa
dataset_path: zefang-liu/secqa
dataset_name: secqa_v1
output_type: multiple_choice
training_split: dev
validation_split: val
test_split: test
doc_to_text: "{{Question}}\nA) {{A}}\nB) {{B}}\nC) {{C}}\nD) {{D}}\nWhat is the correct answer? Use only the letter."
doc_to_target: Answer
doc_to_choice: ["A","B","C","D"]
should_decontaminate: true
doc_to_decontamination_query: passage
output_type: multiple_choice
metric_list:
  - metric: acc
"""
with open("secqa.yaml", "w") as f:
    f.write(YAML_secqa_string)


!mkdir -p /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/secqa
!cp /home/jovyan/secqa.yaml /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/secqa/


!export GIT_DISCOVERY_ACROSS_FILESYSTEM=1

853.16s - pydevd: Sending message related to process being replaced timed-out after 5 seconds
Usage: pip [options]

[31mERROR: Invalid requirement: gptqmodel --no-build-isolation
pip: error: no such option: --no-build-isolation
[0m[31m
[0m858.75s - pydevd: Sending message related to process being replaced timed-out after 5 seconds
863.88s - pydevd: Sending message related to process being replaced timed-out after 5 seconds
869.01s - pydevd: Sending message related to process being replaced timed-out after 5 seconds


In [6]:
# TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ
!lm_eval \
    --model hf \
    --include_path /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/ \
    --model_args pretrained=TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ,gptqmodel=True \
    --tasks secqa \
    --limit 120 \
    --verbosity DEBUG

2025-02-24 17:19:35.264104: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-02-24:17:19:39,452 INFO     [lm_eval.__main__:307] Including path: /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/
2025-02-24:17:19:44,224 DEBUG    [lm_eval.tasks:523] File _evalita-mp_ner_adg.yaml in /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/evalita_llm could not be loaded
2025-02-24:17:19:44,230 DEBUG    [lm_eval.tasks:523] File _evalita-mp_ner_fic.yaml in /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/evalita_llm could not be loaded
2025-02-24:17:19:44,237 DEBUG    [lm_eval.tasks:523] File _evalita-mp_ner_wn.yaml in /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/evalita_llm could not be loaded
2025-02-24:17:19:52,068 INFO     [lm_eval.task

In [17]:
# TheBloke/Llama-2-7B-GPTQ 
# !pip uninstall -y huggingface_hub
# !pip install huggingface_hub==0.17.3
!lm_eval \
    --model hf \
    --include_path /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/ \
    --model_args pretrained=TheBloke/Llama-2-7B-GPTQ,gptqmodel=True,device_map=auto \
    --tasks secqa \
    --limit 120 \
    --verbosity DEBUG \
    --output_path results

2025-02-18:23:29:44,805 INFO     [__main__.py:284] Verbosity set to DEBUG
2025-02-18:23:29:44,806 INFO     [__main__.py:308] Including path: /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/
2025-02-18:23:29:46,958 DEBUG    [__init__.py:522] File _evalita-mp_ner_adg.yaml in /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/evalita_llm could not be loaded
2025-02-18:23:29:46,961 DEBUG    [__init__.py:522] File _evalita-mp_ner_fic.yaml in /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/evalita_llm could not be loaded
2025-02-18:23:29:46,964 DEBUG    [__init__.py:522] File _evalita-mp_ner_wn.yaml in /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/evalita_llm could not be loaded
2025-02-18:23:29:50,494 INFO     [__init__.py:459] The tag 'kobest' is already registered as a group, this tag will not be registered. This may affect tasks you want to call.
2025-02-18:23:29:50,495 INFO     [__init__.py:459] The tag 'kobest' is already registered as a group, this tag will not 

In [79]:
# TheBloke/Mistral-7B-v0.1-GPTQ
!lm_eval \
    --model hf \
    --include_path /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/ \
    --model_args pretrained=TheBloke/Mistral-7B-v0.1-GPTQ,gptqmodel=True \
    --tasks secqa \
    --limit 120 \
    --verbosity DEBUG

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


2025-02-17:20:03:00,376 INFO     [__main__.py:284] Verbosity set to DEBUG
2025-02-17:20:03:00,376 INFO     [__main__.py:308] Including path: /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/
2025-02-17:20:03:02,556 DEBUG    [__init__.py:522] File _evalita-mp_ner_adg.yaml in /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/evalita_llm could not be loaded
2025-02-17:20:03:02,559 DEBUG    [__init__.py:522] File _evalita-mp_ner_fic.yaml in /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/evalita_llm could not be loaded
2025-02-17:20:03:02,562 DEBUG    [__init__.py:522] File _evalita-mp_ner_wn.yaml in /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/evalita_llm could not be loaded
2025-02-17:20:03:06,142 INFO     [__init__.py:459] The tag 'kobest' is already registered as a group, this tag will not be registered. This may affect tasks you want to call.
2025-02-17:20:03:06,144 INFO     [__init__.py:459] The tag 'kobest' is already registered as a group, this tag will not 

In [81]:
# TheBloke/Mistral-7B-Instruct-v0.2-GPTQ
!lm_eval \
    --model hf \
    --include_path /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/ \
    --model_args pretrained=TheBloke/Mistral-7B-Instruct-v0.2-GPTQ,gptqmodel=True \
    --tasks secqa \
    --limit 120 \
    --verbosity DEBUG

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


2025-02-17:20:04:31,714 INFO     [__main__.py:284] Verbosity set to DEBUG
2025-02-17:20:04:31,715 INFO     [__main__.py:308] Including path: /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/
2025-02-17:20:04:33,894 DEBUG    [__init__.py:522] File _evalita-mp_ner_adg.yaml in /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/evalita_llm could not be loaded
2025-02-17:20:04:33,897 DEBUG    [__init__.py:522] File _evalita-mp_ner_fic.yaml in /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/evalita_llm could not be loaded
2025-02-17:20:04:33,900 DEBUG    [__init__.py:522] File _evalita-mp_ner_wn.yaml in /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/evalita_llm could not be loaded
2025-02-17:20:04:37,500 INFO     [__init__.py:459] The tag 'kobest' is already registered as a group, this tag will not be registered. This may affect tasks you want to call.
2025-02-17:20:04:37,501 INFO     [__init__.py:459] The tag 'kobest' is already registered as a group, this tag will not 

In [11]:
# TheBloke/zephyr-7B-beta-GPTQ
!lm_eval \
    --model hf \
    --include_path /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/ \
    --model_args pretrained=TheBloke/zephyr-7B-beta-GPTQ,gptqmodel=True \
    --tasks secqa \
    --limit 120 \
    --verbosity DEBUG

2025-02-18:23:18:24,237 INFO     [__main__.py:284] Verbosity set to DEBUG
2025-02-18:23:18:24,237 INFO     [__main__.py:308] Including path: /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/
2025-02-18:23:18:26,454 DEBUG    [__init__.py:522] File _evalita-mp_ner_adg.yaml in /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/evalita_llm could not be loaded
2025-02-18:23:18:26,457 DEBUG    [__init__.py:522] File _evalita-mp_ner_fic.yaml in /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/evalita_llm could not be loaded
2025-02-18:23:18:26,460 DEBUG    [__init__.py:522] File _evalita-mp_ner_wn.yaml in /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/evalita_llm could not be loaded
2025-02-18:23:18:30,122 INFO     [__init__.py:459] The tag 'kobest' is already registered as a group, this tag will not be registered. This may affect tasks you want to call.
2025-02-18:23:18:30,124 INFO     [__init__.py:459] The tag 'kobest' is already registered as a group, this tag will not 

In [80]:
# TheBloke/phi-2-GPTQ
!lm_eval \
    --model hf \
    --include_path /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/ \
    --model_args pretrained=TheBloke/phi-2-GPTQ,gptqmodel=True \
    --tasks secqa \
    --limit 120 \
    --verbosity DEBUG

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


2025-02-17:20:03:47,451 INFO     [__main__.py:284] Verbosity set to DEBUG
2025-02-17:20:03:47,451 INFO     [__main__.py:308] Including path: /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/
2025-02-17:20:03:49,611 DEBUG    [__init__.py:522] File _evalita-mp_ner_adg.yaml in /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/evalita_llm could not be loaded
2025-02-17:20:03:49,614 DEBUG    [__init__.py:522] File _evalita-mp_ner_fic.yaml in /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/evalita_llm could not be loaded
2025-02-17:20:03:49,617 DEBUG    [__init__.py:522] File _evalita-mp_ner_wn.yaml in /opt/conda/lib/python3.11/site-packages/lm_eval/tasks/evalita_llm could not be loaded
2025-02-17:20:03:53,184 INFO     [__init__.py:459] The tag 'kobest' is already registered as a group, this tag will not be registered. This may affect tasks you want to call.
2025-02-17:20:03:53,186 INFO     [__init__.py:459] The tag 'kobest' is already registered as a group, this tag will not 