In [2]:
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path

sys.path.append('../')

import src.qa_gen as qag


In [3]:
# Groq-hosted model used for question generation.
# Alternatives are listed at https://console.groq.com/docs/models
GROQ_MODEL = "llama-3.1-70b-versatile"
# GROQ_MODEL = "llama-3.1-70b-specdec"

# Generator for interactive experiments: API keys are looked up via the
# GROQ_API_KEYS variable and response caching is switched off.
qa_generator = qag.QAGenerator(
    api_keys_var="GROQ_API_KEYS",
    groq_model=GROQ_MODEL,
    pause_secs=1.0,
    cache_enabled=False,
)

Returning secret from environment variable `GROQ_API_KEYS`=`gs...a9`
6 Groq api keys loaded.


In [30]:
# Sample passage (statute text about dissolving the corporation) used to try
# question generation on a single hand-picked chunk.
chunk = """
The corporation shall be dissolved:
For reasons stipulated by law.
By decision of the Superior Council, taken by two-thirds of its ordinary councilors in office, in two sessions of the Superior Council that must take place with an interval of no less than thirty calendar days, when in their judgment the Institution is unable to fulfill its purposes.
"""

# verbose=True: the recorded cell output shows the prompt text being echoed.
# `generated_qa` is consumed by the parsing cell that follows.
generated_qa = qa_generator.gen_question(chunk, verbose=True)



Please generate one multiple choice question, a correct answer for it and three (3) incorrect answers, based on the following text:
```

The corporation shall be dissolved:
For reasons stipulated by law.
By decision of the Superior Council, taken by two-thirds of its ordinary councilors in office, in two sessions of the Superior Council that must take place with an interval of no less than thirty calendar days, when in their judgment the Institution is unable to fulfill its purposes.

```
The question should be short, i.e. between about 8 and 15 words long and end on question mark.
You should return the question, followed by the correct answer, and then three incorrect answers.
The correct answer and each of the incorrect answers should be short, each no longer than a few words (10 tops.)

QUESTION: <question here between 8 and 15 words long ending in question mark>
CORRECT ANSWER: <correct answer here>
INCORRECT ANSWER 1: <incorrect answer 1 here>
INCORRECT ANSWER 2: <incorrect answe

In [None]:

# Parse the raw model reply held in `generated_qa` into a structured QA record.
in_rec = qag.parse_generated_question(generated_qa=generated_qa)


In [5]:
import src.utils as ut
import json


def generate_qas_file(qa_generator: qag.QAGenerator,
                      input_path: Path,
                      src_file_key: str,
                      start_idx: int,
                      out_fpath: Path,
                      min_chunk_len: int = 300,
                      verbose_first_n: int = 10,
                      encoding: str = "utf-8") -> None:
    """Generate one QA record per text chunk and write them as JSON Lines.

    The input file is split into lines, grouped into chunks by
    ``ut.chunk_generator``, and each chunk is sent to ``qa_generator`` to
    produce a multiple-choice question. The parsed and enriched record is
    written as one JSON object per line to ``out_fpath``.

    Args:
        qa_generator: Generator whose ``gen_question`` produces the raw
            question text for a chunk.
        input_path: Text file to read the source lines from.
        src_file_key: Short identifier of the source document; used as the
            prefix of each record's example id.
        start_idx: Passed to ``ut.chunk_generator`` as ``start_idx``
            (presumably the first line index to consider -- confirm there).
        out_fpath: Destination ``.jsonl`` file. It is opened in write mode,
            so an existing file is overwritten.
        min_chunk_len: Passed to ``ut.chunk_generator`` as ``min_chunk_len``.
            Defaults to the previously hard-coded value of 300.
        verbose_first_n: Number of leading chunks for which ``gen_question``
            is called with ``verbose=True``. Defaults to the previously
            hard-coded value of 10.
        encoding: Text encoding used for both reading the input and writing
            the output. Given explicitly so results do not depend on the
            platform's locale default.
    """
    # Explicit encoding: the platform default may not be UTF-8 and could
    # fail on, or mis-decode, non-ASCII characters in the source document.
    lines = input_path.read_text(encoding=encoding).split("\n")
    print(len(lines), "lines")

    with out_fpath.open("wt", encoding=encoding) as f_out:
        chunks = ut.chunk_generator(lines,
                                    start_idx=start_idx,
                                    min_chunk_len=min_chunk_len)
        for i, (s, e, src_chunk) in enumerate(chunks):
            # Progress line: chunk number, start/end values, chunk length and
            # a preview of the first and last 45 characters of the chunk.
            print(f"{i:4d} - start:{s:4d} end:{e:4d} len:{len(src_chunk)} "
                f"{src_chunk[:45]!r}...{src_chunk[-45:]!r}")

            # Only the first few chunks are verbose, to keep the log short.
            generated_qa = qa_generator.gen_question(src_chunk,
                                                     verbose=(i < verbose_first_n))

            qa_record = qag.parse_generated_question(generated_qa=generated_qa)
            # The example id encodes the source document and the chunk's span.
            example_id = f"{src_file_key}-s{s}-e{e}"
            out_rec = qag.enrich_generated_qa(qa_record, src_chunk, example_id)
            print(json.dumps(out_rec), file=f_out)


In [None]:

# Re-create the generator with response caching switched on for the bulk run.
qa_generator = qag.QAGenerator(
    api_keys_var="GROQ_API_KEYS",
    groq_model=GROQ_MODEL,
    pause_secs=1.0,
    cache_enabled=True,
)


In [None]:
# Build the QA file for the translated university statutes, starting from
# index 66 of the input, and write the result next to the input file.
generate_qas_file(
    qa_generator,
    input_path=Path("../data/Estatutos-Universidad-de-los-Andes-2020-ratificados-MEN-RQ.translated.txt"),
    src_file_key="estatutos",
    start_idx=66,
    out_fpath=Path("../data/Estatutos-Universidad-de-los-Andes-2020-ratificados-MEN-RQ.translated.qa.jsonl"),
)


690 lines
   0 - start:  66 end:  76 len:490 '\n\n# CHAPTER I\n\n## ON THE NATURE AND PURPOSES '...'the country. Its duration will be indefinite.'
Found in cache!
Advancing start_idx
   1 - start:  68 end:  76 len:488 '# CHAPTER I\n\n## ON THE NATURE AND PURPOSES OF'...'the country. Its duration will be indefinite.'
Found in cache!
Advancing start_idx
   2 - start:  70 end:  76 len:475 '## ON THE NATURE AND PURPOSES OF THE UNIVERSI'...'the country. Its duration will be indefinite.'
Found in cache!
Advancing start_idx
Advancing start_idx
   3 - start:  73 end:  76 len:425 '## Article 1\n\nThe University of the Andes is '...'the country. Its duration will be indefinite.'
Found in cache!
Advancing start_idx
   4 - start:  75 end:  76 len:411 'The University of the Andes is an institution'...'the country. Its duration will be indefinite.'
Found in cache!
Advancing start_idx
   5 - start:  77 end:  83 len:405 'In keeping with its status as an institution '...'es, and centers established for

In [None]:
# Drop every cache entry whose key contains the phrase below (presumably so
# the matching chunks are regenerated on the next run -- confirm intent).
# Prints how many entries matched before deleting them.
keys = [
    cache_key
    for cache_key in qa_generator.cache.keys()
    if 'When a second call is necessary,' in cache_key
]
print(len(keys))
for k in keys:
    del qa_generator.cache[k]

0


In [None]:
# Hand-written model reply used to exercise the parser on awkward input: it has
# a preamble sentence before "QUESTION:" and a stray leading space before
# "INCORRECT ANSWER 2".
# NOTE(review): the recorded output shows the preamble being merged into the
# parsed 'question' field -- looks like a parser issue in src.qa_gen; confirm.
generated_qa = """Here's the generated question based on the provided text.

QUESTION: What type of relationships does the institution develop?
CORRECT ANSWER: academic, scientific, artistic
INCORRECT ANSWER 1: Only financial relationships
 INCORRECT ANSWER 2: No relationships whatsoever
INCORRECT ANSWER 3: Sports relationships
"""

# Left as the last expression so the parsed record is displayed by the cell.
qag.parse_generated_question(generated_qa)

{'question': "Here's the generated question based on the provided text. What type of relationships does the institution develop?",
 'correct_answer': 'academic, scientific, artistic',
 'incorrect_answers': ['Only financial relationships',
  'No relationships whatsoever',
  'Sports relationships']}