# Llama 2 13B-Chat Generation
This section defines a helper function that saves a list of Q&A dictionaries to a JSON file. The entries are ordered by file number and then by slide number before being written. The function is reused by later sections of the notebook.

In [6]:
import json

def save_json(filename, qa_list):
    """
    Save the QA list of dictionaries to a JSON file.

    The entries are written ordered by ``file_num`` first and ``slide_num``
    second. The ordering is applied to a copy, so the caller's list keeps
    its original order.

    Args:
        filename: Path of the JSON file to create or overwrite.
        qa_list: Iterable of dictionaries; every entry must provide the
            keys ``'file_num'`` and ``'slide_num'``.

    Raises:
        KeyError: If an entry lacks ``'file_num'`` or ``'slide_num'``.
    """
    # Sort a copy (file_num, then slide_num) rather than reordering the
    # caller's list as a hidden side effect.
    ordered = sorted(qa_list, key=lambda x: (x['file_num'], x['slide_num']))

    # NOTE(review): the file is opened with the platform default encoding
    # while ensure_ascii=False writes non-ASCII characters verbatim; any
    # explicit encoding chosen here must match the code that reads the file.
    with open(filename, "w") as file:
        json.dump(ordered, file, ensure_ascii=False, indent=4)
    print(f"Saved {len(ordered)} elements to {filename}.")


This section sets up the Llama 2 model for text generation. It loads the model and tokenizer from the Hugging Face Model Hub, sets the model to evaluation mode, and initializes the text generation pipeline.

In [None]:
from torch import cuda, bfloat16
import torch
import transformers
import os

# Specify the model ID
model_id = 'meta-llama/Llama-2-13b-chat-hf'

# Name of the current CUDA device if one is available, otherwise 'cpu'.
# NOTE(review): this value is only used in the status message below; the
# actual weight placement is decided by device_map='auto'.
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# Configure the BitsAndBytes quantization settings for the model.
# load_in_4bit must be set explicitly: the bnb_4bit_* options only describe
# HOW 4-bit loading is done, they do not request it.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,  # Actually request 4-bit loading
    bnb_4bit_quant_type='nf4',  # Use the NF4 4-bit quantization type
    bnb_4bit_use_double_quant=True,  # Use double quantization
    bnb_4bit_compute_dtype=torch.bfloat16  # Use bfloat16 data type for computations
)

# Get the Hugging Face authentication token from environment variables
# (None when HF_AUTH_TOKEN is not set)
hf_auth = os.getenv('HF_AUTH_TOKEN')

# Load the model configuration from Hugging Face Model Hub
model_config = transformers.AutoConfig.from_pretrained(
    model_id,  # The model ID
    use_auth_token=hf_auth  # The authentication token
)

# Load the model from Hugging Face Model Hub
llama2_model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,  # The model ID
    trust_remote_code=True,  # Trust the remote code (be careful with this setting)
    config=model_config,  # The model configuration
    quantization_config=bnb_config,  # The quantization configuration
    device_map='auto',  # Automatically map the model to the device
    torch_dtype=torch.float16,  # Use float16 data type for the model
    use_auth_token=hf_auth  # The authentication token
)

# Set the model to evaluation mode
llama2_model.eval()

# Print the device name computed above
print(f"Model loaded on {device}")

# Load the tokenizer from Hugging Face Model Hub
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,  # The model ID
    use_auth_token=hf_auth  # The authentication token
)

# Initialize the text generation pipeline.
# NOTE(review): temperature only influences generation when sampling is
# enabled (do_sample=True); confirm whether sampling was intended here.
generate_text = transformers.pipeline(
    model=llama2_model,  # The model
    tokenizer=tokenizer,  # The tokenizer
    return_full_text=False,  # Return only the newly generated part, not the prompt
    task='text-generation',  # The task is text generation
    temperature=0.05,  # The 'randomness' of the outputs
    max_new_tokens=256,  # The maximum number of tokens to generate in the output
    repetition_penalty=1.1  # The penalty for repetition in the output
)

This section generates questions and answers based on the content of PDF files. It reads the PDF files, extracts the text, generates answers and questions using the Llama 2 model, and saves the results to a list.

In [None]:
# Import necessary libraries
from PyPDF2 import PdfReader
import re
from tqdm import tqdm

def llama2_prompt(sys_prompt: str = "", instruction_list: "list[str] | None" = None, prev_answer_list: "list[str] | None" = None):
    """
    Returns a prompt in the format required by the Llama 2 model

    The prompt starts with the system block
    ``<s>[INST] <<SYS>>\\n{sys_prompt}\\n<</SYS>>\\n\\n``. Each instruction is
    then appended followed by ``" [/INST] "``; when a previous answer exists
    at the same index, that answer and ``" </s><s>[INST] "`` are appended to
    open the next turn.

    Args:
        sys_prompt: Text placed inside the ``<<SYS>>`` block.
        instruction_list: User instructions, one per turn. ``None`` (the
            default) is treated as a single empty instruction.
        prev_answer_list: Answers already given to the first instructions.
            ``None`` (the default) means no previous answers.

    Returns:
        The assembled prompt string.
    """
    # None sentinels replace the former mutable list defaults, which were
    # shared between calls.
    if instruction_list is None:
        instruction_list = [""]
    if prev_answer_list is None:
        prev_answer_list = []

    prompt = f"<s>[INST] <<SYS>>\n{sys_prompt}\n<</SYS>>\n\n"
    for i, instruction in enumerate(instruction_list):
        prompt += f"{instruction} [/INST] "
        # Only turns that already have an answer are closed and followed by
        # a new [INST] opener.
        if i < len(prev_answer_list):
            prompt += f"{prev_answer_list[i]} </s><s>[INST] "
    return prompt

# Define system prompt list
sys_prompt_list = ["""You are an expert of \"Pattern Recognition\". You have to generate questions in order to challenge students about the subject material. Your answers should be short, concise, fundamental, clear and relevant for the subject.\n"""]

# Define history list and QA list
history_list = []
qa_list = []

# Define slide list as (file_num, slide_num) pairs; slide_num is used
# directly as a 0-based index into the list of extracted page texts
slide_list = [(6, 6), (8, 20), (7, 5), (9, 5), (6, 45), (3, 21), (2, 11), (2, 20), (6, 17), (5, 6), (7, 16), (5, 3), (7, 13), (2, 35), (8, 3), (3, 23), (7, 3), (9, 3), (9, 9), (6, 22), (7, 21)]

# Number of neighbouring pages taken on each side of the slide as context
context_range = 2

# Extracted page texts per PDF, so a file that appears several times in
# slide_list is parsed only once
pdf_text_cache = {}

# Loop through each slide
for (file_num, slide_num) in tqdm(slide_list):
    # Read the PDF file (or reuse the text extracted earlier)
    filename = f'material/PR_0{file_num}.pdf'
    if filename not in pdf_text_cache:
        reader = PdfReader(filename)
        pdf_text_cache[filename] = [page.extract_text() for page in reader.pages]
    file_text = pdf_text_cache[filename]

    # Define context: the slide plus up to context_range pages before and after
    context = ''
    for i in range(-context_range, context_range+1):
        page_idx = slide_num + i
        # Explicit bounds check: a negative index would otherwise silently
        # wrap around to pages at the end of the document.
        if 0 <= page_idx < len(file_text):
            context += "\n" + file_text[page_idx] + "\n"
        else:
            print(f"Warning (index out of bounds, slide doesn't exist). Omitting: file={filename}, slide_num={page_idx+1}")

    # Define text
    text = file_text[slide_num]

    # Define answers
    answers = []
    ans_prompt_list = [f"Considering the following text: \'\'\'{context}\'\'\'\n\nSeparate the ideas of the following extract and format them as an explanation sentences using the exact expressions in the text, avoiding questions:\n\nExtract: \'\'\'{text}\'\'\'\n\n1)"]

    # Generate text for each prompt in ans_prompt_list
    for ans_prompt in ans_prompt_list:
        res = generate_text(llama2_prompt(sys_prompt=sys_prompt_list[0],instruction_list=[ans_prompt]))
        output_text = res[0]["generated_text"]

        # Split the output on numbered-list markers of the form "\n1. ",
        # "\n2. ", ...; everything before the first marker is discarded.
        # NOTE(review): the prompt ends with "1)" while the split expects
        # "N. " markers - confirm this matches the model's actual output.
        subchains = re.split(r"\n\d+\. ", output_text)[1:]

        # Erase a trailing newline followed by a bare number at the end of an item
        subchains = [re.sub(r"\n\d+$", '', subchain) for subchain in subchains]
        answers += subchains

    # Generate questions for each answer
    for sys_prompt in sys_prompt_list:
        for answer in answers:
            # The topic is built from the texts of the file's pages at index 1 and 2
            prompt_list = [f"""The topic is \"{file_text[1]} - {file_text[2]}\". Considering the following text:\'\'\'{context}\'\'\'. Extract: \'\'\'{answer}\'\'\'. Please generate a very short question which answer is contained in the extract."""]

            # Generate text for each prompt in prompt_list
            for prompt in prompt_list:
                res = generate_text(llama2_prompt(sys_prompt=sys_prompt,instruction_list=[prompt]))
                # Keep the first span that ends with a question mark
                # (empty string when the output contains none)
                match = re.search(r"\b.*?\?", res[0]["generated_text"])
                match_text = match.group() if match else ""
                match_text = match_text.replace("Question: ", "")

                # Append to qa_list
                qa_list.append({'file_num': file_num,
                                'slide_num': slide_num,
                                'slide_text': text,
                                'extended_context':  context,
                                'gen_answer': answer,
                                'gen_question_llama_2': match_text})

In [None]:
# Write the Q&A entries generated with Llama 2 to qa_list.json, the file
# that the later generation cells read back
save_json("qa_list.json", qa_list)

This section cleans up the memory by deleting the model and text generation pipeline, triggering the garbage collector, and clearing the GPU memory cache.

In [2]:
import gc

# Remove the names bound to the text generation pipeline and the Llama 2 model
del generate_text, llama2_model

# Run a garbage collection pass explicitly
gc.collect()

# Ask PyTorch to empty its CUDA memory cache
torch.cuda.empty_cache()

# GPT 3.5 Turbo Generation
This section loads the Q&A pairs saved in `qa_list.json` and loops over them. For each pair it extracts the context and the answer, builds the prompt, and asks the OpenAI API to generate a question. The generated question is then added to the pair, and the updated pair is appended to a new list.

In [None]:
import os
import json
import openai
from tqdm import tqdm

# Read the OpenAI API key from the environment
api_key = os.getenv('OPENAI_API_KEY')

# Create the OpenAI client with that key
client = openai.OpenAI(api_key=api_key)

# System prompt (role) sent with every request
sys_prompt = """You are an expert of "Pattern Recognition". You have to generate questions in order to challenge students about the subject material. Your answers should be short, concise, fundamental, clear and relevant for the subject.\n\n"""

# Load the previously saved QA entries
with open("qa_list.json", "r") as file:
    qa_list = json.load(file)

# Entries enriched with the GPT-3.5 Turbo question are collected here
qa_list_updated = []

for qa in tqdm(qa_list):
    # Pull the fields of the current entry
    file_num, slide_num, text, context, answer = (
        qa[key]
        for key in ('file_num', 'slide_num', 'slide_text', 'extended_context', 'gen_answer')
    )

    # User prompt: context and extract, ending with "Q:"
    prompt = f"""Considering the following text:\'\'\'{context}\'\'\'. Extract: \'\'\'{answer}\'\'\'. Please generate a short question which answer is contained in the extract.\n\nQ:"""

    # Chat messages: system role first, then the user prompt
    messages = [
        {"role": "system", "content": sys_prompt},
        {"role": "user", "content": prompt},
    ]

    # Send the request to the OpenAI API
    response = client.chat.completions.create(model="gpt-3.5-turbo", messages=messages)

    # The question is the content of the first returned choice
    output_text = response.choices[0].message.content

    # Store the question on the entry and keep the entry
    qa['gen_question_gpt-3.5-turbo'] = output_text
    qa_list_updated.append(qa)

In [None]:
# Overwrite qa_list.json with the entries that now also carry the
# GPT-3.5 Turbo question
save_json("qa_list.json", qa_list_updated)

# Flan T5 XXL Generation
This section sets up the T5 model and the text generation pipeline.

In [None]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration, pipeline

# Specify the name of the model to be used
model_name = 'google/flan-t5-xxl'

# Initialize the tokenizer for the specified model.
# return_tensors is an argument of the tokenizer call, not of
# from_pretrained, so it is no longer passed here.
t5_tokenizer = T5Tokenizer.from_pretrained(model_name)

# Initialize the model for the specified model
# The model placement is chosen automatically via device_map="auto"
# The model weights are loaded with the 'bfloat16' data type
t5_model = T5ForConditionalGeneration.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.bfloat16
)

# Initialize the text generation pipeline
# The pipeline uses the specified model and tokenizer
# The task is set to 'text2text-generation'
# The repetition penalty is set to 1.1 to prevent the output from repeating
generate_text = pipeline(
    model=t5_model, tokenizer=t5_tokenizer,
    task='text2text-generation',
    repetition_penalty=1.1  # without this output begins repeating
)

This section uses the T5 model to generate questions based on the context and answer of each Q&A pair. The generated questions are added to the Q&A pairs and the updated pairs are stored in a new list.

In [None]:
import json
from tqdm import tqdm

# Load the previously saved QA entries
with open("qa_list.json", "r") as file:
    qa_list = json.load(file)

# Entries enriched with the Flan-T5 XXL question are collected here
qa_list_updated = []

for qa in tqdm(qa_list):
    # Pull the fields of the current entry
    file_num, slide_num, text, context, answer = (
        qa[key]
        for key in ('file_num', 'slide_num', 'slide_text', 'extended_context', 'gen_answer')
    )

    # Prompt asking for a question about the context that fits the answer
    prompt = f"Write a question about the context:{context}. For this answer: {answer}. Question:"

    # Run the pipeline on a one-element batch
    res = generate_text([prompt])

    # The question is the generated text of the first result
    generated_question = res[0]["generated_text"]

    # Store the question on the entry and keep the entry
    qa['gen_question_flan_t5_xxl'] = generated_question
    qa_list_updated.append(qa)

In [7]:
# Overwrite qa_list.json with the entries that now also carry the
# Flan-T5 XXL question
save_json("qa_list.json", qa_list_updated)

Saved 78 elements to qa_list.json.


This section deletes the text generation pipeline and the T5 model from memory, triggers Python's garbage collector, and clears the memory cache in PyTorch for the GPU.

In [10]:
import gc

# Remove the names bound to the text generation pipeline and the T5 model
del generate_text, t5_model

# Run a garbage collection pass explicitly
gc.collect()

# Ask PyTorch to empty its CUDA memory cache
torch.cuda.empty_cache()