## Installing Required Packages

### Packages for `LambdaLabs` Cloud GPUs

In [None]:
# Environment setup required on a Lambda Labs cloud GPU instance.
# Every `!` line below is executed as a shell command.
# NOTE(review): apart from pip itself nothing is version-pinned, so a later run
# may resolve to different package versions than the ones used originally.

!pip -q install --upgrade pip
!pip -q install python-dotenv
!pip -q install --upgrade numexpr bottleneck scipy            # required for pandas in LambdaLabs
!pip -q install --upgrade pandas transformers bitsandbytes accelerate
# !pip -q install xformers einops optimum sentencepiece         # required for some models
!pip install gpustat                                          # to watch GPU utilization and memory
!pip install langdetect                                       # to check response language

# Uncomment to inspect the installed versions.
# !pip show transformers torch                                    # transformers should be > 4.40, torch >= 2.1

In [None]:
# Install Flash Attention 2, which makes computations faster for some models.
# The wheel below is a prebuilt binary; its file name encodes flash-attn 2.5.7,
# CUDA 12.2 (cu122), torch 2.1, CPython 3.10 (cp310) and linux_x86_64, which is
# why torch is pinned to 2.1 on the line before it.
# NOTE(review): on a different Python/CUDA/torch combination this wheel will
# presumably not install — use the source build alternative further down.
!pip uninstall -y ninja && pip install ninja
!pip install --upgrade torch==2.1
!pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.7/flash_attn-2.5.7+cu122torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl

# Alternative: build flash-attn from source instead of using the prebuilt wheel.
# !yes | git clone git@github.com:Dao-AILab/flash-attention.git
# !python ./flash-attention/setup.py install
# !pip install flash-attn --no-build-isolation

### To install a model locally:

In [None]:
# Step 1: install git-lfs, which is needed to fetch the large weight files.
# NOTE(review): `apt-get install` is run without `-y`; if apt asks for
# confirmation it cannot be answered from a notebook cell — confirm it runs unattended.
!sudo apt-get install git-lfs
!git lfs install

# Step 2: clone the model repository into the current working directory.
!git clone https://huggingface.co/mistralai/Mistral-7B-v0.2

# Step 3: point `model_id` at the local clone instead of a Hub repository id.
# Note: the model-selection cell further down reassigns `model_id` from the
# `models` registry, so this value only takes effect if that assignment is skipped.
model_id = './Mistral-7B-v0.2'

### To free disk space (the Hugging Face cache) by removing models that were downloaded:
In CLI:  
```
ls ~/.cache/huggingface/hub
rm -rf ~/.cache/huggingface/hub/[model_folder]
```
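If GPU memory runs out during inference, restart the kernel and run the following in the notebook before importing `torch` or loading a model (the setting must be in the environment before PyTorch initializes CUDA):  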
```
import os
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
```

## Import Utilities

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, BitsAndBytesConfig#, pipeline
import pandas as pd
import torch
import os
import sys
sys.path.append('..')  # Add the parent directory of LLM_Evaluations to the Python path

from Utils.llm_evaluation_utils import load_responses_df,         \
                        check_and_store_response,   \
                        QUESTION_SETS

from dotenv import load_dotenv
from huggingface_hub import login

# Load variables from a local .env file into the process environment, then read
# the Hugging Face token from HF_API_KEY (None if the variable is not set).
load_dotenv()
access_token = os.environ.get('HF_API_KEY')

# Authenticate against the Hugging Face Hub.
# NOTE(review): if HF_API_KEY is missing, `login(None)` is called — confirm how
# `huggingface_hub.login` behaves in that case in a non-interactive run.
login(access_token)

# Select the question set; the three parts are later joined into one instruction
# as "<QUESTION_HEAD> <question> <QUESTION_TAIL>".
question_type = 'GS'
QUESTION_HEAD = QUESTION_SETS[question_type]['QUESTION_HEAD']
QUESTIONS = QUESTION_SETS[question_type]['QUESTIONS']
QUESTION_TAIL = QUESTION_SETS[question_type]['QUESTION_TAIL']

#### Initially I used `pipeline` from `transformers` for inference. 
However, configuring the pipeline directly is deprecated; a `GenerationConfig` should be used instead. The original pipeline code is kept below, inside a string, for reference only.

In [None]:
"""
def initialize_model_and_tokenizer(model_id, task, max_length=4096, torch_dtype='auto', load_in_8bit=False):
    '''Initialize model and tokenizer.'''
    # bits_config = BitsAndBytesConfig(load_in_4bit=True)
    # gptq_config = GPTQConfig(bits=4)
    model_kwargs={'max_length': max_length, 'load_in_8bit': load_in_8bit}
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    return pipeline(model=model_id,
                    task=task,
                    tokenizer=tokenizer,
                    torch_dtype=torch_dtype, #torch.bfloat16,
                    device=0,
                    # device_map='auto',
                    model_kwargs=model_kwargs,
                    trust_remote_code=True,
                    # quantization_config=bnb_config,
                    )

def run_llm(pipe):
    '''Generate response from the language model pipeline.'''
    return pipe(
                prompt,
                # max_new_tokens=30,
                num_return_sequences=1,
                eos_token_id=pipe.tokenizer.eos_token_id,
                )[0]['generated_text']
"""

## LLM Models
Each entry specifies the model's configuration parameters and the prompt style to use with it.

In [None]:
# Registry of the LLMs that were evaluated. Each entry is a dict with:
#   model_name      - short label; the selection cell matches on it and it becomes part of the results file name
#   model_id        - Hugging Face Hub repository id passed to `from_pretrained`
#   task            - task name; only referenced by the legacy `pipeline` code kept in a string above
#   max_length      - context length noted for the model
#                     NOTE(review): not read by any live cell in this notebook; the value
#                     1000000000000000019884624838656 equals int(1e30) and looks like a
#                     "no limit" sentinel rather than a real context size — confirm
#   prompt_template - template containing the `{instruction}` and `{transcript}` placeholders;
#                     entries that also define 'user' (and 'system') embed those strings
#                     through a `{user}` (and `{system}`) placeholder in the template
#   quantize        - None, 'int8' or 'int4'; selects the bitsandbytes config in the loading cell
#   torch_dtype     - forwarded to `from_pretrained` ('auto' or an explicit torch dtype)
# The templates are written as f-strings with doubled braces, so `{{instruction}}` is stored
# as the literal placeholder `{instruction}` and only filled in later via `str.format`.
models = [
    {
        'model_name': 'falcon-7b-instruct',
        'model_id': 'tiiuae/falcon-7b-instruct',
        'task': 'text-generation',
        'max_length': 2048,
        'prompt_template': f'Instruction: {{instruction}}\nTranscript: "{{transcript}}"\nScore:',
        'quantize': None,
        'torch_dtype': 'auto',
    },
    {
        'model_name': 'falcon-40b-instruct',
        'model_id': 'tiiuae/falcon-40b-instruct',
        'task': 'text-generation',
        'max_length': 2048,
        'prompt_template': f'Instruction: {{instruction}}\nTranscript: "{{transcript}}"\nScore:',
        'quantize': 'int4',
        'torch_dtype': 'auto',
    },
    {
        'model_name': 'Mistral-7B-Instruct',
        'model_id': 'mistralai/Mistral-7B-Instruct-v0.2',
        'task': 'text-generation',
        'max_length': 1000000000000000019884624838656,
        'prompt_template': f'[INST]Instruction: {{instruction}}\nTranscript:"{{transcript}}"\nScore:[/INST]',
        'quantize': None,
        'torch_dtype': 'auto',
    },
    {
        'model_name': 'Mixtral-8x7B-Instruct',
        'model_id': 'mistralai/Mixtral-8x7B-Instruct-v0.1',
        'task': 'text-generation',
        'max_length': 1000000000000000019884624838656,
        'prompt_template': f'[INST]Instruction: {{instruction}}\nTranscript:"{{transcript}}"\nScore:[/INST]',
        'quantize': 'int4',
        'torch_dtype': 'auto',
        # attn_implementation='flash_attention_2',
    },
    {
        'model_name': 'phi-2',
        'model_id': 'microsoft/phi-2',
        'task': 'text-generation',
        'max_length': 2048,
        'prompt_template': f'Instruction: {{instruction}}\nTranscript: "{{transcript}}"\nScore:',
        'quantize': None,
        'torch_dtype': 'auto',
    },
    {
        'model_name': 'AdaptLLM-medicine',
        'model_id': 'AdaptLLM/medicine-LLM',
        'task': 'text-generation',
        'max_length': 2048,
        'prompt_template': f'Instruction: {{instruction}}\nTranscript: "{{transcript}}"\nScore:',
        'quantize': None,
        'torch_dtype': torch.float16,
    },
    {
        'model_name': 'Yi-34B',
        'model_id': '01-ai/Yi-34B',
        'task': 'text-generation',
        'max_length': 4096,
        'prompt_template': f'# Instruction: {{instruction}}\nTranscript: "{{transcript}}"\nScore:',
        'quantize': None,
        'torch_dtype': torch.float16,
    },
    {
        'model_name': 'Qwen1_5-72B',
        'model_id': 'Qwen/Qwen1.5-72B',
        'task': 'text-generation',
        'max_length': 32768,
        'prompt_template': f'Instruction: {{instruction}}\nTranscript: "{{transcript}}"\nScore:',
        'quantize': 'int4',
        'torch_dtype': 'auto',
    },
    {
        'model_name': 'mpt-30b',
        'model_id': 'mosaicml/mpt-30b',
        'task': 'text-generation',
        'max_length': 8192,
        'prompt_template': f'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n###Instruction: {{instruction}}\nTranscript: "{{transcript}}"\n\n### Response: Score:',
        'quantize': None,
        'torch_dtype': 'auto',
    },
    {
        'model_name': 'Xwin-70B',   # did not complete inference on the whole dataset
        'model_id': 'Xwin-LM/Xwin-LM-70B-V0.1',
        'task': 'text-generation',
        'max_length': 4096,
        'prompt_template': f'''A chat between a curious user and an artificial intelligence assistant. 
The assistant gives helpful, detailed, and polite answers to the user's questions.
USER: Instruction: {{instruction}}
Transcript: "{{transcript}}"
ASSISTANT: Score:''',
        'quantize': 'int4',
        'torch_dtype': 'auto',
        #    do_sample=True,
    },
    {
        'model_name': 'vicuna-33b',
        'model_id': 'lmsys/vicuna-33b-v1.3',
        'task': 'text-generation',
        'max_length': 2048,
        'prompt_template': f'''A chat between a curious user and an artificial intelligence assistant. 
The assistant gives helpful, detailed, and polite answers to the user's questions.
USER: Instruction: {{instruction}}
Transcript: "{{transcript}}"
ASSISTANT: Score:''',
        'quantize': None,
        'torch_dtype': 'auto',
    },
    {
        'model_name': 'Orca-2',
        'model_id': 'microsoft/Orca-2-13b',
        'task': 'text-generation',
        'max_length': 4096,
        # Disabled alternative: ChatML-style template with a separate system message.
        # 'system': 'You are Orca, an AI language model created by Microsoft. You are a cautious assistant. You carefully follow instructions.',
        # 'user': f'Instruction: {{instruction}}\nTranscript: "{{transcript}}"\nScore:',
        # 'prompt_template': f'<|im_start|>system\n{{system}}<|im_end|>\n<|im_start|>user\n{{user}}<|im_end|>\n<|im_start|>assistant',
        # prompt_template = matching_model['prompt_template'].format(system=matching_model['system'], user=matching_model['user'])
        'prompt_template': f'Instruction: {{instruction}}\nTranscript: "{{transcript}}"\nScore:',
        'quantize': None,
        'torch_dtype': 'auto'
    },
    {
        'model_name': 'KTO_Mistral_PairRM', # 7B
        'model_id': 'ContextualAI/Contextual_KTO_Mistral_PairRM',
        'task': 'text-generation',
        'max_length': 2048,
        'prompt_template': f'<|user|>\nInstruction: {{instruction}}\nTranscript: "{{transcript}}"\n\n<|assistant|>\nScore:',
        'quantize': None,
        'torch_dtype': 'auto',
    },
    {
        'model_name': 'Ein-72B',
        'model_id': 'SF-Foundation/Ein-72B-v0.1-full',
        'task': 'text-generation',
        'max_length': 4096,
        'prompt_template': f'Instruction: {{instruction}}\nTranscript: "{{transcript}}"\nScore:',
        'quantize': 'int4',
        'torch_dtype': 'auto',
    },
    {
        'model_name': 'Mixtral-8x22B-Instruct',
        'model_id': 'mistralai/Mixtral-8x22B-Instruct-v0.1',
        'task': 'text-generation',
        'max_length': 1000000000000000019884624838656,
        'prompt_template': f'Instruction: {{instruction}}\nTranscript: "{{transcript}}"\nScore:',
        # 'prompt_template': f'[INST]Instruction: {{instruction}}\nTranscript:"{{transcript}}\nScore:[/INST]',
        'quantize': 'int4',
        'torch_dtype': 'auto',
    },
    {
        'model_name': 'Llama-3-8B-Instruct',
        'model_id': 'meta-llama/Meta-Llama-3-8B-Instruct',
        'task': 'text-generation',
        'max_length': 1000000000000000019884624838656,
        # 'prompt_template': f'Instruction: {{instruction}}\nTranscript: "{{transcript}}"\nScore:',
        # Active variant: the whole instruction is sent as a single user turn.
        'user': f'Instruction: {{instruction}}\nTranscript: "{{transcript}}"\nScore:',
        'prompt_template': f'''<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n
{{user}}<|eot_id|><|start_header_id|>assistant<|end_header_id|>''',
# Disabled variant: instruction as system message, transcript as user message.
#         'system': f'{{instruction}}',
#         'user': f'Transcript: "{{transcript}}"\nScore:',
#         'prompt_template': f'''<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n
# {{system}}<|eot_id|><|start_header_id|>user<|end_header_id|>\n
# {{user}}<|eot_id|><|start_header_id|>assistant<|end_header_id|>''',
        'quantize': None,
        'torch_dtype': torch.float16,
    },
    {
        'model_name': 'Llama-3-70B-Instruct',
        'model_id': 'meta-llama/Meta-Llama-3-70B-Instruct',
        'task': 'text-generation',
        'max_length': 1000000000000000019884624838656,
        # 'prompt_template': f'Instruction: {{instruction}}\nTranscript: "{{transcript}}"\nScore:',
        # NOTE(review): unlike the 8B entry, this user message has no "Instruction: " prefix — confirm this is intended.
        'user': f'{{instruction}}\nTranscript: "{{transcript}}"\nScore:',
        'prompt_template': f'''<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n
{{user}}<|eot_id|><|start_header_id|>assistant<|end_header_id|>''',
# Disabled variant: instruction as system message, transcript as user message.
#         'system': f'{{instruction}}',
#         'user': f'Transcript: "{{transcript}}"\nScore:',
#         'prompt_template': f'''<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n
# {{system}}<|eot_id|><|start_header_id|>user<|end_header_id|>\n
# {{user}}<|eot_id|><|start_header_id|>assistant<|end_header_id|>''',
        'quantize': 'int4',
        'torch_dtype': 'auto',
    },
    {
        'model_name': 'Rhea',
        'model_id': 'davidkim205/Rhea-72b-v0.5',
        'task': 'text-generation',
        'max_length': 4096,
        'prompt_template': f'Instruction: {{instruction}}\nTranscript: "{{transcript}}"\nScore:',
        'quantize': 'int4',
        'torch_dtype': 'auto',
    },
    {
        'model_name': 'MultiVerse_70B',
        'model_id': 'MTSAIR/MultiVerse_70B',
        'task': 'text-generation',
        'max_length': 4096,
        'prompt_template': f'### Instruction: {{instruction}}\nTranscript: "{{transcript}}"\n### Response: Score:',
        'quantize': 'int4',
        'torch_dtype': 'auto',
    },
    # {
    #     'model_name': 'Smaug-72B',        # I did not try this one
    #     'model_id': 'abacusai/Smaug-72B-v0.1',
    #     'task': 'text-generation',
    #     'max_length': 4096,
    #     'prompt_template': f'[INST]<<SYS>>Instruction: {{instruction}}<</SYS>>\nTranscript: "{{transcript}}"[/INST]\nScore:',
    #     'quantize': 'int4',
    #     'torch_dtype': 'auto',
    # },
    {
        'model_name': 'BioMistral',
        'model_id': 'BioMistral/BioMistral-7B',
        'task': 'text-generation',
        'max_length': 1000000000000000019884624838656,
        'prompt_template': f'Instruction: {{instruction}}\nTranscript: "{{transcript}}"\nScore:',
        'quantize': None,
        'torch_dtype': 'auto',
    },
    {
        'model_name': 'meerkat',
        'model_id': 'dmis-lab/meerkat-7b-v1.0',
        'task': 'text-generation',
        'max_length': 1000000000000000019884624838656,
        'prompt_template': f'### Instruction: {{instruction}}\nTranscript: "{{transcript}}"\nScore:',
        'quantize': None,
        'torch_dtype': 'auto',
    },
    {
        # NOTE(review): model_name says 128k but model_id points at the 4k variant
        # (and max_length is 4096) — confirm which model was actually evaluated.
        'model_name': 'Phi-3-mini-128k-instruct',
        'model_id': 'microsoft/Phi-3-mini-4k-instruct',
        'task': 'text-generation',
        'max_length': 4096,
        'user': f'Instruction: {{instruction}}\nTranscript: "{{transcript}}"\nScore:',
        'prompt_template': f'<|user|>\n{{user}}<|end|>\n<|assistant|>',
        # 'prompt_template': f'Instruction: {{instruction}}\nTranscript: "{{transcript}}"\nScore:',
        'quantize': None,
        'torch_dtype': 'auto',
    },
    {
        # NOTE(review): this entry defines 'system' and 'user' but no 'prompt_template',
        # which the loading cell requires — add a template before selecting this model.
        'model_name': 'meditron',           # hallucinations
        'model_id': 'epfl-llm/meditron-70b',
        'task': 'text-generation',
        'max_length': 1000000000000000019884624838656,
        # 'prompt_template': f'Instruction: {{instruction}}\nTranscript: "{{transcript}}"\nScore:',
        'system': f'{{instruction}}',
        'user':f'Transcript: "{{transcript}}"\nScore:',
        'quantize': 'int4',
        'torch_dtype': torch.float16,
    },    
    {
        'model_name': 'JSL-Med',        # rubbish
        'model_id': 'johnsnowlabs/JSL-Med-Sft-Llama-3-8B',
        'task': 'text-generation',
        'max_length': 1000000000000000019884624838656,
        'prompt_template': f'Instruction: {{instruction}}\nTranscript: "{{transcript}}"\nScore:',
        'quantize': None,
        'torch_dtype': 'auto',
    },
]

## Selecting and Loading a Model

In [None]:
# Name of the registry entry (see `models`) to load in this session.
model_name = 'Orca-2'

# Look up the configuration for the selected model. Fail fast on an unknown name:
# merely printing a message would let the following cells run against an
# undefined (or stale, from a previous run) `model` / `tokenizer`.
matching_model = next((entry for entry in models if entry['model_name'] == model_name), None)
if matching_model is None:
    available_names = ', '.join(entry['model_name'] for entry in models)
    raise ValueError(
        f'Model information not found for: {model_name}. Available models: {available_names}')

model_id = matching_model['model_id']
print('Loading model:', model_id)

# Preparing prompt template.
# Entries may wrap a 'user' (and optionally 'system') message in a chat template;
# those are substituted now, leaving `{instruction}` / `{transcript}` to be filled per prompt.
prompt_template = matching_model.get('prompt_template')
if prompt_template is None:
    raise KeyError(f"Model entry '{model_name}' does not define a 'prompt_template'")
if 'system' in matching_model and 'user' in matching_model:
    prompt_template = prompt_template.format(
        system=matching_model['system'], user=matching_model['user'])
elif 'user' in matching_model:
    prompt_template = prompt_template.format(user=matching_model['user'])

quantize = matching_model.get('quantize', None)
torch_dtype = matching_model.get('torch_dtype', 'auto')

# Configuring quantization: 'int8' / 'int4' use bitsandbytes; any other value loads unquantized.
if quantize == 'int8':
    bnb_config = BitsAndBytesConfig(load_in_8bit=True)
elif quantize == 'int4':
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type='nf4',
        bnb_4bit_compute_dtype=torch.float16
    )
else:
    bnb_config = None

# Use the GPU when one is available and make it the default device for new tensors.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
torch.set_default_device(device)

# Errors raised while downloading/loading propagate unchanged
# (the previous `try/except: raise e` wrapper added nothing).
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map=device,
    torch_dtype=torch_dtype,
    trust_remote_code=True,
    quantization_config=bnb_config
)

## Load Data

In [None]:
def find_first_empty_string_row_index(responses_df, num_questions=None):
    """Return the index label of the first row that still has a missing response.

    A response counts as missing when its cell is NaN or an empty string.

    Args:
        responses_df: DataFrame containing the columns `Response_1` ..
            `Response_<num_questions>`.
        num_questions: Number of response columns to inspect. Defaults to
            `len(QUESTIONS)` so that every question is checked, including the
            last one (previously the last response column was skipped).

    Returns:
        The index label of the first row with at least one missing response,
        or None when every inspected response is filled in.

    Raises:
        KeyError: If one of the expected `Response_<i>` columns is absent.
    """
    if num_questions is None:
        num_questions = len(QUESTIONS)

    # Question numbers are 1-based and the upper bound is inclusive.
    responses_columns = [f'Response_{i}' for i in range(1, num_questions + 1)]
    responses = responses_df[responses_columns]

    # True for every row that has at least one NaN or empty-string response.
    rows_with_missing = (responses.isna() | (responses == '')).any(axis=1)

    if rows_with_missing.any():
        # idxmax on a boolean Series yields the label of the first True entry.
        return rows_with_missing.idxmax()
    return None

# Input/output locations, relative to the notebook's working directory.
transcripts_dir = '../../Getting_Transcripts'
transcripts_file_name = 'merged_filtered_videos_transcripts.csv'
responses_dir = '../../../Results/LLMs_Responses'
# Only rows whose 'Topic' is in this list are kept (applied in the next cell).
topics_to_include = ['Spina Bifida', 'Flat Feet', 'Cluster Headache', 'Trigger Finger', 'Pudendal Nerve']

# The results file name combines the selected model, the topic subset and the prompt type.
prompt_type = 'GS_prompting'
topics = 'last_5_topics'
results_file_name = f'{model_name}-{topics}-{prompt_type}'

# NOTE(review): `load_responses_df` is a project helper; presumably it resumes from an
# existing results file when there is one and otherwise starts from the transcripts — confirm.
responses_df = load_responses_df(transcripts_dir, transcripts_file_name, responses_dir, results_file_name, question_type)

print(responses_df.shape)
responses_df.head(2)

In [None]:
# Attach the 'Topic' of each video (only when it is not already present) and keep
# just the topics listed in `topics_to_include`.
if 'Topic' not in responses_df.columns:
    experts_file = '../../../Videos_and_DISCERN_data/filtered_experts_scores.csv'
    experts_df = pd.read_csv(experts_file)

    # Left join keeps every response row even if the experts file has no match.
    # NOTE(review): assumes 'Video ID' is unique in the experts file; duplicates
    # there would duplicate response rows — confirm.
    responses_df = responses_df.merge(experts_df[['Video ID', 'Topic']], on='Video ID', how='left')
    # Move 'Topic' from the last column to column position 2.
    responses_df.insert(2, 'Topic', responses_df.pop('Topic'))
    responses_df = responses_df[responses_df['Topic'].isin(topics_to_include)]
    # Renumber rows 0..n-1 so positional slicing and index labels agree afterwards.
    responses_df = responses_df.reset_index(drop=True)

print('responses_df shape:', responses_df.shape)
responses_df.head(2)

## Configuring Generation

In [None]:
# Generation settings shared by all inference cells below.
# Requires `model` from the model-loading cell.
generation_config = GenerationConfig(
    max_new_tokens=250,             # upper bound on generated tokens per prompt
    # num_beams=2,                  # default is 1
    # early_stopping=True,
    do_sample=False,                # no sampling, so the sampling options below stay disabled
    # temperature = 0.8,            # set between 0.5-1 if do_sample=True
    # repetition_penalty=1.2,
    eos_token_id=model.config.eos_token_id,
    # The EOS token doubles as the padding token, matching
    # `tokenizer.pad_token = tokenizer.eos_token` in the batch-inference cell.
    pad_token_id=model.config.eos_token_id,
)

## Processing Prompts, Inputs and Responses

#### Processing one question at a time (very slow).  
See the next cells for batch inference.


In [None]:
"""
print_response = True       # To print response for each question

for index, row in responses_df.iterrows():
    print(f'Started with video ID: {video_id} | Index: {index}')
    video_id = row['Video ID']
    transcript = row['Transcript']

    for question_num in range(1, len(QUESTIONS) + 1):
        column_name = f'Response_{question_num}'
        if row[column_name] == '':
            instruction = ' '.join([QUESTION_HEAD, QUESTIONS[question_num - 1], QUESTION_TAIL])
            prompt = prompt_template.format(instruction=instruction, transcript=transcript)

            input = tokenizer(prompt, return_tensors='pt', return_attention_mask=False)
            input = input.to(device)

            output_token_ids = model.generate(**input, generation_config=generation_config)
            # llm_output = run_llm(pipe)
            llm_response = tokenizer.batch_decode(output_token_ids, skip_special_tokens=True)[0]

            check_and_store_response(llm_response, responses_df, video_id, question_num, remove_prompt=True, print_response=print_response)
"""

#### Inferencing `batch_size` questions in each transcript

In [None]:
# Tokenizer setup for padded batches.
tokenizer.pad_token = tokenizer.eos_token   # needed for batch inference
tokenizer.padding_side = 'left'             # decoder-only models require left padding_side

# Uncomment to restart from the first video instead of resuming.
# start_video_idx = 0
batch_size = 15             # Adjust as needed. Max 15 (as the number of questions)
print_response = True       # To print response for each question
# Resume from the first row that still has a missing response (None if nothing is missing).
start_video_idx = find_first_empty_string_row_index(responses_df)

def generate_prompt(question_num, transcript):
    """Build the model prompt for one question about one transcript.

    `question_num` is 1-based. The instruction is the question wrapped in the
    shared head and tail, separated by single spaces, and is then inserted into
    the module-level `prompt_template` together with the transcript.
    """
    question_text = QUESTIONS[question_num - 1]
    instruction_parts = (QUESTION_HEAD, question_text, QUESTION_TAIL)
    return prompt_template.format(
        instruction=' '.join(instruction_parts),
        transcript=transcript,
    )

def generate_responses(prompts):
    """Run one padded batch of prompts through the model and return the decoded texts.

    Uses the module-level `tokenizer`, `model`, `device` and `generation_config`.
    Intermediate tensors are dropped as soon as they are no longer needed and the
    CUDA cache is emptied before returning, to keep GPU memory usage low.
    """
    batch_inputs = tokenizer(prompts, return_tensors='pt', padding=True).to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        generated_ids = model.generate(**batch_inputs, generation_config=generation_config)
    del batch_inputs        # release the input tensors

    texts = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    del generated_ids       # release the output tensors

    torch.cuda.empty_cache()
    return texts

def store_responses(responses, responses_df, video_id, question_num):
    """Store a batch of responses for consecutive questions of one video.

    `question_num` is the number of the LAST question in the batch, so the first
    response belongs to question `question_num - len(responses) + 1`.
    """
    first_question = question_num - len(responses) + 1
    for offset, response in enumerate(responses):
        check_and_store_response(response, responses_df, video_id, first_question + offset, remove_prompt=True, print_response=print_response)

def process_questions_and_responses(responses_df, start_video_idx, batch_size):
    """Generate and store responses for every video from `start_video_idx` onwards.

    For each video, prompts for all questions are built in order and sent to the
    model in chunks of at most `batch_size`; the last chunk of a video may be
    smaller. Does nothing when `start_video_idx` is None.
    """
    if start_video_idx is None:
        return

    remaining_ids = responses_df['Video ID'][start_video_idx:]
    remaining_transcripts = responses_df['Transcript'][start_video_idx:]
    last_question = len(QUESTIONS)

    video_index = start_video_idx
    for video_id, transcript in zip(remaining_ids, remaining_transcripts):
        print('\nStarted with video: ', video_id, ' |  Index: ', video_index)

        pending_prompts = []
        for question_num in range(1, last_question + 1):
            pending_prompts.append(generate_prompt(question_num, transcript))

            # Keep collecting until the batch is full or the questions run out.
            batch_is_full = len(pending_prompts) == batch_size
            if not (batch_is_full or question_num == last_question):
                continue

            store_responses(generate_responses(pending_prompts), responses_df, video_id, question_num)
            pending_prompts.clear()     # start a fresh batch

        video_index += 1

%time process_questions_and_responses(responses_df, start_video_idx, batch_size)

#### Batch `n` number of transcripts (`n * 15` prompts) at a time


In [None]:
# Batch "n" number of transcripts at a time
"""
tokenizer.pad_token = tokenizer.eos_token   # needed for batch inference
tokenizer.padding_side = 'left'             # decoder only models require left padding_side

start_video_idx = find_first_empty_string_row_index(responses_df)
n = 2                       # Number of transcripts
batch_size = n * 15         # or smaller
print_response = True       # To print response for each question

def generate_prompts(transcripts):
    prompts = []
    for transcript in transcripts:
        for question in QUESTIONS:
            instruction = ' '.join([QUESTION_HEAD, question, QUESTION_TAIL])
            prompt = prompt_template.format(instruction=instruction, transcript=transcript)
            prompts.append(prompt)
    return prompts

def generate_responses(prompts, batch_size):
    responses = []
    for i in range(0, len(prompts), batch_size):
        batch_prompts = prompts[i:i + batch_size]
        encoded_batch_inputs = tokenizer(batch_prompts, return_tensors='pt', padding=True,).to(device)

        with torch.no_grad():  # Disable gradients for inference
            outputs_token_ids = model.generate(**encoded_batch_inputs, generation_config=generation_config)
        del encoded_batch_inputs

        decoded_responses = tokenizer.batch_decode(outputs_token_ids, skip_special_tokens=True)
        del outputs_token_ids

        responses.extend(decoded_responses)
        del decoded_responses

        torch.cuda.empty_cache()
        
    return responses

def store_responses(responses, video_ids_batch, responses_df, batch_start_index=0):
    for i, response in enumerate(responses):
        video_index = (batch_start_index + i) % len(video_ids_batch)    # Determine the index of the video for this prompt
        video_id = video_ids_batch[video_index]     # Get the video ID corresponding to the video index
        question_num = (i) % len(QUESTIONS) + 1     # Calculate the question number based on the current iteration

        check_and_store_response(response, responses_df, video_id, question_num, remove_prompt=True, print_response=print_response)

def generate_outputs(responses_df, start_video_idx, batch_size, n):
    if start_video_idx is None:
        return

    video_ids = responses_df['Video ID'].values[start_video_idx:]
    transcripts = responses_df['Transcript'].values[start_video_idx:]
    num_videos = len(responses_df)

    for video_index in range(start_video_idx, num_videos, n):
        video_ids_batch = video_ids[video_index : min(video_index + n, num_videos)]
        transcripts_batch = transcripts[video_index : min(video_index + n, num_videos)]

        print(f"\nStarted with video(s): {', '.join(video_ids_batch)} | "
            f'Index: {video_index} to {min(video_index + n, num_videos) - 1}')

        prompts_batch = generate_prompts(transcripts_batch)

        responses_batch = generate_responses(prompts_batch, batch_size)
        
        store_responses(responses_batch, video_ids_batch, responses_df, video_index)

%time generate_outputs(responses_df, start_video_idx, batch_size, n)
"""

## Explore Results

In [None]:
# Show the first rows processed in this run: rows from `start_video_idx` onwards,
# restricted to the 15 columns starting at column "Q1".
# NOTE(review): the width 15 is hard-coded; presumably it equals len(QUESTIONS) — confirm.
display_from_index = start_video_idx
index_of_q1 = responses_df.columns.get_loc("Q1")

responses_df.iloc[display_from_index:, index_of_q1:index_of_q1+15].head()

In [None]:
# Per-column count of cells that are NaN or an empty string.
columns_with_none = (responses_df.isna() | (responses_df == '')).sum()
columns_with_none

In [None]:
# Rows containing at least one NaN in any column.
# Note: unlike the per-column count above, empty strings are not treated as missing here.
rows_with_none = responses_df[responses_df.isna().any(axis=1)]
rows_with_none

In [None]:
# Index labels of the rows whose 'Problem' entry is non-empty.
# NOTE(review): assumes 'Problem' holds a sized collection of question numbers; if the
# frame was reloaded from CSV the cells may be strings such as '[]', whose length is
# also > 0 — confirm.
indices_with_problems = responses_df[responses_df['Problem'].apply(lambda x: len(x) > 0)].index.tolist()
print(indices_with_problems)

In [None]:
import html

from IPython.display import display, HTML

# Render the first problematic response of one flagged row, preserving its line breaks.
if indices_with_problems:
    # Row to inspect. 7 is the manually chosen default; when that row has no recorded
    # problems, fall back to the first flagged row instead of failing on an empty list.
    index_with_problem = 7
    if index_with_problem not in indices_with_problems:
        index_with_problem = indices_with_problems[0]

    responses_with_problem_list = list(responses_df.loc[index_with_problem, 'Problem'])
    print("List of questions with problem:", responses_with_problem_list)

    response_with_problem = responses_with_problem_list[0]
    text = responses_df.loc[index_with_problem, f'Response_{response_with_problem}']
    # Escape the model output so characters such as '<' or '&' are shown literally
    # instead of being interpreted as HTML markup.
    display(HTML("<div style='white-space: pre-wrap;'>{}</div>".format(html.escape(str(text)))))

In [None]:
# Display the full responses for a specific transcript.
index_to_display = 0
# Iterate over every question (1-based) rather than a hard-coded count, consistent
# with how the inference cells enumerate the questions.
for question_num in range(1, len(QUESTIONS) + 1):
    print(f'Q{question_num}:\n', responses_df.at[index_to_display, f'Response_{question_num}'])

## Store Results in a CSV File

In [None]:
# Write all responses to '<responses_dir>/<results_file_name>-response.csv'.
# NOTE(review): `responses_dir` must already exist; `to_csv` is not asked to create it here.
csv_output_file = os.path.join(responses_dir, f'{results_file_name}-response.csv')

# The DataFrame index is not written; the file is UTF-8 encoded.
responses_df.to_csv(csv_output_file, index=False, encoding='utf-8')