## 1. Import libraries and modules

In [None]:
import pandas as pd
from transformers import pipeline, AutoTokenizer, AutoModelForQuestionAnswering
import time
import random
import nltk
from tqdm import tqdm
import torch
import os
import sys
import random
import json
import numpy as np
from gpt4all import GPT4All
# Download the NLTK resources named below, one nltk.download() call each.
# NOTE(review): presumably these are needed by the text metrics in the
# project's helper module -- confirm against that module.
for nltk_resource in ('punkt', 'stopwords'):
    nltk.download(nltk_resource)

In [None]:
# Add the sibling "funcs" directory to the import path so the project's
# helper module can be imported as `fn`. The relative path is resolved
# against the current working directory when this cell runs.
sys.path.append(os.path.abspath('../funcs'))
import functions as fn

## 2. Import and process the RGB dataset

In [None]:
# Location of the English RGB data file; it is parsed by the project helper.
url = "https://raw.githubusercontent.com/chen700564/RGB/master/data/en.json"

# Keep a random subset of 5 parsed records.
# NOTE(review): no random seed is set in the visible code before this call,
# so the subset presumably differs between runs -- confirm this is intended.
data = random.sample(fn.process_json(url), 5)

# For every sampled record collect its query and the first entry of its
# answer field, keeping both lists aligned with `data`.
queries, answers = [], []
for record in data:
    queries.append(record["query"])
    answers.append(record["answer"][0])

## 3. Run Generative Open Source Models

### 3.1. Load config and mapping dicts

In [None]:
# Load the config.
# JSON text is UTF-8 by specification, so the encoding is stated explicitly
# instead of relying on the platform's locale default.
with open("../config/config.json", "r", encoding="utf-8") as f:
    config = json.load(f)

# Load the model mapping (looked up by model name when naming the metric
# sheets further down the notebook).
with open('../config/models_mapping.json', 'r', encoding="utf-8") as f:
    model_mapping = json.load(f)

### 3.2. Set up local variables

In [None]:
# Set up noise thresholds
stride = config['globals']['stride']
noise_thresholds = fn.get_noise_levels(stride)

# Set up device: use the first GPU that GPT4All reports. When the list is
# empty, fall back to the CPU instead of failing with an IndexError on [0].
available_gpus = GPT4All.list_gpus()
if available_gpus:
    device = available_gpus[0]
else:
    device = "cpu"
    print("No GPU reported by GPT4All; running the models on the CPU.")

# Set up models: one GPT4All instance per configured entry, all on `device`.
gen_config = config['generative_models_open_source']
models = [
    GPT4All(gen_config[f'gen_model_{index}'], device=device)
    for index in range(1, 5)
]
# Keep the individual names available for any later cell that uses them.
gen_model_1, gen_model_2, gen_model_3, gen_model_4 = models

# Set up models name. The name is used to build the per-experiment result
# file names, from which the metrics step recovers the model name.
model_names = ("gpt4all.gguf", "Meta-Llama.gguf", "Nous-Hermes.gguf", "Phi.gguf")
for gen_model, model_name in zip(models, model_names):
    gen_model.name = model_name

# Set up separator
separator = config['globals']['separator']

# Set up max tokens
max_tokens = config['globals']['max_tokens']

# Number of experiments
num_experiments = config['globals']['num_experiments']

# Set up paths (created up front so later cells can write into them)
input_paths = gen_config['input_paths']
output_paths = gen_config['output_paths']
os.makedirs(input_paths, exist_ok=True)
os.makedirs(output_paths, exist_ok=True)

# Set up prompt. The template is later filled through str.format with the
# `context_concat` and `query` fields; read it as UTF-8 rather than with the
# platform's locale default.
with open("../config/prompts/prompt.txt", "r", encoding="utf-8") as f:
    prompt_template = f.read()

### 3.3. Run models

In [None]:
# Timing records: one row per (query, noise level) generation, and one row
# per model summarising its experiments. Both are exported to Excel below.
execution_times = []
overall_times = []

# The contexts are the same for every model and experiment, so extract them
# once instead of rebuilding the lists inside the experiment loop.
positive_contexts = [item["positive"] for item in data]
negative_contexts = [item["negative"] for item in data]

for model in models:
    # `model` is an already-loaded GPT4All instance. It must not be passed to
    # the Hugging Face tokenizer/model loaders, which expect a model id or
    # path; no question-answering pipeline is used in this loop.
    model_start_time = time.time()
    model_times = []

    for exp_num in range(1, num_experiments + 1):

        # Reseed Python's RNG per experiment so each experiment number always
        # starts from the same random state.
        random.seed(2024 + exp_num)

        results = []
        exp_start_time = time.time()
        for query, positive_context, negative_context, answer in tqdm(
            zip(queries, positive_contexts, negative_contexts, answers),
            total=len(queries),
        ):
            result = {
                'Query': query,
                'Correct Answer': answer,
            }
            for noise_level, value in noise_thresholds.items():
                noise_start_time = time.time()
                predicted_key = f'{noise_level} Predicted Answer'

                # Each prompt is generated inside its own chat session.
                with model.chat_session():
                    mixed_context = fn.create_mixed_context(
                        positive_context, negative_context, value, max_tokens, separator
                    )
                    context_concat = separator.join(mixed_context)
                    prompt = prompt_template.format(context_concat=context_concat, query=query)
                    result[predicted_key] = model.generate(prompt)

                # Score the prediction against the correct answer.
                result[f'Jaccard {noise_level}'] = fn.apply_jaccard(result, predicted_key, 'Correct Answer')
                result[f'Cosine {noise_level}'] = fn.apply_cosine(result, predicted_key, 'Correct Answer')
                result[f'EM {noise_level}'] = fn.apply_exact_match(result, predicted_key, 'Correct Answer')
                result[f'EM - 2V {noise_level}'] = fn.apply_exact_match_2v(result, predicted_key, 'Correct Answer')

                # One measurement per row (context building, generation and
                # scoring), hence the zero standard deviation. The model is
                # recorded by name so the row can be written to Excel.
                execution_times.append({
                    'Model': model.name,
                    'Noise Level': noise_level,
                    'Average Time': time.time() - noise_start_time,
                    'Standard Deviation': 0
                })

            results.append(result)

        # Persist this experiment's rows as JSON lines; the metrics step
        # parses the experiment number and model name back out of the name.
        results_df = pd.DataFrame(results)
        filename_results = os.path.join(input_paths, f"exp_{exp_num}_{model.name}.json")
        results_df.to_json(filename_results, orient='records', lines=True)
        exp_end_time = time.time()
        model_times.append(exp_end_time - exp_start_time)

    model_end_time = time.time()

    avg_time = np.mean(model_times)
    std_time = np.std(model_times)

    print(f"Execution time for model {model.name}: {(model_end_time - model_start_time) / 60:.2f} minutes.")

    overall_times.append({
        'Model': model.name,
        'Average Time': avg_time,
        'Standard Deviation': std_time
    })

# Export both timing tables to one workbook, one sheet each.
execution_times_df = pd.DataFrame(execution_times)
overall_times_df = pd.DataFrame(overall_times)
filename_exec_time = os.path.join(input_paths, "exec_time.xlsx")
with pd.ExcelWriter(filename_exec_time, engine='xlsxwriter') as writer:
    execution_times_df.to_excel(writer, sheet_name='Noise Level Times', index=False)
    overall_times_df.to_excel(writer, sheet_name='Overall Model Times', index=False)

## 4. Compute Metrics

### 4.1. Compute all metrics

In [None]:
# Compute the metrics of every experiment result file and gather them in one
# workbook with one sheet per model.
output_file = os.path.join(output_paths, 'all_metrics.xlsx')
json_suffix = '.json'
frames_by_sheet = {}

# os.listdir returns entries in arbitrary order; sort them so the row order
# of the workbook is the same on every run.
files = sorted(os.listdir(input_paths))
for file in tqdm(files, desc="Processing files"):
    # Other files (e.g. the timing workbook) live in the same directory.
    if not file.endswith(json_suffix):
        continue

    # Result files are named exp_<experiment>_<model>.json. Strip only the
    # trailing suffix and split at most twice, so a model name that itself
    # contains '_' or '.json' is kept intact.
    name_parts = file[:-len(json_suffix)].split('_', 2)
    experiment_num = int(name_parts[1])
    model_name = name_parts[2]

    # Fall back to the raw model name when the mapping has no entry for it.
    sheet_name = model_mapping.get(model_name, model_name)
    input_path = os.path.join(input_paths, file)
    result_df = fn.compute_metrics(input_path, stride)
    result_df.insert(0, 'Experiment Number', experiment_num)
    frames_by_sheet.setdefault(sheet_name, []).append(result_df)

# Concatenate once per sheet instead of re-copying the frame for every file.
all_results = {
    sheet_name: pd.concat(frames, ignore_index=True)
    for sheet_name, frames in frames_by_sheet.items()
}

with pd.ExcelWriter(output_file) as writer:
    for sheet_name, result_df in all_results.items():
        result_df.to_excel(writer, sheet_name=sheet_name, index=False)

### 4.2. Compute mean metrics

In [None]:
# Aggregate the per-experiment metrics into a mean and standard deviation
# per metric and noise level, one sheet per input sheet.

# Read all_metrics.xlsx from the configured output directory, which is where
# the "Compute all metrics" step writes it, rather than from a hard-coded
# location that may not match the config.
input_file = os.path.join(output_paths, 'all_metrics.xlsx')
# NOTE(review): the destination is left at its original fixed location so
# existing consumers still find it; consider deriving it from output_paths.
output_file = '../metrics/gen/final_metrics.xlsx'
final_results = {}

# The noise-level column names are the same for every sheet.
noise_levels = list(noise_thresholds.keys())

# sheet_name=None loads every sheet into a {sheet name: DataFrame} dict.
excel_data = pd.read_excel(input_file, sheet_name=None)

for sheet_name, df in excel_data.items():
    if 'Experiment Number' not in df.columns:
        raise ValueError(f"'Experiment Number' column not found in sheet {sheet_name}")
    metrics = df['Metric'].unique()

    # Slice the rows of each metric once and reuse them for all noise levels.
    rows_by_metric = [df[df['Metric'] == metric] for metric in metrics]

    result_data = {
        'Metric': metrics,
    }
    for noise_level in noise_levels:
        result_data[f'{noise_level}_Mean'] = [
            rows[noise_level].mean() for rows in rows_by_metric
        ]
        # pandas computes the sample standard deviation by default, which is
        # NaN when a metric has a single row.
        result_data[f'{noise_level}_Std'] = [
            rows[noise_level].std() for rows in rows_by_metric
        ]
    result_df = pd.DataFrame(result_data)
    final_results[sheet_name] = result_df

with pd.ExcelWriter(output_file) as writer:
    for sheet_name, result_df in final_results.items():
        result_df.to_excel(writer, sheet_name=sheet_name, index=False)