In [1]:
import pandas as pd
from tqdm import tqdm
import time
import random
import nltk
from tqdm import tqdm
import tiktoken
import os
from gpt4all import GPT4All
# Fetch the NLTK 'punkt' and 'stopwords' packages into the local nltk_data
# directory; NLTK reports them as already up to date when they are present.
# NOTE(review): presumably needed by the helpers in `functions` - confirm.
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /home/phaxssi/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/phaxssi/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [2]:
import functions as fn

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Load the records from the RGB repository's `en.json` file through the
# project helper and keep only the first record.
url = "https://raw.githubusercontent.com/chen700564/RGB/master/data/en.json"
data = fn.process_json(url)[:1]
# data = random.sample(data, 1)

# Build two parallel lists: each record's query and the first element of its
# "answer" field.
queries, answers = [], []
for item in data:
    queries.append(item["query"])
    answers.append(item["answer"][0])

In [None]:
# Prompt sent to each generation model. The `{context_concat}` and `{query}`
# placeholders are filled in with `str.format` when a prompt is built for a
# question in the experiment loop.
prompt_template = """
You are an AI assistant specializing in Question Answering. Your task is to read the provided context carefully and then generate the most accurate and concise answer to the question based on the context.

Context: {context_concat}

Question: {query}

Answer:
"""

In [None]:
# Use the first GPU reported by GPT4All. When no GPU is listed, fall back to
# the CPU backend instead of failing with an IndexError on the empty list.
available_gpus = GPT4All.list_gpus()
device = available_gpus[0] if available_gpus else "cpu"
print(device)

In [None]:
# gen_model_1 = GPT4All("gpt4all-13b-snoozy-q4_0.gguf", device="cuda:Tesla T4", verbose=True)
# gen_model_1.name = "gpt4all-13b-snoozy-q4_0.gguf"

Downloading: 100%|██████████| 7.37G/7.37G [12:38<00:00, 9.72MiB/s]
Verifying: 100%|██████████| 7.37G/7.37G [00:32<00:00, 228MiB/s]
Model downloaded to '/root/.cache/gpt4all/gpt4all-13b-snoozy-q4_0.gguf'


In [None]:
# Load both generation models on the selected device and attach a `name`
# label to each one. The label is embedded in the result file names, and the
# metrics cell splits those file names on '_', so the labels use '-' where
# the model file names use '_'.
gen_model_1 = GPT4All("gpt4all-13b-snoozy-q4_0.gguf", device=device)
gen_model_1.name = "gpt4all-13b-snoozy-q4-0.gguf"

gen_model_2 = GPT4All("Meta-Llama-3-8B-Instruct.Q4_0.gguf", device=device)
gen_model_2.name = "Meta-Llama-3-8B-InstructQ4-0.gguf"

In [None]:
# Run every model `num_experiments` times over all queries and noise levels,
# score the generated answers, and write one JSON-lines file per
# (experiment, model) pair under `output_path`.
start_time = time.time()

output_path = '/results/gen'
os.makedirs(output_path, exist_ok=True)

# Set a seed for reproducibility
random.seed(2024)

# Noise conditions: column-name label -> noise ratio handed to
# fn.create_mixed_context.
# NOTE(review): the ratio is presumably the share of negative passages in the
# mixed context - confirm against the helper.
noise_thresholds = {
    'Noise_0': 0.0,
    'Noise_20': 0.20,
    'Noise_40': 0.40,
    'Noise_60': 0.60,
    'Noise_80': 0.80,
    'Noise_100': 1.0
}

# Models to be tested
models = [gen_model_1, gen_model_2]
separator = " <|> "
max_total_tokens = 1600
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")

# Number of experiments
num_experiments = 5

# The per-query context lists do not depend on the model or the experiment,
# so build them once instead of on every experiment.
positive_contexts = [item["positive"] for item in data]
negative_contexts = [item["negative"] for item in data]

for model in models:
    for exp_num in range(1, num_experiments + 1):
        # A distinct, deterministic seed per experiment.
        random.seed(2024 + exp_num)

        results = []
        samples = zip(queries, positive_contexts, negative_contexts, answers)
        for query, positive_context, negative_context, answer in tqdm(samples, total=len(queries)):
            result = {
                'Query': query,
                'Correct Answer': answer,
            }

            for noise_label, noise_ratio in noise_thresholds.items():
                mixed_context = fn.create_mixed_context(
                    positive_context, negative_context, noise_ratio, max_total_tokens, separator
                )
                context_concat = separator.join(mixed_context)
                prompt = prompt_template.format(context_concat=context_concat, query=query)

                # Open a fresh chat session for every prompt: a session keeps
                # the conversation history, so sharing one across noise levels
                # would feed earlier contexts and answers into later prompts.
                with model.chat_session():
                    generated_answer = model.generate(prompt)

                result[f'{noise_label} Predicted Answer'] = generated_answer

            results.append(result)

        results_df = pd.DataFrame(results)

        # Score every noise level's prediction against the reference answer.
        for noise_label in noise_thresholds:
            predicted_col = f'{noise_label} Predicted Answer'
            results_df[f'EM {noise_label}'] = results_df.apply(
                lambda row: fn.apply_exact_match(row, predicted_col, 'Correct Answer'), axis=1)
            results_df[f'Jaccard {noise_label}'] = results_df.apply(
                lambda row: fn.apply_jaccard(row, predicted_col, 'Correct Answer'), axis=1)
            results_df[f'Cosine {noise_label}'] = results_df.apply(
                lambda row: fn.apply_cosine(row, predicted_col, 'Correct Answer'), axis=1)
            results_df[f'EM - 2V {noise_label}'] = results_df.apply(
                lambda row: fn.apply_exact_match_2v(row, predicted_col, 'Correct Answer'), axis=1)

        # Save results to a file with the experiment number and model name.
        # `os.path.join` is required here; the `os` module has no `join`.
        filename = os.path.join(output_path, f"exp_{exp_num}_{model.name}.json")
        results_df.to_json(filename, orient='records', lines=True)

end_time = time.time()
print(f"Total execution time: {(end_time - start_time) / 60} minutes.")

  0%|          | 0/5 [00:00<?, ?it/s]Exception ignored on calling ctypes callback function: <function LLModel._prompt_callback at 0x7e26b246b2e0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/gpt4all/_pyllmodel.py", line 601, in _prompt_callback
    @staticmethod
KeyboardInterrupt: 
100%|██████████| 5/5 [11:44<00:00, 140.81s/it]


Total execution time: 11.82841622432073 minutes.


In [None]:
# input_paths = '/content/drive/My Drive/tornado-tasks/results/gen/'
# files = os.listdir(input_paths)

# for file in files:
#     if file.endswith('.json'):
#         print(file)
#         model_name = file.split('.')[0]
#         full_path = os.path.join(input_paths, file)
#         for data_chunk in fn.read_json_in_chunks(full_path, fn.cols_to_use, chunk_size=1000):
#             with open(f'/content/drive/My Drive/tornado-tasks/text/gen/{model_name}.txt', 'a') as output_file:
#                 print(model_name)
#                 for part in fn.format_results(data_chunk):
#                     output_file.write(part + '\n')

In [None]:
# Compute metrics for every `exp_<n>_<model>.json` result file and write them
# to one Excel workbook with a sheet per model.
threshold = 0.8
input_paths = '/results/gen'
output_path = '/metrics/gen'
os.makedirs(output_path, exist_ok=True)
# `os.path.join` is required here; the `os` module has no `join`.
output_file = os.path.join(output_path, 'all_metrics.xlsx')

json_suffix = '.json'
# Excel rejects worksheet names longer than 31 characters.
max_sheet_name_len = 31

# model name -> list of per-experiment metric frames
frames_by_model = {}

# Sort so that rows and sheets are produced in a deterministic order.
files = sorted(os.listdir(input_paths))

for i, file in enumerate(tqdm(files, desc="Processing files"), start=1):
    if not file.endswith(json_suffix):
        continue

    # Expected layout: exp_<experiment number>_<model name>.json. Splitting at
    # most twice keeps model names that themselves contain underscores intact.
    name_parts = file[:-len(json_suffix)].split('_', 2)
    try:
        experiment_num = int(name_parts[1])
        model_name = name_parts[2]
    except (IndexError, ValueError):
        print(f"Skipping file with unexpected name: {file}")
        continue

    print(f'Experiment: {experiment_num}')
    print(f"Processing file {i}/{len(files)}: {file}")
    input_path = os.path.join(input_paths, file)
    result_df = fn.compute_metrics(input_path, threshold)
    result_df.insert(0, 'Experiment Number', experiment_num)
    frames_by_model.setdefault(model_name, []).append(result_df)

# Concatenate once per model instead of growing a frame inside the loop.
all_results = {
    model_name: pd.concat(frames, ignore_index=True)
    for model_name, frames in frames_by_model.items()
}

if all_results:
    with pd.ExcelWriter(output_file) as writer:
        for model_name, result_df in all_results.items():
            # Truncate only at write time so distinct models stay separate
            # in `all_results` while the sheet name stays within the limit.
            sheet_name = model_name[:max_sheet_name_len]
            result_df.to_excel(writer, sheet_name=sheet_name, index=False)
else:
    # A workbook needs at least one sheet, so skip the write entirely.
    print(f"No result files found in {input_paths}; nothing written.")

Processing files:   0%|          | 0/1 [00:00<?, ?it/s]

gpt4all-13b-snoozy-q4_0.gguf.json
/content/drive/My Drive/tornado-tasks/results/gen/gpt4all-13b-snoozy-q4_0.gguf.json


Processing files: 100%|██████████| 1/1 [00:05<00:00,  5.60s/it]


In [None]:
input_file = '/metrics/gen/all_metrics.xlsx'
output_file = '/metrics/gen/final_metrics.xlsx'

# Load every sheet of the per-experiment workbook as {sheet name: DataFrame}.
excel_data = pd.read_excel(input_file, sheet_name=None)

final_results = {}

# Noise-level columns that are summarised for each metric.
noise_levels = ['Noise_0', 'Noise_20', 'Noise_40', 'Noise_60', 'Noise_80', 'Noise_100']

# Build one summary table per sheet.
for sheet_name, df in excel_data.items():
    # Every sheet must carry the 'Experiment Number' column.
    if 'Experiment Number' not in df.columns:
        raise ValueError(f"'Experiment Number' column not found in sheet {sheet_name}")

    # Distinct metric names, in order of first appearance.
    metrics = df['Metric'].unique()

    # One row per metric; a mean and a standard-deviation column per noise level.
    result_data = {'Metric': metrics}
    for noise_level in noise_levels:
        values_per_metric = [
            df.loc[df['Metric'] == metric, noise_level] for metric in metrics
        ]
        result_data[f'{noise_level}_Mean'] = [values.mean() for values in values_per_metric]
        result_data[f'{noise_level}_Std'] = [values.std() for values in values_per_metric]

    final_results[sheet_name] = pd.DataFrame(result_data)

# Save the results to a new Excel file
with pd.ExcelWriter(output_file) as writer:
    for sheet_name, result_df in final_results.items():