In [None]:
import os

import torch
import evaluate
import transformers
import numpy as np
import pandas as pd

from tqdm import tqdm
from datetime import datetime
from transformers import AutoTokenizer
from transformers import GenerationConfig
from transformers import AutoModelForCausalLM
from evaluate.visualization import radar_plot
from nltk.tokenize import sent_tokenize
from sentence_transformers import util
from sentence_transformers import SentenceTransformer

In [None]:
# Notebook-wide configuration, read by the cells below.
# dir_root is prepended to every data file name in this notebook.
dir_root = "Data/"
# NOTE(review): log_prefix is not referenced anywhere in the visible cells - confirm it is still needed.
log_prefix = "log_train_history_"
# Generation length bounds; passed as max_new_tokens / min_new_tokens further down.
max_token = 256 # max token from dataset
min_token = 100 # min token from dataset
# "input" value of the first test record; later handed to DomainExpertV2 as its `type` argument.
# NOTE(review): this name shadows the built-in `type` for the rest of the notebook.
type = pd.read_json(f"{dir_root}test_topic.json", orient="records")["input"].values[0]

In [None]:
def calc_cossin(ref, pred, st=None):
    """Score how well the sentences of ``pred`` are matched by sentences of ``ref``.

    Both texts are split with ``sent_tokenize`` and embedded. For every
    predicted sentence the highest cosine similarity against any reference
    sentence is taken, and these maxima are averaged.

    Args:
        ref: Reference text.
        pred: Predicted (generated) text.
        st: Optional, already loaded encoder exposing ``encode``. When omitted,
            "sentence-transformers/all-MiniLM-L6-v2" is loaded on the first
            call and reused by later calls.

    Returns:
        The mean of the per-sentence maximum similarities, or ``0.0`` when
        either text yields no sentences.
    """
    if st is None:
        # Loading the encoder is expensive; keep a single instance on the
        # function object instead of re-creating it for every text pair.
        st = getattr(calc_cossin, "_encoder", None)
        if st is None:
            st = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
            calc_cossin._encoder = st

    sentences_ref = sent_tokenize(ref)
    sentences_pred = sent_tokenize(pred)
    if not sentences_ref or not sentences_pred:
        # Nothing to compare; also avoids the division by zero below.
        return 0.0

    encode_ref = st.encode(sentences_ref)
    encode_pred = st.encode(sentences_pred)

    # Rows: predicted sentences, columns: reference sentences.
    cosine_scores = util.cos_sim(encode_pred, encode_ref).numpy()

    return sum(row.max() for row in cosine_scores) / len(cosine_scores)

In [None]:
def get_best_model(prefix, metric, data):
    """Pick the threshold column with the highest mean value.

    Column names are built as ``prefix + <threshold> + metric`` for the
    thresholds 1.0, 0.9, ... produced by ``np.arange(1.0, -0.1, -0.1)``
    (rounded to one decimal).

    Args:
        prefix: Leading part of the column names.
        metric: Trailing part of the column names.
        data: DataFrame holding all of those columns.

    Returns:
        Tuple ``(column_name, mean)`` of the first column whose mean equals
        the maximum mean.
    """
    columns = []
    for treshold in np.arange(1.0, -0.1, -0.1):
        columns.append(prefix + str(round(treshold, 1)) + metric)

    # One row per column after the transpose; "mean" is one of the statistics.
    stats = data[columns].describe().T
    best_mean = stats.max()["mean"]
    winners = stats[stats["mean"] == best_mean]

    return winners.index[0], best_mean

In [None]:
def measure(col, samples):
    """Score the generated texts in ``samples[col]`` against ``samples["output"]``.

    For every row the sentence-level cosine similarity (``calc_cossin``),
    ROUGE, METEOR and BLEU are computed. The results are written back into
    ``samples`` as new columns named ``f"{col}_<metric>"``; the frame is
    modified in place and nothing is returned.

    Args:
        col: Name of the column holding the predictions.
        samples: DataFrame with an "output" (reference) column and ``col``.
    """
    rouge = evaluate.load('rouge')
    meteor = evaluate.load('meteor')
    bleu = evaluate.load("bleu")

    rouge_keys = ["rouge1", "rouge2", "rougeL", "rougeLsum"]
    bleu_keys = [
        "bleu",
        "bleu_precisions_n1",
        "bleu_precisions_n2",
        "bleu_precisions_n3",
        "bleu_precisions_n4",
        "bleu_brevity_penalty",
        "bleu_length_ratio",
        "bleu_translation_length",
        "bleu_reference_length",
    ]
    # Insertion order here is the order in which the columns are added.
    scores = {key: [] for key in ["cs"] + rouge_keys + ["meteor"] + bleu_keys}

    references = samples["output"].values
    # Predictions come from the frame passed in, not from a notebook global.
    predictions = samples[col].values

    for i in tqdm(range(len(samples))):
        reference = references[i]
        prediction = predictions[i]

        # Cosine sim
        scores["cs"].append(calc_cossin(reference, prediction))

        # rouge
        r_rouge = rouge.compute(predictions=[prediction], references=[reference])
        for key in rouge_keys:
            scores[key].append(r_rouge[key])

        # meteor
        r_meteor = meteor.compute(predictions=[prediction], references=[reference])
        scores["meteor"].append(r_meteor["meteor"])

        # bleu: build the whole row first so that a failure can never leave
        # the per-metric lists with different lengths.
        try:
            r_bleu = bleu.compute(predictions=[prediction], references=[reference])
            precisions = r_bleu["precisions"]
            bleu_row = [
                r_bleu["bleu"],
                precisions[0],
                precisions[1],
                precisions[2],
                precisions[3],
                r_bleu["brevity_penalty"],
                r_bleu["length_ratio"],
                r_bleu["translation_length"],
                r_bleu["reference_length"],
            ]
        except Exception:
            # Best effort: a row BLEU cannot score is recorded as all zeros.
            bleu_row = [0.0] * len(bleu_keys)
        for key, value in zip(bleu_keys, bleu_row):
            scores[key].append(value)

    for key, values in scores.items():
        samples[f"{col}_{key}"] = values

In [None]:
def generate(prompt, tokenizer, model):
    """Sample a continuation of ``prompt`` from ``model``.

    Sampling uses temperature 0.6, top-p 0.95 and a repetition penalty of
    1.2; the number of new tokens is bounded by the notebook-level
    ``min_token`` / ``max_token``.

    Args:
        prompt: Text to continue.
        tokenizer: Tokenizer matching ``model``.
        model: Causal LM exposing ``generate`` and ``device``.

    Returns:
        List with one decoded string per generated sequence. Each string is
        decoded from the full sequence (prompt tokens included) without
        skipping special tokens.
    """
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
    )
    # Follow the model's own device instead of assuming CUDA is available.
    input_ids = inputs["input_ids"].to(model.device)

    generation_config = GenerationConfig(
        temperature=0.6,
        top_p=0.95,
        do_sample=True,
        repetition_penalty=1.2,
    )

    # Only `.sequences` is consumed below, so per-step scores are not requested
    # (they would otherwise be kept in memory for every generated token).
    generation_output = model.generate(
        input_ids=input_ids,
        generation_config=generation_config,
        return_dict_in_generate=True,
        min_new_tokens=min_token,
        max_new_tokens=max_token,
        # NOTE(review): pad id 0 is hard-coded for every model family - confirm.
        pad_token_id=0,
        eos_token_id=tokenizer.eos_token_id,
    )

    return [tokenizer.decode(sequence) for sequence in generation_output.sequences]

In [None]:
def run(samples, tokenizer, model):
    """Generate one answer for every row of ``samples``.

    Each row's "instruction" and "input" values are placed into the
    instruction/input/response prompt template and passed to ``generate``.

    Args:
        samples: DataFrame with "instruction" and "input" columns.
        tokenizer: Tokenizer forwarded to ``generate``.
        model: Model forwarded to ``generate``.

    Returns:
        List of answer strings, one per row, in row order.
    """
    marker = "### Response:"
    instructions = samples["instruction"].values
    inputs = samples["input"].values

    results = []
    for i in tqdm(range(0, len(samples))):
        prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{instructions[i]}
### Input:
{inputs[i]}
### Response:"""

        text = generate(prompt, tokenizer, model)[0]
        if marker + "\n" in text:
            answer = text.split(marker + "\n")[-1]
        else:
            # The prompt ends without a newline, so a generation that does not
            # start with one would otherwise be returned together with the
            # whole prompt. Fall back to the bare marker in that case.
            answer = text.split(marker)[-1].lstrip()
        results.append(answer)

    return results

In [None]:
import torch
import transformers
from sentence_transformers import  util
from sentence_transformers import SentenceTransformer

class DomainExpert():
    """Question answering helper around a text-generation pipeline.

    Holds a sentence encoder (used by the extractors to embed questions) and
    a chat model pipeline that answers a question, optionally grounded in a
    list of context passages.
    """

    def __init__(
        self, 
        top_k=10, 
        min_new_tokens=0,
        max_new_tokens=512,         
        num_return_sequences=1,
        temperature=0.9,
        top_p=0.6,
        repetition_penalty=1.2,
        do_sample=True,
        max_input=4000,
        model_st_name="sentence-transformers/all-MiniLM-L6-v2",
        model_chat_name="meta-llama/Llama-2-7b-chat-hf",
        devices="auto"):
        """Store the sampling settings and load encoder, pipeline and tokenizer.

        Args:
            top_k, min_new_tokens, max_new_tokens, num_return_sequences,
            temperature, top_p, repetition_penalty, do_sample: Forwarded to
                the pipeline on every ``generate_answare`` call.
            max_input: Upper bound (exclusive) on the prompt length in tokens.
            model_st_name: Sentence encoder to load.
            model_chat_name: Model used for generation and for the tokenizer.
            devices: ``device_map`` handed to the pipeline.
        """
        self.top_k = top_k
        self.min_new_tokens = min_new_tokens
        self.max_new_tokens = max_new_tokens
        self.num_return_sequences = num_return_sequences
        self.temperature = temperature
        self.top_p = top_p
        self.repetition_penalty = repetition_penalty
        self.do_sample = do_sample
        self.model_st_name = model_st_name
        self.model_chat_name = model_chat_name
        self.devices = devices
        # Honour the argument; it used to be overwritten by a literal 4000.
        self.max_input = max_input

        self.model_st = SentenceTransformer(self.model_st_name)

        self.pipeline_chat = transformers.pipeline(
            "text-generation",
            model=self.model_chat_name,
            torch_dtype=torch.float16,
            device_map=self.devices,
        )

        self.tokenizer_chat = transformers.AutoTokenizer.from_pretrained(self.model_chat_name)

    def get_number_of_tokens(self, context):
        """Return how many token ids the chat tokenizer produces for ``context``."""
        return len(self.tokenizer_chat(context)["input_ids"])

    def sentence_encode(self, sentence):
        """Embed a single sentence; the encoder receives it as a one-item list."""
        return self.model_st.encode([sentence])

    def mask_prompt(self, question, context = ""):
        """Fill the [INST] prompt template with ``question`` and ``context``."""
        return f"""[INST]Below is an instruction that describes a task! Write a response that appropriately completes the request. To answer the instruction, use the context if available!
Instruction: {question}
Context: {context}[/INST]"""

    def create_prompt(self, question, context):
        """
        Build the prompt the model answers from.

        Context passages are appended one by one for as long as the resulting
        prompt stays below ``self.max_input`` tokens. When ``context`` is
        empty, or not even its first passage fits, the prompt is built from
        the question alone.
        """
        prompt = ""
        context_tmp = ""
        for text in context:
            context_tmp += f"{text}\n"
            prompt_tmp = self.mask_prompt(question, context_tmp)
            if self.get_number_of_tokens(prompt_tmp) >= self.max_input:
                break
            prompt = prompt_tmp

        if not prompt:
            # Never hand an empty prompt to the model: without the [/INST]
            # marker the answer extraction in generate_answare would fail.
            prompt = self.mask_prompt(question)

        return prompt

    def generate_answare(self, question, context):
        """
        Return the model's answer for the question and the context.

        The text after the first "[/INST]" is taken, its first two characters
        are dropped, and empty lines are removed.
        """
        prompt = self.create_prompt(question, context)
        sequences = self.pipeline_chat(
            prompt,
            do_sample = self.do_sample,
            top_k = self.top_k,
            num_return_sequences = self.num_return_sequences,
            min_new_tokens = self.min_new_tokens,
            max_new_tokens = self.max_new_tokens,
            temperature = self.temperature,
            top_p = self.top_p,
            repetition_penalty = self.repetition_penalty,
            eos_token_id = self.tokenizer_chat.eos_token_id
        )

        # NOTE(review): [2:] presumably strips a leading separator emitted by
        # the chat model - confirm against real outputs.
        answ = sequences[0]['generated_text'].split("[/INST]")[1][2:].split("\n")
        answ = "\n".join([item for item in answ if len(item) > 0])
        return answ

In [None]:
import torch
import transformers
from sentence_transformers import  util
from sentence_transformers import SentenceTransformer

class DomainExpertV2():
    """Question answering helper using the instruction/input/context template.

    Like ``DomainExpert``, but the tokenizer can be chosen independently of
    the model, the prompt carries an "### Input:" section filled with
    ``type``, and the prompt budget is ``max_input - max_new_tokens``.
    """

    def __init__(
        self, 
        top_k=10, 
        min_new_tokens=0,
        max_new_tokens=512,         
        num_return_sequences=1,
        temperature=0.9,
        top_p=0.6,
        repetition_penalty=1.2,
        do_sample=True,
        max_input=4096,
        model_st_name="sentence-transformers/all-MiniLM-L6-v2",
        model_chat_name="meta-llama/Llama-2-7b-chat-hf",
        tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
        type="Domain Expert",
        devices="auto"):
        """Store the sampling settings and load encoder, tokenizer and pipeline.

        Args:
            top_k, min_new_tokens, max_new_tokens, num_return_sequences,
            temperature, top_p, repetition_penalty, do_sample: Forwarded to
                the pipeline on every ``generate_answare`` call.
            max_input: Total token budget; ``max_new_tokens`` is subtracted
                from it to obtain the prompt limit.
            model_st_name: Sentence encoder to load.
            model_chat_name: Model used for generation.
            tokenizer_name: Tokenizer used for counting and by the pipeline.
            type: Text placed in the "### Input:" section of the prompt.
            devices: ``device_map`` handed to the pipeline.
        """
        self.top_k = top_k
        self.min_new_tokens = min_new_tokens
        self.max_new_tokens = max_new_tokens
        self.num_return_sequences = num_return_sequences
        self.temperature = temperature
        self.top_p = top_p
        self.repetition_penalty = repetition_penalty
        self.do_sample = do_sample
        self.model_st_name = model_st_name
        self.model_chat_name = model_chat_name
        self.tokenizer_name = tokenizer_name
        self.type = type
        self.devices = devices
        # Leave room for the tokens that will be generated.
        self.max_input = max_input - self.max_new_tokens

        self.model_st = SentenceTransformer(self.model_st_name)
        # NOTE(review): padding/truncation are passed at load time here, not
        # when the tokenizer is called - confirm they have the intended effect.
        self.tokenizer_chat = transformers.AutoTokenizer.from_pretrained(self.tokenizer_name, 
                                                                         padding=True, 
                                                                         truncation=True) 

        self.pipeline_chat = transformers.pipeline(
            "text-generation",
            model=self.model_chat_name,
            tokenizer=self.tokenizer_chat,
            torch_dtype=torch.float16,
            device_map=self.devices,
        )

    def get_number_of_tokens(self, context):
        """Return how many token ids the chat tokenizer produces for ``context``."""
        return len(self.tokenizer_chat(context)["input_ids"])

    def sentence_encode(self, sentence):
        """Embed a single sentence; the encoder receives it as a one-item list."""
        return self.model_st.encode([sentence])

    def mask_promt(self, question, context = ""):
        """Fill the prompt template with ``question``, ``self.type`` and ``context``."""
        return f"""[INST]Below is an instruction that describes a task! Write a response that appropriately completes the request. To answer the instruction, use the context if available!
### Instruction: 
{question}

### Input:
{self.type}

### Context: 
{context}
[/INST]

### Response:
"""

    # Correctly spelled alias, matching DomainExpert.mask_prompt.
    mask_prompt = mask_promt

    def create_prompt(self, question, context):
        """
        Build the prompt the model answers from.

        Context passages are appended one by one for as long as the resulting
        prompt stays below ``self.max_input`` tokens. When ``context`` is
        empty, or not even its first passage fits, the prompt is built from
        the question alone.
        """
        prompt = ""
        context_tmp = ""
        for text in context:
            context_tmp += f"{text}\n"
            prompt_tmp = self.mask_promt(question, context_tmp)
            if self.get_number_of_tokens(prompt_tmp) >= self.max_input:
                break
            prompt = prompt_tmp

        if not prompt:
            # Never hand an empty prompt to the model: without the [/INST]
            # marker the answer extraction in generate_answare would fail.
            prompt = self.mask_promt(question)

        return prompt

    def generate_answare(self, question, context):
        """
        Return the model's answer for the question and the context.

        The text after the first "[/INST]" is taken, every "### Response:\\n"
        is removed from it, and empty lines are dropped.
        """
        prompt = self.create_prompt(question, context)

        sequences = self.pipeline_chat(
            prompt,
            do_sample = self.do_sample,
            top_k = self.top_k,
            num_return_sequences = self.num_return_sequences,
            min_new_tokens = self.min_new_tokens,
            max_new_tokens = self.max_new_tokens,
            temperature = self.temperature,
            top_p = self.top_p,
            repetition_penalty = self.repetition_penalty,
            eos_token_id = self.tokenizer_chat.eos_token_id,
            pad_token_id = self.tokenizer_chat.eos_token_id
        )

        answ = sequences[0]['generated_text'].split("[/INST]")[1]
        answ = answ.replace("### Response:\n","")
        answ = answ.split("\n")
        answ = "\n".join([item for item in answ if len(item) > 0])
        return answ

In [None]:
import numpy as np
import pandas as pd

from sentence_transformers import  util

class ExtractorCOVID():
    """Retrieve abstracts whose title embedding is similar to a question.

    Reads ``metadata.csv``, ``titles.emb`` and ``titles.idx`` from ``path``.
    Row ``i`` of the embedding matrix is mapped to the identifier on line
    ``i`` of ``titles.idx``, which is matched against the "cord_uid" column.
    """

    def __init__(self, path, domain_expert):
        """Load metadata, embeddings and the row-to-identifier mapping.

        Args:
            path: Folder holding the three files (with or without a trailing
                separator).
            domain_expert: Object whose ``sentence_encode`` embeds questions.
        """
        self.path = path
        self.domain_expert = domain_expert

        self.meta = pd.read_csv(os.path.join(self.path, "metadata.csv"), low_memory=False)
        # Only records having both a title and an abstract are usable.
        self.meta = self.meta[self.meta["title"].notnull() & self.meta["abstract"].notnull()]

        self.embedded_vectors = np.load(os.path.join(self.path, "titles.emb"))

        with open(os.path.join(self.path, "titles.idx"), "r") as f:
            self.index_to_vectors = f.read().splitlines()

        # Defined up front so the accessor below is safe before the first query.
        self.cosine_scores = np.array([])
        self.selected_indexes = []

    def get_realted_cosine_scores(self):
        """Return the scores of the last selection, or None when nothing was selected."""
        if len(self.selected_indexes) > 0 and len(self.cosine_scores):
            return self.cosine_scores[self.selected_indexes]

    def get_context(self, question=None, treshold=0.75, N=100):
        """Return the abstracts related to ``question``, best match first.

        Args:
            question: Text to embed and compare against the title embeddings.
            treshold: Minimum cosine similarity for a title to be kept.
            N: Number of top-scoring titles considered before thresholding.

        Returns:
            Array of abstracts. Intermediate results (scores, indexes, titles,
            reference columns) are kept on the instance.
        """
        # calculate similarities
        self.embedded_question = self.domain_expert.sentence_encode(question)
        scores = util.cos_sim(self.embedded_question, self.embedded_vectors)
        self.cosine_scores = scores.reshape(len(scores[0])).numpy()

        # Best N by similarity, then keep those reaching the threshold
        ranked = np.argsort(self.cosine_scores)[::-1][:N]
        self.selected_indexes = [idx for idx in ranked if self.cosine_scores[idx] >= treshold]

        # find selected elements
        self.selected_elements = [self.index_to_vectors[si] for si in self.selected_indexes]
        selected_rows = self.meta[self.meta["cord_uid"].isin(self.selected_elements)]

        # isin() yields rows in metadata order; restore the similarity ranking
        # so that prompt truncation downstream drops the weakest matches.
        rank = {}
        for position, uid in enumerate(self.selected_elements):
            rank.setdefault(uid, position)
        order = np.argsort(selected_rows["cord_uid"].map(rank).to_numpy(), kind="stable")
        selected_rows = selected_rows.iloc[order]

        self.selected_references = selected_rows[["publish_time", "authors", "pdf_json_files", "pmc_json_files"]]

        # Create context
        self.title = selected_rows["title"].values
        self.context = selected_rows["abstract"].values

        return self.context

In [None]:
import json
import numpy as np
import pandas as pd

from sentence_transformers import  util

class ExtractorBase():
    """Retrieve stored answers whose embedding is similar to a question.

    Reads ``qaa.json``, ``qaa.idx`` and ``qaa.emb`` from ``path``. Row ``i``
    of the embedding matrix is mapped, through the integer on line ``i`` of
    ``qaa.idx``, to an entry of the loaded JSON.
    """

    def __init__(self, path, domain_expert):
        """Load the entries, the row-to-entry mapping and the embeddings.

        Args:
            path: Folder holding the three files (with or without a trailing
                separator).
            domain_expert: Object whose ``sentence_encode`` embeds questions.
        """
        self.path = path
        self.domain_expert = domain_expert

        with open(os.path.join(self.path, "qaa.json")) as f:
            self.qaa = json.load(f)

        with open(os.path.join(self.path, "qaa.idx"), "r") as f:
            self.index_to_vectors = [int(index) for index in f.read().splitlines()]

        self.embedded_vectors = np.load(os.path.join(self.path, "qaa.emb"))

        # Defined up front so the accessor below is safe before the first query.
        self.cosine_scores = np.array([])
        self.selected_indexes = []

    def get_realted_cosine_scores(self):
        """Return the scores of the last selection, or None when nothing was selected."""
        if len(self.selected_indexes) > 0 and len(self.cosine_scores):
            return self.cosine_scores[self.selected_indexes]

    def clear_item(self, item):
        """Remove every " ." from ``item`` and collapse ".." into "."."""
        item = item.replace(" .", "")
        item = item.replace("..", ".")
        return item

    def get_context(self, question=None, treshold=0.75, N=100):
        """Return the cleaned, de-duplicated answers related to ``question``.

        Args:
            question: Text to embed and compare against the stored embeddings.
            treshold: Minimum cosine similarity for an entry to be kept.
            N: Number of top-scoring entries considered before thresholding.

        Returns:
            List of answer strings, best match first. Intermediate results
            are kept on the instance.
        """
        # calculate similarities
        self.embedded_question = self.domain_expert.sentence_encode(question)
        scores = util.cos_sim(self.embedded_question, self.embedded_vectors)
        self.cosine_scores = scores.reshape(len(scores[0])).numpy()

        # Best N by similarity, then keep those reaching the threshold
        ranked = np.argsort(self.cosine_scores)[::-1][:N]
        self.selected_indexes = [idx for idx in ranked if self.cosine_scores[idx] >= treshold]

        # Find elements
        self.selected_elements = [self.qaa[self.index_to_vectors[si]] for si in self.selected_indexes]

        # Drop every repeated answer, keeping its first (best ranked)
        # occurrence. Previously only one repeat per distinct answer was
        # removed, so an answer occurring three times survived twice.
        answers = pd.DataFrame([se["answare"] for se in self.selected_elements])
        self.duplicated_indexes = answers[answers.duplicated()].index.tolist()
        if len(self.duplicated_indexes) > 0:
            duplicated = set(self.duplicated_indexes)
            self.selected_indexes = [
                si for position, si in enumerate(self.selected_indexes) if position not in duplicated
            ]
            self.selected_elements = [self.qaa[self.index_to_vectors[si]] for si in self.selected_indexes]

        # find selected elements
        self.selected_answares = pd.DataFrame([se["answare"] for se in self.selected_elements])

        # Create context: the first column of every row holds the answer text
        self.context = [self.clear_item(item[0]) for item in self.selected_answares.values]

        return self.context

In [None]:
import numpy as np
import pandas as pd

from sentence_transformers import  util

class ExtractorBaseF():
    """Retrieve sentences whose embedding is similar to a question.

    Reads ``sentences.json`` and ``sentences.emb`` from ``path``. Embedding
    row positions are matched against the index labels of the sentence frame.
    """

    def __init__(self, path, domain_expert):
        """Load the sentence table and its embeddings.

        Args:
            path: Folder holding the two files (with or without a trailing
                separator).
            domain_expert: Object whose ``sentence_encode`` embeds questions.
        """
        self.max_return = 200 # It surely is more than 4000 tokens

        self.path = path
        self.domain_expert = domain_expert 

        self.sentences = pd.read_json(os.path.join(self.path, "sentences.json"))
        self.embedded_vectors = np.load(os.path.join(self.path, "sentences.emb"))

        # Defined up front so the accessor below is safe before the first query.
        self.cosine_scores = np.array([])
        self.selected_indexes = []

    def get_realted_cosine_scores(self):
        """Return the scores of the last selection, or None when nothing was selected."""
        if len(self.selected_indexes) > 0 and len(self.cosine_scores):
            return self.cosine_scores[self.selected_indexes]

    def get_context(self, question=None, treshold=0.75, N=100):
        """Return up to ``self.max_return`` sentences related to ``question``.

        Args:
            question: Text to embed and compare against the sentence embeddings.
            treshold: Minimum cosine similarity for a sentence to be kept.
            N: Number of top-scoring sentences considered before thresholding.

        Returns:
            List of sentences, best match first. The remaining columns of the
            same rows are kept in ``self.selected_references``.
        """
        # calculate similarities
        self.embedded_question = self.domain_expert.sentence_encode(question)
        scores = util.cos_sim(self.embedded_question, self.embedded_vectors)
        self.cosine_scores = scores.reshape(len(scores[0])).numpy()

        # Best N by similarity, then keep those reaching the threshold
        ranked = np.argsort(self.cosine_scores)[::-1][:N]
        self.selected_indexes = [idx for idx in ranked if self.cosine_scores[idx] >= treshold]

        selected = self.sentences[self.sentences.index.isin(self.selected_indexes)]
        # isin() yields rows in frame order. Re-rank by similarity before
        # truncating, otherwise the cut keeps the lowest row numbers instead
        # of the best matches.
        ranked_labels = [idx for idx in self.selected_indexes if idx in selected.index]
        selected = selected.loc[ranked_labels].iloc[:self.max_return]

        self.context = list(selected["sentences"].values)
        self.selected_references = selected.drop(columns=["sentences"])

        return self.context

In [None]:
def rag_test(domain_expert, extractor, data, mark, N=100, log_dir="Data/"):
    """Answer every instruction in ``data`` once per similarity threshold.

    For each threshold from 1.0 downwards in steps of 0.1 (rounded to one
    decimal), the extractor is asked for context and the domain expert for an
    answer. The answers are stored in ``data`` under the column
    ``mark + str(threshold)``. A progress line is printed and appended to
    ``f"{log_dir}rag_test_{mark}status.log"`` after every row.

    Args:
        domain_expert: Object exposing ``generate_answare(question, context)``.
        extractor: Object exposing ``get_context(question, treshold, N)``.
        data: DataFrame with an "instruction" column; modified in place.
        mark: Prefix of the generated column names and part of the log name.
        N: Number of top candidates the extractor considers.
        log_dir: Prefix (including trailing separator) of the log file path.

    Returns:
        ``data`` with one added column per threshold.
    """
    instructions = data["instruction"].values
    log_path = f"{log_dir}rag_test_{mark}status.log"

    for treshold in np.arange(1.0, -0.1, -0.1):
        treshold = round(treshold,1)
        answares = []

        for i in range(len(instructions)):
            # Forward the caller's N; it used to be fixed at 100 here.
            context = extractor.get_context(instructions[i], treshold=treshold, N=N)
            answare = domain_expert.generate_answare(
                question = instructions[i],
                context = context)
            answares.append(answare)

            out = f"{datetime.now()} : {treshold} : {len(context)} : {i}"
            print(out)
            # Re-opened per row so the log stays current during long runs.
            with open(log_path,"a") as f:
                f.write(out + "\n")

        data[mark + str(treshold)] = answares

    return data

In [None]:
# Load the configuration of each base model and print its max_position_embeddings.
# config_llama2 is reused further down as the max_input budget of DomainExpertV2.
from transformers import AutoConfig

config_gptj = AutoConfig.from_pretrained('EleutherAI/gpt-j-6B')
print('EleutherAI/gpt-j-6B', config_gptj.max_position_embeddings)

config_opt = AutoConfig.from_pretrained('facebook/opt-6.7b')
print('facebook/opt-6.7b', config_opt.max_position_embeddings)

config_llama = AutoConfig.from_pretrained('huggyllama/llama-7b')
print('huggyllama/llama-7b', config_llama.max_position_embeddings)

config_llama2 = AutoConfig.from_pretrained('meta-llama/Llama-2-7b-hf')
print('meta-llama/Llama-2-7b-hf', config_llama2.max_position_embeddings)

## Test RAG vs Fine-Tuned Models

### GPTJ FN

In [None]:
# Reload the test set, then load the GPT-J tokenizer and the checkpoint stored under Models/.
test_topic = pd.read_json(f"{dir_root}test_topic.json", orient="records")
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
model = AutoModelForCausalLM.from_pretrained("Models/gptj-6b-v20231214/checkpoint-1888", device_map="auto")

In [None]:
# Generate one answer per test row, store them in the "gptj_fn" column and save the frame as JSON.
results = run(test_topic, tokenizer, model)
test_topic["gptj_fn"] = results
test_topic.to_json(f"{dir_root}/test_topic_gptj_fn.json", orient="records")
test_topic.head()

### OPT FN

In [None]:
# Reload the test set, then load the OPT tokenizer and the checkpoint stored under Models/.
test_topic = pd.read_json(f"{dir_root}test_topic.json", orient="records")
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-6.7b")
model = AutoModelForCausalLM.from_pretrained("Models/opt-6.7b-v20231214/checkpoint-3145", device_map="auto")

In [None]:
# Generate one answer per test row, store them in the "opt_fn" column and save the frame as JSON.
results = run(test_topic, tokenizer, model)
test_topic["opt_fn"] = results
test_topic.to_json(f"{dir_root}/test_topic_opt_fn.json", orient="records")
test_topic.head()

### LLama FN

In [None]:
# Reload the test set, then load the LLaMA tokenizer and the checkpoint stored under Models/.
test_topic = pd.read_json(f"{dir_root}test_topic.json", orient="records")
tokenizer = AutoTokenizer.from_pretrained("huggyllama/llama-7b")
model = AutoModelForCausalLM.from_pretrained("Models/llama-7b-v20231119/checkpoint-910", device_map="auto")

In [None]:
# Generate one answer per test row, store them in the "llama_fn" column and save the frame as JSON.
results = run(test_topic, tokenizer, model)
test_topic["llama_fn"] = results
test_topic.to_json(f"{dir_root}/test_topic_llama_fn.json", orient="records")
test_topic.head()

### LLama2 FN

In [None]:
# Reload the test set, then load the Llama-2 tokenizer and the checkpoint stored under Models/.
test_topic = pd.read_json(f"{dir_root}test_topic.json", orient="records")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
model = AutoModelForCausalLM.from_pretrained("Models/llama-2-7b-hf-v20231217/checkpoint-1888", device_map="auto")

In [None]:
# Generate one answer per test row, store them in the "llama2_fn" column and save the frame as JSON.
results = run(test_topic, tokenizer, model)
test_topic["llama2_fn"] = results
test_topic.to_json(f"{dir_root}/test_topic_llama2_fn.json", orient="records")
test_topic.head()

### LLama2 RAG with base model

In [None]:
# Domain expert with its default chat model; only the generation length bounds are overridden.
domain_expert = DomainExpert(min_new_tokens=min_token, 
                             max_new_tokens=max_token)

#### Sentence based

In [None]:
# Sentence-level retrieval: build the extractor, print the embedding matrix shape, run the
# threshold sweep (columns prefixed "llama2_rag_s_") and save the result as JSON.
test_topic = pd.read_json(f"{dir_root}test_topic.json", orient="records")
ebf = ExtractorBaseF(path = dir_root, domain_expert = domain_expert)
print(ebf.embedded_vectors.shape)
test_topic = rag_test(domain_expert, ebf, test_topic, mark = "llama2_rag_s_", N=1000)
test_topic.to_json(f"{dir_root}/test_topic_llama2_base_s.json", orient="records")
test_topic.head()

#### Question based

In [None]:
# Question-level retrieval: build the extractor, print the embedding matrix shape, run the
# threshold sweep (columns prefixed "llama2_rag_q_") and save the result as JSON.
test_topic = pd.read_json(f"{dir_root}test_topic.json", orient="records")
eb = ExtractorBase(path = dir_root, domain_expert = domain_expert)
print(eb.embedded_vectors.shape)
test_topic = rag_test(domain_expert, eb, test_topic, "llama2_rag_q_")
test_topic.to_json(f"{dir_root}/test_topic_llama2_base_q.json", orient="records")
test_topic.head()

### LLama2 RAG with FN model

In [None]:
# Domain expert backed by the local Llama-2 checkpoint; the token budget comes from the
# Llama-2 configuration and `type` from the notebook-level value read from the test set.
domain_expert = DomainExpertV2(min_new_tokens=min_token, 
                               max_new_tokens=max_token,
                               type=type,
                               max_input=config_llama2.max_position_embeddings,
                               model_chat_name="Models/llama-2-7b-hf-v20231217/checkpoint-1888",
                               tokenizer_name="meta-llama/Llama-2-7b-hf")

#### Sentence based

In [None]:
# Sentence-level retrieval: build the extractor, print the embedding matrix shape, run the
# threshold sweep (columns prefixed "llama2_fn_rag_s_") and save the result as JSON.
test_topic = pd.read_json(f"{dir_root}test_topic.json", orient="records")
ebf = ExtractorBaseF(path = dir_root, domain_expert = domain_expert)
print(ebf.embedded_vectors.shape)
test_topic = rag_test(domain_expert, ebf, test_topic, mark = "llama2_fn_rag_s_", N=1000)
test_topic.to_json(f"{dir_root}/test_topic_llama2_fn_s.json", orient="records")
test_topic.head()

#### Question based

In [None]:
# Question-level retrieval: build the extractor, print the embedding matrix shape, run the
# threshold sweep (columns prefixed "llama2_fn_rag_q_") and save the result as JSON.
test_topic = pd.read_json(f"{dir_root}test_topic.json", orient="records")
eb = ExtractorBase(path = dir_root, domain_expert = domain_expert)
print(eb.embedded_vectors.shape)
test_topic = rag_test(domain_expert, eb, test_topic, "llama2_fn_rag_q_")
test_topic.to_json(f"{dir_root}/test_topic_llama2_fn_q.json", orient="records")
test_topic.head()

### LLama2 RAG with Context FN model

In [None]:
# Domain expert backed by the local "context" Llama-2 checkpoint.
# NOTE(review): tokenizer_name is "huggyllama/llama-7b" here, while the other Llama-2
# checkpoint above uses "meta-llama/Llama-2-7b-hf" - confirm this is intended.
domain_expert = DomainExpertV2(min_new_tokens=min_token, 
                               max_new_tokens=max_token,
                               type=type,
                               max_input=config_llama2.max_position_embeddings,
                               model_chat_name="Models/llama-2-7b-hf-context-v20231218/checkpoint-3776",
                               tokenizer_name="huggyllama/llama-7b")

#### Sentence based

In [None]:
# Sentence-level retrieval: build the extractor, print the embedding matrix shape, run the
# threshold sweep (columns prefixed "llama2c_rag_s_") and save the result as JSON.
test_topic = pd.read_json(f"{dir_root}test_topic.json", orient="records")
ebf = ExtractorBaseF(path = dir_root, domain_expert = domain_expert)
print(ebf.embedded_vectors.shape)
test_topic = rag_test(domain_expert, ebf, test_topic, mark = "llama2c_rag_s_", N=1000)
test_topic.to_json(f"{dir_root}/test_topic_llama2c_s.json", orient="records")
test_topic.head()

#### Question based

In [None]:
# Question-level retrieval: build the extractor, print the embedding matrix shape, run the
# threshold sweep (columns prefixed "llama2c_rag_q_") and save the result as JSON.
test_topic = pd.read_json(f"{dir_root}test_topic.json", orient="records")
eb = ExtractorBase(path = dir_root, domain_expert = domain_expert)
print(eb.embedded_vectors.shape)
test_topic = rag_test(domain_expert, eb, test_topic, "llama2c_rag_q_")
test_topic.to_json(f"{dir_root}/test_topic_llama2c_q.json", orient="records")
test_topic.head()

### LLama2 Base

In [None]:
# Reload the test set, then load the Llama-2 tokenizer and model by the hub name (no local checkpoint).
test_topic = pd.read_json(f"{dir_root}test_topic.json", orient="records")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", device_map="auto")

In [None]:
# Generate one answer per test row, store them in the "llama2_base" column and save the frame as JSON.
results = run(test_topic, tokenizer, model)
test_topic["llama2_base"] = results
test_topic.to_json(f"{dir_root}/test_topic_llama2_base.json", orient="records")
test_topic.head()

## Concat

In [None]:
# Merge the per-experiment result files into a single frame and save it.
test_topic = pd.read_json(f"{dir_root}test_topic.json", orient="records")

# RAG sweeps (one column per threshold)
test_topic_llama2_base_s = pd.read_json(f"{dir_root}test_topic_llama2_base_s.json", orient="records")
test_topic_llama2_base_q = pd.read_json(f"{dir_root}test_topic_llama2_base_q.json", orient="records")
test_topic_llama2_fn_s = pd.read_json(f"{dir_root}test_topic_llama2_fn_s.json", orient="records")
test_topic_llama2_fn_q = pd.read_json(f"{dir_root}test_topic_llama2_fn_q.json", orient="records")
test_topic_llama2c_s = pd.read_json(f"{dir_root}test_topic_llama2c_s.json", orient="records")
test_topic_llama2c_q = pd.read_json(f"{dir_root}test_topic_llama2c_q.json", orient="records")

# Single-column model outputs
test_topic_gptj_fn = pd.read_json(f"{dir_root}/test_topic_gptj_fn.json", orient="records")
test_topic_opt_fn = pd.read_json(f"{dir_root}/test_topic_opt_fn.json", orient="records")
test_topic_llama_fn = pd.read_json(f"{dir_root}/test_topic_llama_fn.json", orient="records")
test_topic_llama2_fn = pd.read_json(f"{dir_root}/test_topic_llama2_fn.json", orient="records")
test_topic_llama2_base = pd.read_json(f"{dir_root}/test_topic_llama2_base.json", orient="records")

single_columns = [
    ("gptj_fn", test_topic_gptj_fn),
    ("opt_fn", test_topic_opt_fn),
    ("llama_fn", test_topic_llama_fn),
    ("llama2_fn", test_topic_llama2_fn),
    ("llama2_base", test_topic_llama2_base),
]
for column, frame in single_columns:
    test_topic[column] = frame[column]

# Same threshold labels that were used when the sweep columns were created.
threshold_labels = [str(round(treshold,1)) for treshold in np.arange(1.0, -0.1, -0.1)]

sweeps = [
    ("llama2_rag_s_", test_topic_llama2_base_s),
    ("llama2_rag_q_", test_topic_llama2_base_q),
    ("llama2_fn_rag_s_", test_topic_llama2_fn_s),
    ("llama2_fn_rag_q_", test_topic_llama2_fn_q),
    ("llama2c_rag_s_", test_topic_llama2c_s),
    ("llama2c_rag_q_", test_topic_llama2c_q),
]
for prefix, frame in sweeps:
    for label in threshold_labels:
        test_topic[prefix + label] = frame[prefix + label]

test_topic.to_json(f"{dir_root}/test_topic_gen.json", orient="records")

In [None]:
# Overwrite the "llama2_fn_rag_s_*" / "llama2_fn_rag_q_*" columns of the measured file with the
# values from the two sweep result files, then write the measured file back in place.
# NOTE(review): test_topic_measured.json is not produced by any cell visible here - confirm its origin.
test_topic_llama2_fn_s = pd.read_json(f"{dir_root}test_topic_llama2_fn_s.json", orient="records")
test_topic_llama2_fn_q = pd.read_json(f"{dir_root}test_topic_llama2_fn_q.json", orient="records")
test_topic = pd.read_json(f"{dir_root}test_topic_measured.json", orient="records")

for r in ["llama2_fn_rag_s_" + str(round(treshold,1)) for treshold in np.arange(1.0, -0.1, -0.1)]:
    test_topic[r] = test_topic_llama2_fn_s[r]

for r in ["llama2_fn_rag_q_" + str(round(treshold,1)) for treshold in np.arange(1.0, -0.1, -0.1)]:
    test_topic[r] = test_topic_llama2_fn_q[r]

test_topic.to_json(f"{dir_root}/test_topic_measured.json", orient="records")