In [7]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_core.documents.base import Document
from langchain.vectorstores import FAISS
from gnews import GNews
from tqdm import tqdm
import torch
import json
import os

In [2]:
os.environ["CUDA_VISIBLE_DEVICES"] = ""
device = 'cpu'

In [9]:
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
device = 'cuda:0'

In [4]:
with open('./database.json') as file:
    data = json.load(file)
    
documents = []
for item in data:
    article = item['article']
    metadata = item.get('metadata', {})
    document = Document(page_content=article, metadata=metadata)
    documents.append(document)

In [5]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=94, chunk_overlap=10)
documents = text_splitter.split_documents(documents)

In [6]:
embedding_model = "sentence-transformers/all-MiniLM-l6-v2"
embedding_model = "mixedbread-ai/mxbai-embed-large-v1"

embeddings = HuggingFaceEmbeddings(
                            model_name=embedding_model,
                            model_kwargs={'device': device},
                            )

04/18/2024 12:51:56 PM - Load pretrained SentenceTransformer: mixedbread-ai/mxbai-embed-large-v1


In [7]:
vector_db = FAISS.from_documents(documents, embeddings)
retriever = vector_db.as_retriever(search_kwargs={"k": 3})

04/18/2024 12:52:07 PM - Loading faiss with AVX512 support.
04/18/2024 12:52:07 PM - Successfully loaded faiss with AVX512 support.


In [8]:
from scipy import stats
import numpy as np
import time

def simple_test_retrieving(len_sentence=1, num_sentence=1):
    elements = ['test ' * len_sentence] * num_sentence

    ### avoid first time bias?
    for element in elements:
        retriever.get_relevant_documents(element)
    
    start = time.time()
    for element in elements:
        retriever.get_relevant_documents(element)
    end = time.time()

    return end - start

def test_retrieving(n=10, confidence=.99, len_sentence=1, num_sentence=1):
    times = []
    for _ in range(n):
        times.append(simple_test_retrieving(len_sentence, num_sentence))
    
    sample_mean = np.mean(times)
    sample_std = np.std(times, ddof=1)
    
    t_value = stats.t.ppf((1 + confidence) / 2, len(times) - 1)

    margin_of_error = t_value * sample_std / np.sqrt(len(times))
    
    print(f"Time = {sample_mean: .3f}s +/- {margin_of_error: .3f}s, [at {int(confidence*100)}%]")

    return sample_mean, margin_of_error

def full_test_retrieving(list_len_sentence=[]):
    H = {}
    for len_sentence in list_len_sentence:
        time, error = test_retrieving(len_sentence=len_sentence, num_sentence=20)
        H[len_sentence] = {'time': f'{time:.3f}', 'error': f'{error:.3f}'}

    return H

In [9]:
list_len_sentence = [2, 8, 32, 128, 512, 1024, 2048]
results = full_test_retrieving(list_len_sentence)
results

Time =  0.295s +/-  0.003s, [at 99%]
Time =  0.298s +/-  0.003s, [at 99%]
Time =  0.295s +/-  0.003s, [at 99%]
Time =  0.356s +/-  0.002s, [at 99%]
Time =  0.755s +/-  0.002s, [at 99%]
Time =  0.785s +/-  0.003s, [at 99%]
Time =  0.833s +/-  0.002s, [at 99%]


{2: {'time': '0.295', 'error': '0.003'},
 8: {'time': '0.298', 'error': '0.003'},
 32: {'time': '0.295', 'error': '0.003'},
 128: {'time': '0.356', 'error': '0.002'},
 512: {'time': '0.755', 'error': '0.002'},
 1024: {'time': '0.785', 'error': '0.003'},
 2048: {'time': '0.833', 'error': '0.002'}}

## llama 3

In [3]:
token = "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"

In [5]:
prompt

'<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a pirate chatbot who always responds in pirate speak!<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWho are you?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n'

In [12]:
model_name = "meta-llama/Meta-Llama-3-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map=device, token=token)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [22]:
user_question = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a helpful chatbot who answers question to a human user.<|eot_id|><|start_header_id|>user<|end_header_id|>

Explain Rieman's hypothesis.<|eot_id|><|start_header_id|>assistant<|end_header_id|>

"""

inputs = tokenizer(f"[INST]{user_question}[/INST]", return_tensors='pt').to(device)
with torch.no_grad():
    outputs = model.generate(**inputs, do_sample=False, max_new_tokens=1000)

answer = tokenizer.decode(outputs[0,inputs['input_ids'].shape[1]:])
print(answer)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Riemann's Hypothesis (RH) is a conjecture in mathematics that deals with the distribution of prime numbers. It was first proposed by Bernhard Riemann in 1859 and remains one of the most famous unsolved problems in mathematics.

The hypothesis states that all non-trivial zeros of the Riemann zeta function (ζ(s)) lie on a vertical line in the complex plane, specifically on the line where the real part of the complex number s is equal to 1/2. In other words, RH asserts that all non-trivial zeros of ζ(s) satisfy the equation:

Re(s) = 1/2

where Re(s) is the real part of s.

The Riemann zeta function is defined as:

ζ(s) = 1 + 1/2^s + 1/3^s + 1/4^s +...

The zeta function is intimately connected with the distribution of prime numbers. In fact, the location of the zeros of the zeta function determines the distribution of prime numbers. RH has important implications for many areas of mathematics, including number theory, algebra, and a

: 