In [1]:
# grab data as langchain docs
# The cached object is indexed by the key 'pages'; that entry is what the
# rest of the notebook treats as the list of documents.
# NOTE: pickle.load can execute arbitrary code while deserializing -- only
# open cache files that you produced yourself or otherwise trust.
import pickle
with open('datasets/arxiv_cache/1210.4967.pkl', 'rb') as f:
    d = pickle.load(f)

docs = d['pages']

In [2]:
# Break the loaded pages into overlapping chunks before they are embedded.
# Sizes are measured in characters: target chunk size 512, overlap 30.
# NOTE(review): CharacterTextSplitter picks split points from a single
# separator, so a page without that separator may stay longer than
# chunk_size -- confirm the resulting chunk lengths are acceptable.
from langchain.text_splitter import CharacterTextSplitter

splitter = CharacterTextSplitter(
    chunk_overlap=30,
    chunk_size=512,
)
chunked_docs = splitter.split_documents(docs)

In [3]:
# Embed every chunk with the BGE base English model and index the vectors
# in a FAISS vector store that the retriever will query later.
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

db = FAISS.from_documents(
    documents=chunked_docs,
    embedding=HuggingFaceEmbeddings(model_name='BAAI/bge-base-en-v1.5'),
)

In [4]:
# load model, tokenizer, retriever
import torch
from auto_gptq import AutoGPTQForCausalLM
from transformers import AutoTokenizer, AutoModelForCausalLM
# Local directory from which both the model and the tokenizer are loaded.
model_dir = "models/cosmosage_v2/"
# Alternative loader for a GPTQ-quantized checkpoint, currently disabled:
#model = AutoGPTQForCausalLM.from_quantized(model_dir)
# No dtype or device arguments are passed here, so the library defaults apply.
model = AutoModelForCausalLM.from_pretrained(model_dir)
tokenizer = AutoTokenizer.from_pretrained(model_dir)
# Similarity-search retriever over the vector store `db`, configured with k=4.
retriever = db.as_retriever(search_type="similarity", search_kwargs={'k': 4})

In [5]:
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from transformers import pipeline
from langchain.chains import LLMChain

# Sampling text-generation pipeline built around the model and tokenizer
# loaded earlier in the notebook.
text_generation_pipeline = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    # decoding settings
    do_sample=True,
    temperature=0.4,
    repetition_penalty=1.01,
    max_new_tokens=1000,
    return_full_text=True,
)

# Expose the transformers pipeline to LangChain as an LLM object.
llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

# Prompt layout: a short instruction, the supplied context, then a
# USER/ASSISTANT turn that the model is expected to complete.
prompt_template = """Answer the question based on your knowledge. Use the following context to help:

{context}

USER: {question}
ASSISTANT:"""

prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

# Chain that fills the prompt from {"context", "question"} and runs the LLM.
llm_chain = LLMChain(prompt=prompt, llm=llm)

2024-02-19 20:57:56.447459: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-19 20:57:56.475773: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-19 20:57:56.475803: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-19 20:57:56.476542: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-19 20:57:56.481495: I tensorflow/core/platform/cpu_feature_guar

In [6]:
from langchain.schema.runnable import RunnablePassthrough


def _format_docs(retrieved_docs):
    """Join the text of the retrieved documents into one context string.

    The retriever yields Document objects. Interpolating that list directly
    into the prompt would insert its Python repr (class names, metadata,
    escaped newlines) rather than the passage text, so only `page_content`
    is kept and passages are separated by a blank line.

    Args:
        retrieved_docs: iterable of objects exposing a `page_content` string.

    Returns:
        A single string; empty when nothing was retrieved.
    """
    return "\n\n".join(doc.page_content for doc in retrieved_docs)


# Reuse the retriever configured earlier in the notebook (similarity search,
# k=4) instead of re-creating one with default settings, so the retrieval
# configuration lives in exactly one place.
rag_chain = (
    # The incoming question is sent to the retriever to build the context and
    # is also passed through unchanged as the prompt's `question`.
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | llm_chain
)


In [7]:
# Test question that is sent to both the bare LLM chain and the RAG chain.
question = "How does DAN suppress the input impedance of the SQUID?"

In [8]:
# Baseline: run the LLM chain with an empty context, i.e. no retrieved text.
result = llm_chain.invoke({"context":"", "question": question})
# The chain result is indexed by 'text' to get the generated answer.
print(result['text'])


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


 DAN (Digital Active Nulling) suppresses the input impedance of the SQUID by actively nulling the current waveform at the input coil. This is achieved through a feedback loop where the SQUID's output is continuously compared to a reference voltage. Any deviation from the reference voltage is compensated by generating a nulling current that is inverted 180 degrees and fed back into the input coil. This nulling current effectively cancels out the input current, reducing the input impedance of the SQUID. The DAN technique is particularly useful in applications where high sensitivity and fast response times are required, such as in MRI systems and superconducting quantum interference devices (SQUIDs) used for measuring magnetic fields.


In [9]:
# Same question through the RAG chain, which fills the prompt's context
# from the retriever before calling the LLM chain.
result_rag = rag_chain.invoke(question)
print(result_rag['text'])

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


 DAN suppresses the input impedance of the SQUID by providing a nulling signal that actively zeroes the SQUID current across the bolometer bandwidth. This nulling signal is injected digitally and helps to improve the stability and linearity of the SQUID.
