In [3]:
# Install the notebook's dependencies: LangChain, PyTorch, sentence-transformers,
# the CPU build of FAISS, the Hugging Face Hub client, pypdf and accelerate.
# NOTE(review): none of these installs pin a version, so a fresh run may pull
# different releases than the ones this notebook was written against.
!pip install langchain
!pip install torch
!pip install sentence_transformers
!pip install faiss-cpu
!pip install huggingface-hub
!pip install pypdf
!pip -q install accelerate
# CMAKE_ARGS / FORCE_CMAKE are environment variables handed to the
# llama-cpp-python build; the LLAMA_CUBLAS flag presumably turns on the CUDA
# (cuBLAS) backend -- confirm against the llama-cpp-python install docs.
# --no-cache-dir tells pip not to use its cache for this install.
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install  llama-cpp-python --no-cache-dir
# Install transformers straight from its GitHub repository instead of PyPI.
!pip -q install git+https://github.com/huggingface/transformers

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [5]:
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import LlamaCpp
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFDirectoryLoader

In [3]:
!mkdir Data

In [4]:
cd Data/

/content/Data


In [5]:
!wget https://pgcag.files.wordpress.com/2010/01/48lawsofpower.pdf

--2024-01-27 16:42:09--  https://pgcag.files.wordpress.com/2010/01/48lawsofpower.pdf
Resolving pgcag.files.wordpress.com (pgcag.files.wordpress.com)... 192.0.72.24, 192.0.72.25
Connecting to pgcag.files.wordpress.com (pgcag.files.wordpress.com)|192.0.72.24|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 104926 (102K) [application/pdf]
Saving to: ‘48lawsofpower.pdf’


2024-01-27 16:42:10 (1.45 MB/s) - ‘48lawsofpower.pdf’ saved [104926/104926]



In [6]:
cd ..

/content


In [6]:
# Read the PDF files found under /content/Data/ into LangChain documents.
loader = PyPDFDirectoryLoader(path="/content/Data/")
data = loader.load()

In [7]:
# Summarize what was loaded instead of printing every document in full, which
# floods the cell output and bloats the saved notebook.
print(f"Loaded {len(data)} document(s)")
if data:
    # Short preview of the first document's text as a sanity check.
    print(data[0].page_content[:500])



In [8]:
# Break the loaded documents into chunks of at most 256 characters, with a
# 20-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=256,
)
text_chunks = text_splitter.split_documents(data)

In [9]:
# Number of chunks produced by the splitter (sanity check before embedding).
len(text_chunks)

231

In [10]:
# Sentence-transformers model used to embed the text chunks.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

  return self.fget.__get__(instance, owner)()


In [11]:
# Embed every chunk and build a FAISS index over the resulting vectors.
vector_store = FAISS.from_documents(
    documents=text_chunks,
    embedding=embeddings,
)

In [13]:
!mkdir Model

In [14]:
cd Model

/content/Model


In [15]:
!wget https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf?download=true

--2024-01-27 16:42:25--  https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf?download=true
Resolving huggingface.co (huggingface.co)... 18.172.134.4, 18.172.134.88, 18.172.134.24, ...
Connecting to huggingface.co (huggingface.co)|18.172.134.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs.huggingface.co/repos/46/12/46124cd8d4788fd8e0879883abfc473f247664b987955cc98a08658f7df6b826/14466f9d658bf4a79f96c3f3f22759707c291cac4e62fea625e80c7d32169991?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27mistral-7b-instruct-v0.1.Q4_K_M.gguf%3B+filename%3D%22mistral-7b-instruct-v0.1.Q4_K_M.gguf%22%3B&Expires=1706632945&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcwNjYzMjk0NX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy80Ni8xMi80NjEyNGNkOGQ0Nzg4ZmQ4ZTA4Nzk4ODNhYmZjNDczZjI0NzY2NGI5ODc5NTVjYzk4YTA4NjU4ZjdkZjZiODI2

In [16]:
cd ..

/content


In [12]:
# Load the downloaded GGUF weights through llama.cpp.
# The file name keeps the "?download=true" suffix that wget saved it under.
llm = LlamaCpp(
    model_path="/content/Model/mistral-7b-instruct-v0.1.Q4_K_M.gguf?download=true",
    # Context size requested from llama.cpp.
    n_ctx=4096,
    # NOTE(review): -1 presumably offloads every layer to the GPU -- confirm
    # against the llama-cpp-python documentation.
    n_gpu_layers=-1,
    # Sampling settings.
    temperature=0.75,
    top_p=1,
    # Output behaviour.
    streaming=True,
    verbose=True,
)

AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | 
Model metadata: {'tokenizer.ggml.unknown_token_id': '0', 'tokenizer.ggml.eos_token_id': '2', 'general.architecture': 'llama', 'llama.rope.freq_base': '10000.000000', 'llama.context_length': '32768', 'general.name': 'mistralai_mistral-7b-instruct-v0.1', 'llama.embedding_length': '4096', 'llama.feed_forward_length': '14336', 'llama.attention.layer_norm_rms_epsilon': '0.000010', 'llama.rope.dimension_count': '128', 'tokenizer.ggml.bos_token_id': '1', 'llama.attention.head_count': '32', 'llama.block_count': '32', 'llama.attention.head_count_kv': '8', 'general.quantization_version': '2', 'tokenizer.ggml.model': 'llama', 'general.file_type': '15'}


In [13]:
# Retrieval-augmented QA chain: fetch the 5 closest chunks from the FAISS
# index and pass them to the LLM using the "stuff" chain type.
qa = RetrievalQA.from_chain_type(
    retriever=vector_store.as_retriever(search_kwargs={"k": 5}),
    chain_type="stuff",
    llm=llm,
)

In [14]:
# Sample question used to exercise the QA chain.
query = "Can you tell me the story of Queen Elizabeth?"

In [15]:
# Run the sample question through the QA chain; the generated text is read
# from the 'result' key of the returned mapping.
ans = qa.invoke(query)

In [16]:
# Display the generated answer text.
ans['result']

" Yes, Queen Elizabeth I managed to avoid marriage and war by dangling the possibility of marriage to all who courted her. She forged alliances with the countries these suitors came from, all for the benefit of England. By keeping her independence above all, Elizabeth protected her power and made herself an object of worship. She ruled until the age of eighty when she was forced to abdicate. Throughout her rule, she eliminated son after son, anyone who would challenge her throne. Cleopatra, the queen of Egypt, played her siblings against each other and seduced Marc Antony, having her younger sister Arsinoe executed in the process. After the emperor's death, Wu Zetian, a concubine of his, seduced his son in the royal urinal and befriended his wife, allowing her to get a royal position."

In [None]:
!pip install gradio --use-deprecated=legacy-resolver

In [18]:
import gradio as gr

def greet(query):
    """Answer a user question with the retrieval QA chain.

    Args:
        query: Question text submitted from the Gradio textbox.

    Returns:
        The chain's answer (the 'result' field of its output), or a short
        prompt asking for input when the question is empty or whitespace-only.
    """
    # Skip the LLM call entirely for blank submissions.
    if not query or not query.strip():
        return "Please enter a question."
    response = qa.invoke(query)
    return response['result']

# Single text-in / text-out Gradio UI wired to greet().
demo = gr.Interface(
    fn=greet,
    inputs="textbox",
    outputs="textbox",
)

# share=True asks Gradio for a public URL for the app.
if __name__ == "__main__":
    demo.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://5c3f86cfa4b493fee3.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
