In [None]:
!pip install pypdf -q
!pip install sentence_transformers -q
!pip install llama_index -q
!pip install pinecone-client langchain -q
!pip install accelerate -q
!pip install bitsandbytes -q
!pip install llama-index-llms-huggingface -q
!pip install llama-index-embeddings-langchain -q
!pip install llama-index-settings -q
!pip install huggingface -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m290.4/290.4 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m171.5/171.5 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.4/15.4 MB[0m [31m85.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m86.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m312.9/312.9 kB[0m [31m34.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m141.9/141.9 kB[0m [31m20.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m83.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━

### **App**

In [None]:
!pip install streamlit -q
!pip install huggingface -q

In [None]:
%%writefile app.py
import streamlit as st
import os
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core.prompts import PromptTemplate
from llama_index.core import Settings
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.embeddings.langchain import LangchainEmbedding
from llama_index.core.node_parser import SentenceSplitter
import torch
from transformers import BitsAndBytesConfig
from huggingface_hub import login
# Credentials must not live in source: read the Hugging Face token from the
# HF_TOKEN environment variable instead.
# NOTE(review): the token that used to be hardcoded on this line has been
# exposed and should be revoked on huggingface.co.
_hf_token = os.environ.get("HF_TOKEN")
if _hf_token:
    login(token=_hf_token)
# When HF_TOKEN is unset no explicit login happens here; access to the gated
# model then presumably relies on credentials already saved on the machine —
# TODO confirm.

@st.cache_resource
def _setup_models():
    """Build the embedding model and the 4-bit quantized LLM.

    Streamlit re-executes the whole script on every widget interaction, so the
    result is cached with ``st.cache_resource`` to avoid rebuilding both models
    on each rerun.

    Returns:
        tuple: ``(embed_model, llm)``.
    """
    embed_model = LangchainEmbedding(
        HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
    )
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        llm_int8_enable_fp32_cpu_offload=True,
    )
    llm = HuggingFaceLLM(
        model_name="meta-llama/Llama-2-7b-chat-hf",
        tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
        query_wrapper_prompt=PromptTemplate("<s> [INST] {query_str} [/INST]"),
        context_window=3900,
        model_kwargs={'quantization_config': quantization_config},
        device_map="auto",
    )
    return embed_model, llm


def _configure_settings(embed_model, llm):
    """Register the models and chunking options on the global ``Settings``."""
    # One splitter instance is shared by `text_splitter` and `transformations`
    # so the configured overlap applies whichever setting the index reads.
    splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=20)
    Settings.llm = llm
    Settings.embed_model = embed_model
    Settings.text_splitter = splitter
    Settings.chunk_size = 1024
    Settings.chunk_overlap = 20
    Settings.transformations = [splitter]


def _process_documents(files):
    """Write the uploaded files to ``uploaded_documents/`` and load them.

    Args:
        files: Uploaded file objects exposing ``getbuffer()``.

    Returns:
        The documents loaded by ``SimpleDirectoryReader`` from that directory.
    """
    documents_dir = "uploaded_documents"
    os.makedirs(documents_dir, exist_ok=True)
    # Remove files left over from a previous upload; otherwise an earlier,
    # larger upload would keep being indexed alongside the current one.
    for stale_name in os.listdir(documents_dir):
        if stale_name.startswith("document_") and stale_name.endswith(".pdf"):
            os.remove(os.path.join(documents_dir, stale_name))
    for i, file in enumerate(files):
        with open(os.path.join(documents_dir, f"document_{i}.pdf"), "wb") as f:
            f.write(file.getbuffer())
    return SimpleDirectoryReader(documents_dir).load_data()


def main():
    """Streamlit entry point: upload PDFs, index them, answer questions."""
    torch.cuda.empty_cache()
    st.title("Question and Answering Assistant")

    # Uploads are always saved with a .pdf extension, so only accept PDFs.
    uploaded_files = st.file_uploader(
        "Upload one or more PDF documents",
        type=["pdf"],
        accept_multiple_files=True,
    )
    if uploaded_files:
        # Rebuild the index only when the set of uploaded files changes, not
        # on every rerun triggered by typing a question.
        upload_key = tuple((f.name, f.size) for f in uploaded_files)
        index_is_stale = st.session_state.get("qa_index_key") != upload_key
        documents = _process_documents(uploaded_files) if index_is_stale else None
        st.write("Files uploaded successfully!")

        embed_model, llm = _setup_models()  # Setup models only after upload
        _configure_settings(embed_model, llm)
        if index_is_stale:
            st.session_state["qa_index"] = VectorStoreIndex.from_documents(
                documents, embed_model=embed_model
            )
            st.session_state["qa_index_key"] = upload_key
        index = st.session_state["qa_index"]

        query_engine = index.as_query_engine(llm=llm)
        user_input = st.text_input("Enter your question:")
        if user_input:
            response = query_engine.query(user_input)
            st.write(str(response))

    if torch.cuda.is_available():
        st.write(f"Total GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9} GB")
        st.write(f"Current Memory Allocated: {torch.cuda.memory_allocated() / 1e6} MB")
        st.write(f"Current Memory Cached: {torch.cuda.memory_reserved() / 1e6} MB")


if __name__ == "__main__":
    main()


Writing app.py


In [None]:
# Install the localtunnel npm package, which a later cell runs via `npx localtunnel`.
!npm install localtunnel

[K[?25h[37;40mnpm[0m [0m[30;43mWARN[0m [0m[35msaveError[0m ENOENT: no such file or directory, open '/content/package.json'
[K[?25h[37;40mnpm[0m [0m[34;40mnotice[0m[35m[0m created a lockfile as package-lock.json. You should commit this file.
[0m[37;40mnpm[0m [0m[30;43mWARN[0m [0m[35menoent[0m ENOENT: no such file or directory, open '/content/package.json'
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No description
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No repository field.
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No README data
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No license field.
[0m
+ localtunnel@2.0.2
added 22 packages from 22 contributors and audited 22 packages in 1.5s

3 packages are looking for funding
  run `npm fund` for details

found 1 [93mmoderate[0m severity vulnerability
  run `npm audit fix` to fix them, or `npm audit` for details
[K[?25h

In [None]:
# Launch the Streamlit app as a background process; its output is redirected to /content/logs.txt.
!streamlit run /content/app.py &>/content/logs.txt &

In [None]:
# `import urllib` alone does not guarantee that the `request` submodule is
# loaded, so import it explicitly.
import urllib.request

# Look up this machine's public IPv4 address. The timeout keeps the cell from
# hanging if the service is unreachable, and the `with` block closes the
# response.
with urllib.request.urlopen('https://ipv4.icanhazip.com', timeout=10) as ip_response:
    public_ip = ip_response.read().decode('utf8').strip("\n")
print("Password/Enpoint IP for localtunnel is:", public_ip)

Password/Enpoint IP for localtunnel is: 34.125.239.231


In [None]:
# Open a localtunnel to local port 8501; the command prints the public URL and blocks until interrupted.
!npx localtunnel --port 8501

[K[?25hnpx: installed 22 in 1.68s
your url is: https://free-horses-warn.loca.lt
^C


### Colab

In [None]:
import os

from huggingface_hub import login

# Never hardcode credentials in a notebook. Use the HF_TOKEN environment
# variable when it is set; otherwise `login()` prompts for the token
# interactively.
# NOTE(review): the token that used to be hardcoded in this cell has been
# exposed and should be revoked on huggingface.co.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)
else:
    login()


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: 
Add token as git credential? (Y/n) n
Token is valid (permission: read).
Your token has been saved to /root/.ca

In [None]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core.prompts.prompts import SimpleInputPrompt




In [None]:
# Only index PDF files. /content also holds app.py, logs.txt and npm's
# package-lock.json, which would otherwise be ingested as "documents" and
# leak into the answers.
documents = SimpleDirectoryReader("/content", required_exts=[".pdf"]).load_data()
system_prompt = """
You are a Question and Answering assistant. Your goal is to answer questions based on the given documents
"""
# Bind a prompt *instance* (not the class itself), using the same Llama-2
# instruction wrapper that is passed to the LLM in this notebook.
query_wrapper_prompt = SimpleInputPrompt("<s> [INST] {query_str} [/INST]")

In [None]:
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.embeddings.langchain import LangchainEmbedding

# Build the sentence-transformers embedder first, then wrap it in the
# LlamaIndex adapter.
hf_embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)
embed_model = LangchainEmbedding(hf_embeddings)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
import torch
from transformers import BitsAndBytesConfig
from llama_index.core.prompts import PromptTemplate

# 4-bit NF4 quantization with double quantization and float16 compute.
nf4_options = {
    "load_in_4bit": True,
    "bnb_4bit_compute_dtype": torch.float16,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": True,
}
quantization_config = BitsAndBytesConfig(**nf4_options)

In [None]:
from llama_index.llms.huggingface import HuggingFaceLLM


# The same checkpoint id is used for both the model and its tokenizer.
llama_checkpoint = "meta-llama/Llama-2-7b-chat-hf"
# Wrap each query in the [INST] ... [/INST] instruction markers.
instruction_wrapper = PromptTemplate("<s> [INST] {query_str} [/INST]")

llm = HuggingFaceLLM(
    model_name=llama_checkpoint,
    tokenizer_name=llama_checkpoint,
    query_wrapper_prompt=instruction_wrapper,
    context_window=3900,
    model_kwargs={'quantization_config': quantization_config},
    device_map="auto",
)

config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [None]:
from llama_index.core import Settings
from llama_index.core.node_parser import SentenceSplitter

# One splitter instance shared by `text_splitter` and `transformations`.
# Previously `transformations` received a second SentenceSplitter that was
# created without the overlap, so the chunk_overlap of 20 configured below
# did not reach it.
sentence_splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=20)

Settings.llm = llm
Settings.embed_model = embed_model
Settings.text_splitter = sentence_splitter
Settings.chunk_size = 1024
Settings.chunk_overlap = 20
Settings.transformations = [sentence_splitter]

In [None]:
# Build a vector index over the loaded documents using the embedding model.
index = VectorStoreIndex.from_documents(
    documents,
    embed_model=embed_model,
)
# Query engine that generates its answers with `llm`.
query_engine = index.as_query_engine(
    llm=llm,
)

In [None]:
from llama_index.core.response.notebook_utils import display_response

In [None]:
# Ask a question against the index, then render the answer in the notebook.
response = query_engine.query(
    "What is this document?"
)
display_response(response)

**`Final Response:`** Based on the provided context information, it appears that this is a resume or curriculum vitae (CV) for a student named Mannu Dev Sah. The document highlights his academic qualifications, scholastic achievements, and technical skills, as well as his positions of responsibility, extracurricular activities, and design organizing experience. It also mentions his involvement in various projects and events, including a design challenge, a filmmaking competition, and a volunteer initiative. Overall, the document provides a comprehensive overview of Mannu's academic and extracurricular pursuits, showcasing his skills, achievements, and experiences.

### Check

In [None]:
# Inspect the concrete type of the object returned by `query_engine.query`.
type(response)

llama_index.core.base.response.schema.Response

In [None]:
# `response_str` was never assigned anywhere in the notebook, so this cell
# raised NameError on a fresh kernel. Derive it from `response` explicitly.
response_str = str(response)
response_str

' Based on the provided context information, this document appears to be a package manifest file for a collection of Node.js packages. The file contains information about each package, including its version, resolved URL, and integrity hash.\n\nSome of the packages mentioned in the file include:\n\n* `ms`: A package for working with Microsoft Office files.\n* `openurl`: A package for working with OpenURL files.\n* `require-directory`: A package for resolving directories.\n* `string-width`: A package for calculating the width of strings.\n* `strip-ansi`: A package for stripping Ansi escape sequences from strings.\n* `wrap-ansi`: A package for wrapping Ansi escape sequences in strings.\n* `yargs`: A package for working with command line arguments.\n* `y18n`: A package for working with i18n and l10n.\n\nThe file also includes information about the dependencies of each package, including the version numbers and the packages that each package requires.'

In [None]:
# NOTE(review): scratch cell — evaluates the type of a string literal; looks like leftover debugging.
type("enter")

str