In [6]:
!pip install streamlit streamlit_chat langchain  chromadb tiktoken pypdf sentence-transformers


Collecting chromadb
  Downloading chromadb-0.4.21-py3-none-any.whl (508 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m508.6/508.6 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
Collecting chroma-hnswlib==0.7.3 (from chromadb)
  Downloading chroma_hnswlib-0.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting fastapi>=0.95.2 (from chromadb)
  Downloading fastapi-0.105.0-py3-none-any.whl (93 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m93.1/93.1 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting uvicorn[standard]>=0.18.3 (from chromadb)
  Downloading uvicorn-0.25.0-py3-none-any.whl (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.3/60.3 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting posthog>=2.4.0 (from chromadb)
  Downloading posthog-3.1

In [7]:
!pip install peft accelerate bitsandbytes trl safetensors



In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
path = "/content/drive/MyDrive/data/chroma_db"

In [2]:
%%writefile app.py

from langchain.text_splitter import  CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import CTransformers
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
import sys
import streamlit as st
from streamlit_chat import message
import tempfile
from langchain.document_loaders import PyPDFLoader
from langchain.llms import HuggingFacePipeline
from torch import cuda, bfloat16
import transformers
import torch
from transformers import BitsAndBytesConfig
from transformers import AutoConfig
from transformers import AutoModelForCausalLM
from transformers import pipeline
from transformers import AutoTokenizer

user_token = st.sidebar.text_input(
    label="#### your hugging face authentication's token 👇",
    placeholder="Paste your HF koken here",
    type="password")


if user_token:


    path = "/content/drive/MyDrive/data/chroma_db"

    embeddings = HuggingFaceEmbeddings(model_name = 'sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
    vectors = Chroma(persist_directory = path, embedding_function = embeddings)




    model_id = 'mistralai/Mistral-7B-Instruct-v0.1'
    use_flash_attention = False

    # set quantization configuration to load large model with less GPU memory
    # this requires the `bitsandbytes` library
    bnb_config = BitsAndBytesConfig(
      load_in_4bit=True,
      bnb_4bit_quant_type='nf4',
      bnb_4bit_use_double_quant=True,
      bnb_4bit_compute_dtype=bfloat16
          )

    # begin initializing HF items, need auth token for these
    hf_auth = user_token
    use_flash_attention = False

    model_config = AutoConfig.from_pretrained(
      model_id,
      token=hf_auth
          )

    model = AutoModelForCausalLM.from_pretrained(
      model_id,
      trust_remote_code=True,
      config=model_config,
      quantization_config=bnb_config,
      use_flash_attention_2=use_flash_attention,

      device_map='auto',
      torch_dtype=torch.float16,
      token=hf_auth
        )

    tokenizer = AutoTokenizer.from_pretrained(
      model_id,
      token=hf_auth
          )

    generate_text = pipeline(
      model=model, tokenizer=tokenizer,
      return_full_text=True,  # langchain expects the full text
      task='text-generation',
      # we pass model parameters here too
      temperature=0.4,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
      max_new_tokens=5000,  # mex number of tokens to generate in the output
      repetition_penalty=1.1  # without this output begins repeating
                  )


    llm = HuggingFacePipeline(pipeline=generate_text)

    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    retrieval = vectors.as_retriever()






    def conversational_chat(query):

        chain = ConversationalRetrievalChain.from_llm(llm = llm,

                                                  retriever=retrieval,
                                                  memory = memory ,
                                                  )

        result = chain({"question": query, "chat_history": st.session_state['history']})
        st.session_state['history'].append((query, result["answer"]))

        del chain ; torch.cuda.empty_cache()


        return result["answer"]

    if 'history' not in st.session_state:
        st.session_state['history'] = []

    if 'generated' not in st.session_state:
        st.session_state['generated'] = ["Hello ! Ask me any question that you expect in your interview " +  " 🤗"]

    if 'past' not in st.session_state:
        st.session_state['past'] = ["Hey ! 👋"]

    #container for the chat history
    response_container = st.container()
    #container for the user's text input
    container = st.container()

    with container:
        with st.form(key='my_form', clear_on_submit=True):

            user_input = st.text_input("Query:", placeholder=" ask you question here (:", key='input')
            submit_button = st.form_submit_button(label='Send')


        if submit_button and user_input:


            output = conversational_chat(user_input)

            st.session_state['past'].append(user_input)
            st.session_state['generated'].append(output)

    if st.session_state['generated']:
        with response_container:
            for i in range(len(st.session_state['generated'])):
                message(st.session_state["past"][i], is_user=True, key=str(i) + '_user', avatar_style="big-smile")
                message(st.session_state["generated"][i], key=str(i), avatar_style="thumbs")


Overwriting app.py


In [3]:
!npm install localtunnel


[K[?25h[37;40mnpm[0m [0m[30;43mWARN[0m [0m[35msaveError[0m ENOENT: no such file or directory, open '/content/package.json'
[0m[37;40mnpm[0m [0m[30;43mWARN[0m [0m[35menoent[0m ENOENT: no such file or directory, open '/content/package.json'
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No description
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No repository field.
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No README data
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No license field.
[0m
+ localtunnel@2.0.2
updated 1 package and audited 36 packages in 0.946s

3 packages are looking for funding
  run `npm fund` for details

found 2 [93mmoderate[0m severity vulnerabilities
  run `npm audit fix` to fix them, or `npm audit` for details
[K[?25h

In [4]:
!streamlit run /content/app.py &>/content/logs.txt &

In [5]:
!npx localtunnel --port 8501 & curl ipv4.icanhazip.com

34.125.58.146
[K[?25hnpx: installed 22 in 2.211s
your url is: https://fluffy-bats-return.loca.lt
