In [3]:
"""
Basic similarity search example. Used in the original txtai demo.
Requires streamlit to be installed.
  pip install streamlit
"""

import os

import streamlit as st

from txtai.embeddings import Embeddings


class Application:
    """
    Main application.
    """

    def __init__(self):
        """
        Creates a new application.
        """

        # Create embeddings model, backed by sentence-transformers & transformers
        self.embeddings = Embeddings({"path": "sentence-transformers/nli-mpnet-base-v2"})

    def run(self):
        """
        Runs a Streamlit application.
        """

        st.title("Similarity Search")
        st.markdown("This application runs a basic similarity search that identifies the best matching row for a query.")

        data = [
            "US tops 5 million confirmed virus cases",
            "Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg",
            "Beijing mobilises invasion craft along coast as Taiwan tensions escalate",
            "The National Park Service warns against sacrificing slower friends in a bear attack",
            "Maine man wins $1M from $25 lottery ticket",
            "Make huge profits without work, earn up to $100,000 a day",
        ]

        data = st.text_area("Data", value="\n".join(data))
        query = st.text_input("Query")

        data = data.split("\n")

        if query:
            # Get index of best section that best matches query
            uid = self.embeddings.similarity(query, data)[0][0]
            st.write(data[uid])


@st.cache(allow_output_mutation=True)
def create():
    """
    Creates and caches a Streamlit application.
    Returns:
        Application
    """

    return Application()


if __name__ == "__main__":
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    # Create and run application
    app = create()
    app.run()

2022-08-16 15:47:56.426 INFO    numexpr.utils: NumExpr defaulting to 4 threads.


ModuleNotFoundError: No module named 'txtai'

In [2]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.12.0-py2.py3-none-any.whl (9.1 MB)
Collecting pyarrow>=4.0
  Downloading pyarrow-9.0.0-cp39-cp39-win_amd64.whl (19.6 MB)
Collecting altair>=3.2.0
  Using cached altair-4.2.0-py3-none-any.whl (812 kB)
Collecting pympler>=0.9
  Downloading Pympler-1.0.1-py3-none-any.whl (164 kB)
Collecting blinker>=1.0.0
  Using cached blinker-1.5-py2.py3-none-any.whl (12 kB)
Collecting semver
  Downloading semver-2.13.0-py2.py3-none-any.whl (12 kB)
Collecting gitpython!=3.1.19
  Downloading GitPython-3.1.27-py3-none-any.whl (181 kB)
Collecting pydeck>=0.1.dev5
  Using cached pydeck-0.7.1-py2.py3-none-any.whl (4.3 MB)
Collecting validators>=0.2
  Using cached validators-0.20.0-py3-none-any.whl
Collecting gitdb<5,>=4.0.1
  Downloading gitdb-4.0.9-py3-none-any.whl (63 kB)
Collecting smmap<6,>=3.0.1
  Downloading smmap-5.0.0-py3-none-any.whl (24 kB)
Installing collected packages: smmap, gitdb, validators, semver, pympler, pydeck, pyarrow, gitpython, blinker, al

Successfully installed altair-4.2.0 blinker-1.5 gitdb-4.0.9 gitpython-3.1.27 pyarrow-9.0.0 pydeck-0.7.1 pympler-1.0.1 semver-2.13.0 smmap-5.0.0 streamlit-1.12.0 validators-0.20.0
