<a href="https://colab.research.google.com/github/machine-learning-upgrade/book_code/blob/main/Appendix_Email_Creation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

langchain - used here to build the retrieval chain (custom retriever, prompt template, chat model, and output parser)

openai - To access OpenAI API and the LLM models there


pymilvus - Milvus is an open source vector DB, but we'll be using Zilliz (managed Milvus)


gradio - This is the dashboarding framework we're using


pyarrow - Here I'm just looking to force the version so that it doesn't conflict with pymilvus


In [1]:
!pip install langchain==0.1.5 \
  openai==1.11.1 \
  pymilvus==2.3.6 \
  gradio==4.16.0 \
  langchain_openai==0.0.5 \
  pyarrow==14.0.0

Collecting langchain==0.1.5
  Downloading langchain-0.1.5-py3-none-any.whl (806 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m806.7/806.7 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting openai==1.11.1
  Downloading openai-1.11.1-py3-none-any.whl (226 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m226.1/226.1 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pymilvus==2.3.6
  Downloading pymilvus-2.3.6-py3-none-any.whl (175 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m175.3/175.3 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting gradio==4.16.0
  Downloading gradio-4.16.0-py3-none-any.whl (16.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.7/16.7 MB[0m [31m22.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain_openai==0.0.5
  Downloading langchain_openai-0.0.5-py3-none-any.whl (29 kB)
Collecting pyarrow==14.0.0
  Downloading pyarrow-14.0.0-cp310-cp3

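The versions above are pinned on purpose (for example, pyarrow is forced to a version that does not conflict with pymilvus).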

Read our CSV file into a pandas DataFrame.


In [1]:
import pandas as pd

# Load the source content into a DataFrame. Its first two columns are read
# positionally further down as the author and the text of each entry.
df = pd.read_csv(filepath_or_buffer='appendix_data.csv')

After you run this cell you'll need to load the data at https://zilliz.com

In [2]:
from pymilvus import (
    Collection,
    CollectionSchema,
    connections,
    DataType,
    FieldSchema,
    utility,
    MilvusClient
)
from openai import OpenAI


COLLECTION_NAME = 'content'  # Collection name
EMBEDDING_DIMENSION = 1536  # Embedding vector size, specified by OpenAI
EMBEDDING_MODEL = 'text-embedding-ada-002'  # OpenAI model used to embed every text
EMBEDDING_BATCH_SIZE = 100  # Number of texts sent to OpenAI per embeddings request
# Placeholders: replace with your own values before running (never commit real credentials).
ZILLIZ_CLLUSTER_URI = 'YOUR_ZILLIZ_URI'  # Endpoint URI obtained from Zilliz Cloud
ZILLIZ_API_KEY = 'YOUR_ZILLZ_API_KEY'
OPENAI_API_KEY = 'YOUR_OPENAI_API_KEY'

connections.connect(uri=ZILLIZ_CLLUSTER_URI, token=ZILLIZ_API_KEY, secure=True)


# If the collection already exists, drop it so re-running this cell does not push duplicates.
if utility.has_collection(COLLECTION_NAME):
    utility.drop_collection(COLLECTION_NAME)

# Create the collection: an auto-generated id, the author, the text, and the text's embedding.
fields = [
  FieldSchema(name='id', dtype=DataType.VARCHAR, is_primary=True, auto_id=True, max_length=36),
  FieldSchema(name='author', dtype=DataType.VARCHAR, max_length=200),
  FieldSchema(name='text', dtype=DataType.VARCHAR, max_length=2000),
  FieldSchema(name='vector', dtype=DataType.FLOAT_VECTOR, dim=EMBEDDING_DIMENSION)
]

schema = CollectionSchema(fields=fields)
collection = Collection(name=COLLECTION_NAME, schema=schema)

# Create an index on the embedding field of the collection.
index_params = {
    'index_type': 'AUTOINDEX',
    'metric_type': 'IP',
    'params': {}
}


collection.create_index(field_name="vector", index_params=index_params)

milvusClient = MilvusClient(
    uri=ZILLIZ_CLLUSTER_URI,
    token=ZILLIZ_API_KEY)

openAIClient = OpenAI(api_key=OPENAI_API_KEY)


# The first two CSV columns hold the author and the text. (These are the same values
# that df.to_records() exposes at positions 1 and 2, position 0 being the index.)
authors = df.iloc[:, 0].tolist()
texts = df.iloc[:, 1].tolist()

# Build the list of dictionaries to insert. Texts are embedded in batches so that
# one request covers many rows instead of one request per row.
rows = []
for start in range(0, len(texts), EMBEDDING_BATCH_SIZE):
    batch_authors = authors[start:start + EMBEDDING_BATCH_SIZE]
    batch_texts = texts[start:start + EMBEDDING_BATCH_SIZE]
    response = openAIClient.embeddings.create(input=batch_texts, model=EMBEDDING_MODEL)
    # Order by the index reported in the response so each vector is paired with its own text.
    embeddings = [item.embedding for item in sorted(response.data, key=lambda item: item.index)]
    rows.extend(
        {'author': author, 'text': text, 'vector': vector}
        for author, text, vector in zip(batch_authors, batch_texts, embeddings)
    )

# Insert the data in "rows" into the collection; the cell displays the value returned by insert.
milvusClient.insert(COLLECTION_NAME, rows)


DEBUG:pymilvus.milvus_client.milvus_client:Created new connection using: a6acb06500d440f0a4d987d45b8bdfa7


['448076123400628635',
 '448076123400628636',
 '448076123400628637',
 '448076123400628638',
 '448076123400628639',
 '448076123400628640',
 '448076123400628641',
 '448076123400628642',
 '448076123400628643',
 '448076123400628644',
 '448076123400628645',
 '448076123400628646',
 '448076123400628647',
 '448076123400628648',
 '448076123400628649',
 '448076123400628650',
 '448076123400628651',
 '448076123400628652',
 '448076123400628653',
 '448076123400628654',
 '448076123400628655',
 '448076123400628656',
 '448076123400628657',
 '448076123400628658',
 '448076123400628659',
 '448076123400628660',
 '448076123400628661',
 '448076123400628662',
 '448076123400628663',
 '448076123400628664']

'itemgetter' is for extracting elements from data structures.
'os' is for communicating with the operating system.
FAISS is a library for similarity search of vectors; it is imported here, but the retrieval below goes through Zilliz rather than FAISS.

In [3]:
from operator import itemgetter
import os
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import Milvus
from langchain.globals import set_debug, set_verbose
from langchain_core.retrievers import BaseRetriever
from langchain_core.documents import Document
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from openai import OpenAI
from typing import List
from pymilvus import (
    connections,
    MilvusClient
)

# Turn on LangChain's verbose mode for this session.
set_verbose(True)

# Publish the OpenAI key through the environment. ChatOpenAI() is constructed further
# down without an explicit key, so presumably it picks the key up from here.
os.environ.update(OPENAI_API_KEY=OPENAI_API_KEY)

# The same endpoint and token are used for the pymilvus connection and for the client.
zilliz_credentials = {'uri': ZILLIZ_CLLUSTER_URI, 'token': ZILLIZ_API_KEY}

connections.connect(secure=True, **zilliz_credentials)

milvusClient = MilvusClient(**zilliz_credentials)

class ZillizRetriever(BaseRetriever):
    """LangChain retriever that searches the Zilliz collection ``COLLECTION_NAME``.

    Relies on the module-level ``milvusClient`` and ``openAIClient`` objects.
    """

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Embed ``query`` and return the top three search hits as Documents.

        Args:
            query: Text to embed and search for.
            run_manager: Callback manager supplied by LangChain (not used here).

        Returns:
            One Document per hit, carrying the hit's ``text`` field as page content.
        """
        embedding_response = openAIClient.embeddings.create(
            input=query, model='text-embedding-ada-002')
        query_vector = embedding_response.data[0].embedding

        search_results = milvusClient.search(
            collection_name=COLLECTION_NAME,
            data=[query_vector],
            limit=3,
            output_fields=["author", "text"])

        # A single query vector was sent, so only the first result list is read.
        return [Document(page_content=hit['entity']['text']) for hit in search_results[0]]


def get_response(question, language):
  """Answer ``question`` as an email, using only content retrieved from Zilliz.

  Args:
    question: The user's question; it is also the text used for retrieval.
    language: Language the answer should be written in.

  Returns:
    The model's reply as a string, or an empty string when ``question`` is
    empty or whitespace-only.
  """
  # Nothing to embed or answer: skip the OpenAI and Zilliz calls entirely
  # (the embeddings endpoint does not accept an empty input string).
  if not question or not question.strip():
    return ""

  retriever = ZillizRetriever()

  template = """Answer the question based only on the following context and formatted as an email:
  {context}

  Question: {question}

  Answer in the following language: {language}
  """

  prompt = ChatPromptTemplate.from_template(template)
  model = ChatOpenAI()

  # The question is routed to the retriever to build the context, and is also
  # passed through unchanged together with the requested language.
  chain = (
      {
          "context": itemgetter("question") | retriever,
          "question": itemgetter("question"),
          "language": itemgetter("language"),
      }
      | prompt
      | model
      | StrOutputParser()
  )
  return chain.invoke({'question': question, 'language': language})



# Example: get_response("What is the focus for this year?", "english")


DEBUG:pymilvus.milvus_client.milvus_client:Created new connection using: 327d7acb4bd94e7d91f3192eb790741f


In [4]:
import gradio as gr

In [6]:
# Build the UI: two text inputs (question, language) and one text output.
# live is off so the chain runs only when the user submits, instead of sending
# OpenAI and Zilliz requests on every change to an input box.
iface = gr.Interface(
    fn=get_response,
    inputs=["text", "text"],
    outputs="text",
    live=False,
    title="Content App",
    description="Ask a question for killer content",
)

iface.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://889b7521e463da738a.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




DEBUG:pymilvus.milvus_client.milvus_client:Created new connection using: 83634f801a314952860fc4f8a9eafdd3


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://6478c652ae17afffba.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [None]:
import gradio as gr
from operator import itemgetter
import os
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import Milvus
from langchain.globals import set_debug, set_verbose
from langchain_core.retrievers import BaseRetriever
from langchain_core.documents import Document
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from openai import OpenAI
from typing import List
from pymilvus import (
    connections,
    MilvusClient
)

# Turn on LangChain's verbose mode for this session.
set_verbose(True)

# Reuse the key configured once in OPENAI_API_KEY rather than overwriting the
# environment variable with a second placeholder literal.
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

connections.connect(uri=ZILLIZ_CLLUSTER_URI, token=ZILLIZ_API_KEY, secure=True)

milvusClient = MilvusClient(
    uri=ZILLIZ_CLLUSTER_URI,
    token=ZILLIZ_API_KEY)

class ZillizRetriever(BaseRetriever):
    """LangChain retriever that searches the Zilliz collection ``COLLECTION_NAME``.

    Relies on the module-level ``milvusClient`` and ``openAIClient`` objects.
    """

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Embed ``query`` and return the top three search hits as Documents.

        Args:
            query: Text to embed and search for.
            run_manager: Callback manager supplied by LangChain (not used here).

        Returns:
            One Document per hit, carrying the hit's ``text`` field as page content.
        """
        results = milvusClient.search(
            # Search the collection this notebook creates and fills, not a placeholder name.
            collection_name=COLLECTION_NAME,
            data=[openAIClient.embeddings.create(input=query, model='text-embedding-ada-002').data[0].embedding],
            limit=3,
            output_fields=["author", "text"])

        # A single query vector was sent, so only the first result list is read.
        docs = []
        for result in results[0]:
            doc = Document(page_content=result['entity']['text'])
            docs.append(doc)
        return docs

def get_response(question):
    """Answer ``question`` twice, once per prompt template, from the same retrieved context.

    Args:
        question: The user's question; it is also the text used for retrieval.

    Returns:
        Both answers in one string, separated by a blank line, or an empty
        string when ``question`` is empty or whitespace-only.
    """
    # Nothing to embed or answer: skip the OpenAI and Zilliz calls entirely
    # (the embeddings endpoint does not accept an empty input string).
    if not question or not question.strip():
        return ""

    retriever = ZillizRetriever()

    # Retrieve once and share the documents between both prompts, instead of
    # embedding the question and searching the collection once per template.
    context = retriever.invoke(question)

    # Define response templates
    template1 = """Template 1: Answer the question based only on the following context and formatted as an email:
    {context}

    Question: {question}

    """

    template2 = """Template 2: Answer the question using a different format:
    {context}

    Question: {question}

    """

    model = ChatOpenAI()

    answers = []
    for template in (template1, template2):
        chain = ChatPromptTemplate.from_template(template) | model | StrOutputParser()
        answers.append(chain.invoke({'context': context, 'question': question}))

    return "\n\n".join(answers)

# Create Gradio interface.
# - The Textbox takes only a label: its first positional argument is the initial
#   value, so passing "text" there would pre-fill the box with the word "text".
# - get_response returns a single string (both answers joined), so exactly one
#   text output is declared.
# - live is off so the two model calls run only when the user submits, not on
#   every change to the input box.
iface = gr.Interface(
    fn=get_response,
    inputs=gr.Textbox(label="Enter your question"),
    outputs="text",
    live=False,
    title="Engagys Content App",
    description="Ask a question and get two different answers based on different templates."
)

iface.launch()

DEBUG:pymilvus.milvus_client.milvus_client:Created new connection using: 78ea8dc5c05b4ef99247bec0ca51aea3


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://1072baa0c96bf7e6ad.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


