In [2]:
from pathlib import Path
from typing import Sequence

import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [3]:
# Chat model that generates the final answer at the end of the RAG chain.
llm = ChatOpenAI(model="gpt-3.5-turbo-0125")

In [4]:
def web2docs(web_pages: Sequence[str], div_class: Sequence[str]) -> list:
    '''Load web pages as documents, keeping only elements with the given classes.

    Parameters
    web_pages: list or tuple of page URLs; a single URL string is also accepted
    div_class: class name(s) of the page elements to keep; passed unchanged to
        bs4.SoupStrainer as its class_ filter
    Return
    list of documents (content and metadata) as returned by WebBaseLoader.load()
    '''
    # A bare string is itself a sequence of characters; wrap it so the loader
    # receives one URL instead of one "URL" per character.
    if isinstance(web_pages, str):
        web_pages = [web_pages]
    # Restrict HTML parsing to the elements whose class matches div_class.
    bs4_strainer = bs4.SoupStrainer(class_=div_class)
    loader = WebBaseLoader(
        web_paths=web_pages,
        bs_kwargs={"parse_only": bs4_strainer},
    )
    return loader.load()

In [5]:
# Only keep the datasheet-content sections of the two KEMET FAQ pages.
# The trailing comma matters: ("datasheet-content") without it is a plain
# string, not the one-element tuple that web2docs' signature asks for.
docs = web2docs(
    (
        "https://www.kemet.com/en/us/capacitors/ceramic/ceramics-faq.html",
        "https://www.kemet.com/en/us/capacitors/polymer/tantalum-polymer-faqs.html",
    ),
    ("datasheet-content",),
)
                

In [6]:
# One summary row per loaded page: position, character count, and metadata.
print("Doc#      Length      Source")
for i in range(len(docs)):
    doc = docs[i]
    print(f"{i}         {len(doc.page_content)}       {doc.metadata}")

Doc#      Length      Source
0         59050       {'source': 'https://www.kemet.com/en/us/capacitors/ceramic/ceramics-faq.html'}
1         21525       {'source': 'https://www.kemet.com/en/us/capacitors/polymer/tantalum-polymer-faqs.html'}


In [7]:
# Preview the first 500 characters of the first loaded page.
print(docs[0].page_content[:500])




 
    Disclaimer




This document is meant to provide general answers to frequently asked questions. All information in this document is for informational purposes only and is not considered a specification. 
To submit a question, please go to https://www.kemet.com/en/us/contact-us.html





 
    General




 
    Do MLCCs experience outgassing?



Key Words: Outgassing
KEMET Surface Mount Device (SMD) Multilayer Ceramic Capacitors (MLCCs) are constructed using high-temperature sintering pr


Our first loaded document is over 59k characters long. This is too long to fit in the context window of many models. Even for those models that could fit the full page in their context window, models can struggle to find information in very long inputs.

To handle this we’ll split the Documents into chunks for embedding and vector storage. This should help us retrieve only the most relevant bits of the FAQ pages at run time.

In this case we’ll split our documents into chunks of 1000 characters with 200 characters of overlap between chunks. The overlap helps mitigate the possibility of separating a statement from important context related to it. We use the RecursiveCharacterTextSplitter, which will recursively split the document using common separators like new lines until each chunk is the appropriate size. This is the recommended text splitter for generic text use cases.

We set add_start_index=True so that the character index at which each split Document starts within the initial Document is preserved as metadata attribute “start_index”.

In [7]:
# Chunking settings follow the text above: 1000-character chunks with a
# 200-character overlap, and each chunk's offset stored as "start_index".
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)
all_splits = text_splitter.split_documents(documents=docs)

len(all_splits)

121

In [8]:
# Inspect one chunk (index 110) to see its text and metadata together.
all_splits[110]

Document(page_content='What is surge voltage?\n\n\n\nSurge voltage is the maximum voltage (peak value) which may be applied to the capacitor for a short period. The surge voltage must not be applied for periodic charging and discharging in the course of normal operation, and cannot be part of the operation voltage.\nThe permissible surge voltage for all capacitors is 1.3 * VR. If voltage impulses (transient voltages) exceeding the surge voltage occur, this may lead to irreparable damage. If applications of this kind are planned, please consult us first.\nFor polymer automotive series, the following table can be considered as reference for max surge scenario (T598/T599):\n\n\n\n\n\n\n\n\n \n    What is Impedance (Z)/Equivalent Series Resistance (ESR), and Equivalent Series Inductance (ESL)?', metadata={'source': 'https://www.kemet.com/en/us/capacitors/polymer/tantalum-polymer-faqs.html', 'start_index': 14494})

In [9]:
# Character count of chunk 110's text.
len(all_splits[110].page_content)

761

In [10]:
# Metadata of chunk 110: its source URL and the start_index recorded by the splitter.
all_splits[110].metadata

{'source': 'https://www.kemet.com/en/us/capacitors/polymer/tantalum-polymer-faqs.html',
 'start_index': 14494}

Now we need to index our 121 text chunks so that we can search over them at runtime. The most common way to do this is to embed the contents of each document split and insert these embeddings into a vector database (or vector store). When we want to search over our splits, we take a text search query, embed it, and perform some sort of “similarity” search to identify the stored splits with the most similar embeddings to our query embedding. The simplest similarity measure is cosine similarity — we measure the cosine of the angle between each pair of embeddings (which are high dimensional vectors).

We can embed and store all of our document splits in a single command using the Chroma vector store and OpenAIEmbeddings model.

In [11]:
persist_directory = 'vectordb'

# Chroma.from_documents adds to whatever collection already lives in
# persist_directory, so re-running this cell would embed and store every chunk
# again (duplicate search hits plus repeated embedding calls). Reuse an existing
# store when there is one; delete the directory to force a rebuild.
persist_path = Path(persist_directory)
if persist_path.is_dir() and any(persist_path.iterdir()):
    vectorstore = Chroma(persist_directory=persist_directory, embedding_function=OpenAIEmbeddings())
else:
    vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings(), persist_directory=persist_directory)

In [13]:
# Write the index to disk, then drop the in-memory handle so the following
# cell shows the store being loaded back from persist_directory.
# NOTE(review): the warn_deprecated output of this cell suggests persist() is
# deprecated in the installed version — confirm whether the call is still needed.
vectorstore.persist()
vectorstore = None

  warn_deprecated(


In [15]:
# Re-open the persisted index; queries are embedded with the OpenAI embedding model.
vectorstore = Chroma(
    embedding_function=OpenAIEmbeddings(),
    persist_directory=persist_directory,
)

Now let’s write the actual application logic. We want to create a simple application that takes a user question, searches for documents relevant to that question, passes the retrieved documents and initial question to a model, and returns an answer.

First we need to define our logic for searching over documents. LangChain defines a Retriever interface which wraps an index that can return relevant Documents given a string query.

The most common type of Retriever is the VectorStoreRetriever, which uses the similarity search capabilities of a vector store to facilitate retrieval. Any VectorStore can easily be turned into a Retriever with VectorStore.as_retriever():

In [24]:
# Similarity-search retriever that returns 6 chunks per query.
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 6},
    search_type="similarity",
)

# Try it on a sample question and check how many chunks come back.
retrieved_docs = retriever.invoke(input="What is ESR for MLCC?")

len(retrieved_docs)

6

In [25]:
# Text of the first retrieved chunk.
print(retrieved_docs[0].page_content)

Typical Dielectric Absorption for MLCCs









 
    What is Equivalent Series Resistance (ESR)?



Equivalent Series Resistance, or ESR, is the sum of all the resistive losses in an MLCC and represents the fundamental part of the complex impedance equation as seen below. This equation includes the losses due to the dielectric, internal electrodes, and external terminations. For MLCCs, most resistive losses are due to the dielectric and internal electrodes. At low frequencies, the dielectric losses dominate and decrease as frequency increases. At higher frequencies, the electrode losses dominate and increase as frequency increases. MLCCs have very low ESR compared to other capacitor technologies, making them a popular choice for AC applications.













Figure 1 – Power loss contribution for dielectric and electrodes vs frequency


In [26]:
# Text of the second retrieved chunk.
print(retrieved_docs[1].page_content)

Figure 1 – Power loss contribution for dielectric and electrodes vs frequency









ESR is an important characteristic in MLCCs since high ESR can cause excessive power loss, resulting in thermal runaway. Since the power dissipated in an MLCC is a product of the ESR and the square of the current, higher current applications can easily cause self-heating of the MLCC.




























Class 1 MLCCs such as C0G and U2J utilize a low-loss dielectric and therefore have much lower ESR than Class 2 MLCCs such as X7R, X5R, etc. Due to low ESR, Class 1 MLCCs are ideal for high AC applications such as DC-LINK, LLC resonant converters, and wireless power transfer.











    Figure 2 – Impedance and ESR vs. frequency for Class 1 and Class 2 MLCCs









The KEMET KSIM tool provides many electrical characteristics for MLCCs, including ESR.  
https://ksim3.kemet.com/


















 
    What is Dissipation Factor (DF)?


In [27]:
# Fetch the RAG prompt template from the LangChain hub.
prompt = hub.pull("rlm/rag-prompt")

# Render the template with placeholder values to see the message it produces.
example_prompt_value = prompt.invoke({"context": "filler context", "question": "filler question"})
example_messages = example_prompt_value.to_messages()

example_messages

[HumanMessage(content="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: filler question \nContext: filler context \nAnswer:")]

In [28]:
def format_docs(docs):
    """Join the page_content of each document into one string, separated by blank lines."""
    page_texts = [doc.page_content for doc in docs]
    return "\n\n".join(page_texts)


# The question is fanned out two ways: through the retriever (whose chunks are
# joined by format_docs) to build the context, and unchanged as the question.
rag_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}

# Fill the prompt, call the chat model, and reduce the reply to plain text.
rag_chain = rag_inputs | prompt | llm | StrOutputParser()

# Stream the answer piece by piece as it is generated.
for token in rag_chain.stream("What is ESR for MLCC?"):
    print(token, end="", flush=True)

ESD stands for Electrostatic Discharge and refers to the release of static electricity caused by contact between two objects. It can reach voltages as high as 30kV and currents of up to 30 amps, but the total energy of ESD transient pulses is very low (<50mJ) and occurs in less than 100ns. ESD can be harmless to people but can have severe effects on electronics if not properly managed.