# deal.II Assistant

In [1]:
# !pip install langchain langchain-community langchain-cohere langchain-chroma tiktoken gradio beautifulsoup4

In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain.load import dumps, loads
import json
import tiktoken
#from langchain_community.document_loaders import RecursiveUrlLoader
#from bs4 import BeautifulSoup as Soup

In [3]:
# Cohere API key
import os

# Prefer a key that is already exported as COHERE_API_KEY, so a real credential
# never has to be pasted into (and saved with) the notebook. The placeholder is
# used only when the environment provides nothing; `or` also covers a variable
# that is set but empty.
cohere_api = os.environ.get('COHERE_API_KEY') or 'Your Cohere API Key'
os.environ['COHERE_API_KEY'] = cohere_api

In [4]:
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Count the tokens `string` encodes to under the tiktoken encoding `encoding_name`."""
    tokens = tiktoken.get_encoding(encoding_name).encode(string)
    return len(tokens)


# One URL per tutorial page, step_1 through step_90; numbers 73, 80, 84 and 88 are skipped.
urls = [
    f"https://dealii.org/current/doxygen/deal.II/step_{step}.html"
    for step in range(1, 91)
    if step not in (73, 80, 84, 88)
]
#print(urls)

# Disabled one-off scraping step: it sits inside a bare string literal, so it is
# never executed. When enabled, it loads each URL in `urls` with
# RecursiveUrlLoader and writes the serialized result to
# ./save_urls/step_<n>.json (url[48:-5] drops the 48-character base URL and the
# ".html" suffix, leaving <n>). It needs the RecursiveUrlLoader / BeautifulSoup
# imports that are commented out in the imports cell.
"""
docs = []
for url in urls:
    loader = RecursiveUrlLoader(url=url, max_depth=11, extractor=lambda x: Soup(x, "html.parser").text)

    doc = loader.load()

    docs.extend(doc)

    string_representation = dumps(doc)

    with open("./save_urls/step_" + url[48:-5] + ".json", "w") as fp:
        json.dump(string_representation, fp)

print(f"No. of webpages: {len(docs)}")
"""

# Rebuild the document list from the per-step JSON files under ./save_urls/
# (same step numbers as the URL list: 1-90 without 73, 80, 84 and 88).
docs = []
for step in range(1, 91):
    if step in (73, 80, 84, 88):
        continue
    with open(f"./save_urls/step_{step}.json", "r") as fp:
        # Each file holds a JSON-encoded string produced by langchain's dumps()
        docs.extend(loads(json.load(fp)))

print(f"No. of webpages: {len(docs)}")



# Chunk the pages (chunk_size=400, chunk_overlap=40, sized by the tiktoken encoder)
splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=400,
    chunk_overlap=40,
)
splitted_docs = splitter.split_documents(docs)
print(f"No. of splitted documents: {len(splitted_docs)}")


# Token total over the unsplit pages, measured with the cl100k_base encoding
docs_texts = [page.page_content for page in docs]
counts = [num_tokens_from_string(text, "cl100k_base") for text in docs_texts]

print(f"No. of tokens: {sum(counts)}")

  doc = loads(json.load(fp))


No. of webpages: 86
No. of splitted documents: 11051
No. of tokens: 2334034


In [5]:
from langchain_cohere import CohereEmbeddings

# Embedding model handed to the vector store
embedding_function = CohereEmbeddings(model="embed-english-v3.0")

# Open the Chroma store persisted under ./dealii_db_400_40/
db = Chroma(
    embedding_function=embedding_function,
    persist_directory='./dealii_db_400_40/',
)

# Kept for reference: statements for dropping the collection and for building
# the store from `splitted_docs`.
#db.delete_collection()

#db = Chroma.from_documents(splitted_docs, embedding_function, persist_directory='./dealii_db_400_40')

sagemaker.config INFO - Not applying SDK defaults from location: C:\ProgramData\sagemaker\sagemaker\config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: C:\Users\mehdi\AppData\Local\sagemaker\sagemaker\config.yaml


In [6]:
from langchain_cohere import ChatCohere

# Chat model shared by all chains below; temperature is pinned to 0
llm = ChatCohere(
    model='command-r',
    temperature=0.,
)

In [7]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.load import dumps, loads
from operator import itemgetter
import gradio as gr

In [8]:
def get_unique_union(documents: list[list]):
    """Merge several lists of retrieved documents into one list without duplicates.

    Args:
        documents: A list of result lists (one per query).

    Returns:
        The distinct documents in order of first appearance. Two documents
        count as the same when their `dumps` serializations are equal.
    """
    # Flatten list of lists, and convert each Document to string
    serialized = (dumps(doc) for sublist in documents for doc in sublist)
    # dict.fromkeys de-duplicates while keeping insertion order. A set would
    # hand the documents back in an arbitrary order that changes between
    # interpreter runs (string hashing is randomized), which made the context
    # given to the LLM non-reproducible.
    return [loads(doc) for doc in dict.fromkeys(serialized)]

def create_history(history):
    """Render the chat history as numbered "Question n:" / "Answer n:" lines.

    Args:
        history: Sequence of message dicts that each carry a 'content' entry,
            read positionally as question, answer, question, answer, ...

    Returns:
        One newline-terminated line per message, concatenated into a single
        string; "" for an empty history. A trailing question that has no
        answer yet is rendered on its own instead of raising IndexError.
    """
    lines = []
    for start in range(0, len(history), 2):
        turn = start // 2 + 1
        lines.append(f"Question {turn}: {history[start]['content']}\n")
        # Odd-length history: the last question has no matching answer.
        if start + 1 < len(history):
            lines.append(f"Answer {turn}: {history[start + 1]['content']}\n")
    return "".join(lines)

def remove_empty_string(list_of_questions):
    """Remove blank entries from a list of question strings, in place.

    Entries that are empty or consist only of whitespace are dropped, so stray
    blank lines in the LLM output are not forwarded as queries. Remaining
    entries are left unchanged and keep their order.

    Args:
        list_of_questions: List of strings; it is modified in place.

    Returns:
        The same list object, without its blank entries.
    """
    # Slice assignment keeps the caller's list object (the original also mutated it).
    list_of_questions[:] = [q for q in list_of_questions if q.strip()]
    return list_of_questions

# Answer prompt: filled with {question}, {context} and {history}; it is the
# prompt that rag_chain pipes into the LLM to produce the final answer.
template1 = """You are an expert assistant for question-answering tasks for deal.II library, \
an open-source C++ finite element library. The library website can be accessed at https://dealii.org. \
Use the following pieces of retrieved context and history of the conversation to answer the question. \
Provide the code examples where possible. If you don't know the answer, just say that you don't know. \
If you know the answer, cite the source of your answer at the end. REMEMBER to add the sources of your answers at the end.

Question: {question}

Context: {context}

History of questions and answers between user and assistant: {history}

Answer:
"""
prompt1 = ChatPromptTemplate.from_template(template1)

# Query-expansion prompt: asks for five rephrasings of {question} plus the
# original, one per line. multi_question_chain splits the reply on newlines.
template2 = """You are an expert assistant for question-answering tasks for a finite element library. \
Using this library, one can numerically solve ordinary differential equations \
and partial differential equations on mathematical domains for variety of problems. \
Your task is to generate five different versions of the given question. \
JUST output each question in one line. Add the original question also. \
Nothing else should be mentioned in the output, just questions separated by newlines.

Question: {question}
"""

prompt2 = ChatPromptTemplate.from_template(template2)

# Reformulation prompt: turns the latest {question} plus {history} into a
# standalone question; used by reformulate_chain.
template3 = """Given a chat history and the latest user question \
which might reference context in the chat history, \
reformulate a standalone question which can be understood \
without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is. \
Do not return anything else.

Question: {question}

History of questions and answers between user and assistant: {history}
"""
prompt3 = ChatPromptTemplate.from_template(template3)

# Imported here so this cell brings its own dependency into scope.
from langchain_core.runnables import RunnablePassthrough

# Retriever over the persisted Chroma store, with its default search settings
base_retriever = db.as_retriever()

# {question, history} -> standalone question (string)
reformulate_chain = prompt3 | llm | StrOutputParser()

# standalone question -> list of non-empty question variants
multi_question_chain = prompt2 | llm | StrOutputParser() | (lambda x: x.split("\n")) | remove_empty_string

# standalone question -> de-duplicated documents retrieved for every variant
context_chain = multi_question_chain | base_retriever.map() | get_unique_union

# Same composition as before, kept under its original name:
# {question, history} -> documents
retrieval_chain = reformulate_chain | context_chain

rag_chain = (
    # Reformulate ONCE and overwrite "question" with the standalone version.
    # Previously reformulate_chain ran twice per request (inside retrieval_chain
    # and again for the "question" key), doubling that LLM call and allowing the
    # retrieved context and the answered question to come from different outputs.
    RunnablePassthrough.assign(question=reformulate_chain)
    # Retrieve with that same question; "history" is passed through untouched.
    | RunnablePassthrough.assign(context=itemgetter('question') | context_chain)
    | prompt1
    | llm
    | StrOutputParser()
)


In [9]:
def response_function(message, history):
    """Chat callback: stream the answer to `message` as it is generated.

    Args:
        message: The latest user message.
        history: Previous messages, rendered to text with create_history.

    Yields:
        The answer text accumulated so far, once per chunk streamed by rag_chain.
    """
    chain_input = {'question': message, 'history': create_history(history)}

    answer_so_far = ""
    for chunk in rag_chain.stream(chain_input):
        answer_so_far += chunk
        yield answer_so_far

# Sample prompts shown in the chat UI
examples = ["How can you help me?", "What is FE_Nothing?", "How to write a loop over all cells?", "How to construct Lagrange elements?", "What is a preconditioner?"]

# The tutorial count is taken from the loaded documents rather than hard-coded:
# the previous text said "all 90 tutorials", but only 86 pages are loaded
# (step numbers run to 90, four of them are skipped).
description = f"This assistant helps with questions about the functionality of the deal.II library, including its applications, use cases, and specific functions or classes. It draws information from the {len(docs)} tutorial programs indexed from the deal.II documentation but does not cover the entire documentation."

gr.ChatInterface(
    response_function,
    type="messages",
    title="deal.II Assistant",
    description=description,
    examples=examples,
).launch() #share=True


* Running on local URL:  http://127.0.0.1:7871

To create a public link, set `share=True` in `launch()`.




In [10]:
# Two-turn check: the second question refers back to the first ("it"),
# so it exercises the history-aware reformulation step.
question = "what is FE_Nothing?"
question2 = "where can i use it?"

history = []
answers = []
for turn_question in (question, question2):
    # History is rendered before the new turn is appended to it
    history_str = create_history(history)
    answers.append(rag_chain.invoke({'question': turn_question, 'history': history_str}))

    history.append({"role": "user", "content": turn_question})
    history.append({"role": "assistant", "content": answers[-1]})

result1, result2 = answers

## Answers:

In [11]:
# Answer to the first question ("what is FE_Nothing?")
print(result1)

The term "FE_Nothing" refers to a special finite element class in deal.II with exactly zero degrees of freedom per cell. A cell in this context is a small region into which the solution domain is divided -- the finite element is the basic unit for the approximation of solutions to problems. 

The FE_Nothing class is used when no computations with shape functions are required, only the JxW values from an FEValues object. Using FE_Nothing allows one to take advantage of a uniform description of the solution domain, which enables certain simplifications in the code. For instance, counting degrees of freedom, sorting them, or partitioning matrices become simpler when using FE_Nothing. This class is also useful for creating easy graphical outputs, as all fields are defined on all nodes of the mesh, which is not the case for other finite element classes in deal.II. 

Here is an example of how FE_Nothing is implemented in the code:
```cpp
const MappingQ<dim> mapping(degree);
const FE_Nothing<

In [12]:
# Answer to the follow-up question ("where can i use it?")
print(result2)

The FE_Nothing class is used when the function space of the solution domain requires no degrees of freedom, meaning it is a constant zero function. This class is useful for simplifying certain operations, such as counting degrees of freedom, sorting, or partitioning matrices. It also makes creating graphical outputs easier since the fields are defined on all the nodes of the mesh.

Here's an example of how to implement it:
```cpp
const MappingQ<dim> mapping(degree);
const FE_Nothing<dim> fe;
```

The FE_Nothing class is declared in the following include file:
```cpp
#include <deal.II/fe/fe_nothing.h>
```

Sources: https://dealii.org/current/doxygen/deal.II/step_46.html , https://dealii.org/current/doxygen/deal.II/step_10.html
