- Importing necessary libraries and loading the model.

In [1]:
import os
from dotenv import load_dotenv
from langchain_chroma import Chroma
import google.generativeai as genai
# from langchain.llms import GooglePalm
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_google_genai import GoogleGenerativeAI
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

import warnings
warnings.filterwarnings("ignore")

# Load environment variables (load_dotenv reads a local .env file when one exists)
# and configure the Google Generative AI SDK with the API key.
# os.environ[...] raises KeyError if GOOGLE_API_KEY is not set.
load_dotenv()
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

# Note: the length of the generated output can be balanced either with the
# max_output_tokens setting or through prompting.
# NOTE(review): "models/text-bison-001" looks like a PaLM-era model id (see the
# GooglePalm alternative below) - confirm it is still served for this API key.
llm = GoogleGenerativeAI(model="models/text-bison-001", temperature=0.1)
# Earlier alternative, kept for reference:
# llm = GooglePalm(model_name="models/text-bison-001", temperature=0.1)

- Loading the embedding model and the persisted Chroma vector store.

In [2]:
# Chroma and HuggingFaceEmbeddings are already imported in the first cell from
# langchain_chroma and langchain_huggingface. They are deliberately NOT
# re-imported here from the deprecated langchain.vectorstores /
# langchain.embeddings paths: doing so would silently shadow the maintained
# classes with their legacy counterparts for the rest of the notebook.

# Load the embedding model.
# NOTE(review): presumably this must be the same model that was used when
# ./chroma_db was built - confirm against the indexing notebook.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Open the vector store persisted under ./chroma_db
loaded_vectorstore = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)

# Sanity check: report how many documents the store holds.
# _collection is a private attribute of the wrapper; it is used here only for this count.
print(f"Number of documents in the loaded vector store: {loaded_vectorstore._collection.count()}")

Number of documents in the loaded vector store: 47


In [3]:
# Prompt for the "stuff" chain: {context} receives the retrieved documents and
# {question} receives the section header that should be written.
prompt_template = """You are an AI assistant tasked with writing a comprehensive document in Markdown format based on a provided table of contents. 
Use the following pieces of context to write detailed sections for the document.
If you don't have enough information, state that more research is needed on that topic.

Context: {context}

Section to write:
{question}

AI Assistant: Write a detailed section for the given header in Markdown format. 
Provide comprehensive and informative content directly related to the section title.
Do not create additional headers or a table of contents.
Ensure the content is well-structured, relevant to the topic, and flows logically.
Use the appropriate number of '#' symbols for the header level as indicated in the section title.
Do not mention or include any links to images or any other form of data other than text.
Focus solely on providing textual content relevant to the section."""

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Retrieval-augmented QA chain: documents come from the loaded vector store,
# are stuffed into PROMPT, and the source documents are returned with the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=loaded_vectorstore.as_retriever(),
    chain_type="stuff",
    chain_type_kwargs=dict(prompt=PROMPT),
    return_source_documents=True,
)

def notegen(query):
    """Run the retrieval QA chain for one section header.

    Args:
        query: The section header (or any query string) passed to the chain
            under the "query" key.

    Returns:
        Whatever ``qa.invoke`` returns for that query, unchanged.
    """
    return qa.invoke({"query": query})

In [5]:
# def generate_document(toc):
    
#     # Tokenize the input
#     sections = [section.strip() for section in toc.split(',')]
    
#     # Open the output file
#     with open('output.md', 'w', encoding='utf-8') as f:
#         # Iterate through each section
#         for section in sections:
            
#             # Generate content for the section
#             result = notegen(section)

#             print(section)
            
#             # Write the section header
#             f.write(f"{section}\n\n")
            
#             # Write the generated content
#             f.write(f"{result['result']}\n\n")
    
#     print("Saved!")

def generate_document(toc, output_path='output.md', mode='a'):
    """Generate the content for one section header and write it to a Markdown file.

    The content is generated BEFORE the file is opened, so a failed generation
    neither creates an empty output file nor keeps a file handle open while
    waiting on the model.

    Args:
        toc: The section header to write about (e.g. "##2. Core Architectural
            Components"). It is passed to ``notegen`` as the query.
        output_path: File to write to. Defaults to 'output.md'.
        mode: Mode passed to ``open``. The default 'a' appends, so successive
            calls build up one document; pass 'w' to start the file afresh
            (useful when re-running the notebook, which would otherwise
            append duplicate sections).

    Returns:
        None. The function prints the header and a "Saved!" confirmation.

    Raises:
        Any exception raised by ``notegen`` or by opening/writing the file.
    """
    # Generate content for the section first (this is the slow, fallible step)
    result = notegen(toc)

    print(toc)

    # The header itself is not written here; only the generated text is.
    # The earlier explicit header write (f.write(f"{toc}\n\n")) stays disabled.
    with open(output_path, mode, encoding='utf-8') as f:
        # Write the generated content
        f.write(f"{result['result']}\n\n")

    print("Saved!")

# Earlier input variants, kept for reference:
# toc = input("Please enter the table of contents in one line, separated by commas: ")

# toc = """# Understanding Large Language Model Architectures, ##1. Fundamentals of LLMs, ###1.1 Definition and Key Concepts, ###1.2 Historical Development of LLMs, ##2. Core Architectural Components, ###2.1 Transformer Architecture, ###2.2 Attention Mechanism, ###2.3 Self-Attention Mechanism, ##3. Training and Fine-Tuning LLMs, ###3.1 Data Preprocessing, ###3.2 Training Process, ###3.3 Fine-Tuning Process"""

# Section headers; the leading '#' characters encode the header level.
toc1 = "# Understanding Large Language Model Architectures"
toc2 = "##1. Fundamentals of LLMs"
toc3 = "##2. Core Architectural Components"
toc4 = "###2.1 Transformer Architecture"

# Only toc3 and toc4 are generated in this run; toc1 and toc2 are defined
# but their generate_document calls are intentionally left out.
for section in (toc3, toc4):
    generate_document(section)

##2. Core Architectural Components
Saved!
###2.1 Transformer Architecture
Saved!


: 

##2. Core Architectural Components, ###2.1 Transformer Architecture