# Task 1: Data Preparation

In [1]:
# Onboarding Questions
# Ordered onboarding questionnaire. Each entry carries a 1-based "id", the
# "question" text shown to the user, and the "field" the answer is stored under.
onboarding_questions = [
    {"id": number, "question": text, "field": field_name}
    for number, (text, field_name) in enumerate(
        (
            ("What is your full name?", "name"),
            ("What is your contact number?", "contact_number"),
            ("What is your email address?", "email"),
            ("What is your business name?", "business_name"),
            ("What type of business do you run?", "business_type"),
            ("How many employees does your company have?", "company_size"),
            ("Where is your company located?", "company_location"),
            ("What is your company's website?", "company_website"),
        ),
        start=1,
    )
]

# Database Schema (using SQLAlchemy as an example)
from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base

# Declarative base class that the ORM models in this cell inherit from.
# NOTE(review): declarative_base is imported from sqlalchemy.ext.declarative;
# SQLAlchemy 1.4+ also exposes it as sqlalchemy.orm.declarative_base and warns
# about the old location — confirm against the installed version.
Base = declarative_base()

class User(Base):
    """ORM model for the ``users`` table: one row of onboarding answers.

    Apart from the integer primary key, every column is a string whose name
    matches a ``field`` value in ``onboarding_questions``.
    """
    __tablename__ = 'users'

    # Integer primary key.
    id = Column(Integer, primary_key=True)
    name = Column(String)
    contact_number = Column(String)
    # Declared unique: two rows cannot share the same e-mail address.
    email = Column(String, unique=True)
    business_name = Column(String)
    business_type = Column(String)
    # Stored as text, not as a number.
    company_size = Column(String)
    company_location = Column(String)
    company_website = Column(String)

# Set up the database
# SQLite database kept in the file onboarding.db (relative path).
engine = create_engine('sqlite:///onboarding.db')
# Issue CREATE TABLE for the models registered on Base (here: users).
Base.metadata.create_all(engine)

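# Base = declarative_base()  # NOTE(review): stray indented line (invalid at top level); looks like leftover notebook cell output — confirm and remove.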


# Task 2: Build a Conversational Agent with LangChain and Llama Index

In [None]:
import os
from langchain import LLMChain, PromptTemplate
from langchain.llms import OpenAI
from langchain.memory import ConversationBufferMemory
from llama_index import SimpleDirectoryReader, GPTSimpleVectorIndex, LLMPredictor, PromptHelper
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from onboarding_questions_schema import User, onboarding_questions

# Set up OpenAI API key
# Keep a key that is already exported in the environment; fall back to the
# placeholder only when none is set, so a real key is never overwritten.
os.environ.setdefault("OPENAI_API_KEY", "your-api-key-here")

# Initialize LangChain
# Completion model used by the onboarding chain.
llm = OpenAI(temperature=0.7)
# Prompt text with three placeholders: {question} is the onboarding question
# to ask, {chat_history} is the conversation so far, and {human_input} is the
# latest human turn.
template = """
You are an AI assistant helping with user onboarding. Ask the user the following question:
{question}
Current conversation:
{chat_history}
Human: {human_input}
AI: """

# The declared input variables must match the placeholders in the template.
prompt = PromptTemplate(
    input_variables=["question", "chat_history", "human_input"],
    template=template
)

# Conversation memory that fills the prompt's {chat_history} placeholder.
# The chain takes two non-history inputs ("question" and "human_input"), so
# the memory must be told which one is the human turn; without input_key it
# cannot pick one and raises "One input key expected" when saving context.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    input_key="human_input",
)
# Chain that renders the prompt, calls the LLM and records the turn in memory.
conversation = LLMChain(
    llm=llm,
    prompt=prompt,
    memory=memory,
)

# Initialize Llama Index
def create_index(directory_path, index_path='index.json'):
    """Build a vector index over the documents in a directory and save it.

    Args:
        directory_path: Directory whose files are loaded with
            ``SimpleDirectoryReader``.
        index_path: File the finished index is written to. Defaults to
            ``'index.json'``, the path that was previously hard-coded.

    Returns:
        The newly built ``GPTSimpleVectorIndex``.
    """
    # Limits handed to PromptHelper and the LLM.
    max_input_size = 4096
    num_outputs = 512
    max_chunk_overlap = 20
    chunk_size_limit = 600

    prompt_helper = PromptHelper(max_input_size, num_outputs, max_chunk_overlap, chunk_size_limit=chunk_size_limit)
    llm_predictor = LLMPredictor(llm=OpenAI(temperature=0.7, model_name="text-davinci-002", max_tokens=num_outputs))
    documents = SimpleDirectoryReader(directory_path).load_data()
    index = GPTSimpleVectorIndex(documents, llm_predictor=llm_predictor, prompt_helper=prompt_helper)
    # Persist the index so a later run can load it instead of rebuilding it.
    index.save_to_disk(index_path)
    return index

# Reuse the index saved on disk when the file exists; otherwise build a new one
index_file = 'index.json'
index = (
    GPTSimpleVectorIndex.load_from_disk(index_file)
    if os.path.exists(index_file)
    else create_index('path/to/your/documents')
)

# Set up database connection
# Engine for the SQLite file onboarding.db.
engine = create_engine('sqlite:///onboarding.db')
# Session factory bound to that engine.
Session = sessionmaker(bind=engine)
# Single module-level session used by store_response.
session = Session()

def ask_question(question):
    """Have the conversation chain phrase ``question`` and return its text.

    The chain is called with an empty human turn; surrounding whitespace is
    removed from the model output before it is returned.
    """
    return conversation.predict(question=question, human_input="").strip()

def store_response(field, value):
    """Persist one onboarding answer on the stored user record.

    Loads the first ``User`` row (creating and adding one when the table is
    empty), sets the attribute named by ``field`` to ``value`` and commits.

    NOTE(review): the row is always the first one returned by the query, so
    every onboarding run writes to the same record — confirm this is intended.

    Args:
        field: Name of the ``User`` attribute to set (the ``field`` entry of
            an onboarding question).
        value: The answer to store.

    Raises:
        Exception: Any error raised while querying or committing is re-raised
            after the session has been rolled back.
    """
    try:
        user = session.query(User).first()
        if user is None:
            user = User()
            session.add(user)
        setattr(user, field, value)
        session.commit()
    except Exception:
        # A failed flush/commit (for example a duplicate e-mail hitting the
        # unique constraint) leaves the session unusable until it is rolled
        # back; without this every later answer would fail as well.
        session.rollback()
        raise

def onboarding_process():
    """Run the interactive onboarding interview on the console.

    For every entry in ``onboarding_questions`` the question is phrased via
    ``ask_question``, the user's reply is read from standard input, stored
    with ``store_response`` and appended to the conversation memory. Typing
    ``help`` (any case) queries the index for more information about the
    current field and prompts again.
    """
    print("Welcome to the onboarding process!")
    for question in onboarding_questions:
        response = ask_question(question['question'])
        print(f"AI: {response}")
        user_input = input("Human: ").strip()

        # Use Llama Index to get additional information if needed.
        # Loop rather than check once: a repeated "help" must trigger another
        # lookup instead of being stored as the user's answer.
        while user_input.lower() == "help":
            query = f"Provide more information about {question['field']}"
            result = index.query(query)
            print(f"AI: Here's some additional information: {result}")
            user_input = input("Human: ").strip()

        store_response(question['field'], user_input)
        memory.chat_memory.add_user_message(user_input)

    print("Thank you for completing the onboarding process!")

# Start the interactive onboarding only when run as a script, not on import.
if __name__ == "__main__":
    onboarding_process()

# Task 3: Implement Retrieval-Augmented Generation (RAG)

In [None]:
import os
import faiss
import numpy as np
from typing import List
from langchain import LLMChain, PromptTemplate
from langchain.llms import OpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from onboarding_questions_schema import User, onboarding_questions

# Set up OpenAI API key
# Keep a key that is already exported in the environment; fall back to the
# placeholder only when none is set, so a real key is never overwritten.
os.environ.setdefault("OPENAI_API_KEY", "your-api-key-here")

# Initialize LangChain components
# Completion model shared by the onboarding chain and the RetrievalQA chain.
llm = OpenAI(temperature=0.7)
# Embedding model used when building the FAISS vector store.
embeddings = OpenAIEmbeddings()

# Load and prepare the knowledge base
def load_knowledge_base(directory: str) -> List:
    """Load every ``.txt`` file in ``directory`` and split it into chunks.

    Args:
        directory: Directory to scan (not recursive); only file names ending
            in ``.txt`` are loaded.

    Returns:
        The chunks produced by ``CharacterTextSplitter.split_documents``
        (chunk size 1000, no overlap). These are the splitter's document
        objects, not plain strings.
    """
    documents = []
    # os.listdir() returns names in arbitrary order; sort them so the chunk
    # order (and anything built from it) is the same on every run.
    for filename in sorted(os.listdir(directory)):
        if filename.endswith(".txt"):
            loader = TextLoader(os.path.join(directory, filename))
            documents.extend(loader.load())

    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    return text_splitter.split_documents(documents)

# Create FAISS index
def create_faiss_index(texts: List[str]) -> FAISS:
    """Embed ``texts`` with the module-level embeddings and index them in FAISS.

    NOTE(review): the parameter is annotated ``List[str]`` but is handed to
    ``FAISS.from_documents`` — presumably document objects; confirm.
    """
    vector_store = FAISS.from_documents(texts, embeddings)
    return vector_store

# Set up RAG
def setup_rag(index: FAISS) -> RetrievalQA:
    """Wrap ``index`` in a "stuff" RetrievalQA chain driven by the shared LLM.

    The retriever is configured with ``k=2`` in its search arguments.
    """
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=index.as_retriever(search_kwargs={"k": 2}),
    )

# Initialize database
# Engine for the SQLite file onboarding.db.
engine = create_engine('sqlite:///onboarding.db')
# Session factory bound to that engine.
Session = sessionmaker(bind=engine)
# Single module-level session used by store_response.
session = Session()

# Load knowledge base and create index
# Placeholder path: point this at the directory holding the .txt knowledge files.
knowledge_base_dir = "path/to/your/knowledge_base"
# Runs at module load: reads and splits the files, then embeds the chunks.
texts = load_knowledge_base(knowledge_base_dir)
faiss_index = create_faiss_index(texts)

# Set up RAG
# Retrieval-augmented QA chain used for "help" requests and follow-up questions.
rag = setup_rag(faiss_index)

# Onboarding conversation
def onboarding_conversation():
    """Run the RAG-assisted onboarding interview on the console.

    For every entry in ``onboarding_questions`` the LLM chain phrases the
    question and the reply is read from standard input:

    * ``help`` (any case) answers from the knowledge base via ``rag`` and
      asks the same question again;
    * ``done`` (any case) moves on to the next question without storing
      anything, so the command word never ends up in the user record;
    * anything else is stored with ``store_response`` as the answer.

    After the last question the user can ask free-form questions, answered
    by ``rag``, until ``exit`` is typed.
    """
    template = """
    You are an AI assistant helping with user onboarding. Ask the user the following question:
    {question}
    Current conversation:
    {chat_history}
    Human: {human_input}
    AI: """

    prompt = PromptTemplate(
        input_variables=["question", "chat_history", "human_input"],
        template=template
    )

    conversation = LLMChain(
        llm=llm,
        prompt=prompt,
        verbose=True
    )

    # The chain has no memory attached; the transcript is kept here and
    # passed in explicitly on every call.
    chat_history = []

    print("Welcome to the onboarding process!")
    for question in onboarding_questions:
        while True:
            response = conversation.predict(question=question['question'], chat_history="\n".join(chat_history), human_input="")
            print(f"AI: {response}")
            user_input = input("Human: ").strip()
            command = user_input.lower()

            if command == "help":
                help_response = rag.run(f"Provide more information about {question['field']}")
                print(f"AI: {help_response}")
                continue  # ask the same question again

            chat_history.append(f"Human: {user_input}")
            if command == "done":
                # "done" is a command, not an answer: do not overwrite the
                # field with the literal word.
                chat_history.append("AI: Thank you. Let's move on to the next question.")
            else:
                store_response(question['field'], user_input)
                chat_history.append("AI: Thank you for providing that information.")
            break

    print("Onboarding process completed. Do you have any other questions?")
    while True:
        user_input = input("Human: ").strip()
        if user_input.lower() == "exit":
            break
        if not user_input:
            # Nothing was asked; do not send an empty query to the RAG chain.
            continue
        response = rag.run(user_input)
        print(f"AI: {response}")

def store_response(field, value):
    """Persist one onboarding answer on the stored user record.

    Loads the first ``User`` row (creating and adding one when the table is
    empty), sets the attribute named by ``field`` to ``value`` and commits.

    NOTE(review): the row is always the first one returned by the query, so
    every onboarding run writes to the same record — confirm this is intended.

    Args:
        field: Name of the ``User`` attribute to set (the ``field`` entry of
            an onboarding question).
        value: The answer to store.

    Raises:
        Exception: Any error raised while querying or committing is re-raised
            after the session has been rolled back.
    """
    try:
        user = session.query(User).first()
        if user is None:
            user = User()
            session.add(user)
        setattr(user, field, value)
        session.commit()
    except Exception:
        # A failed flush/commit (for example a duplicate e-mail hitting the
        # unique constraint) leaves the session unusable until it is rolled
        # back; without this every later answer would fail as well.
        session.rollback()
        raise

# Start the interactive onboarding only when run as a script, not on import.
if __name__ == "__main__":
    onboarding_conversation()