In [None]:
import os
from langchain import OpenAI
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain.memory import ConversationSummaryBufferMemory
from langchain.utilities import GoogleSearchAPIWrapper
from langchain.retrievers.web_research import WebResearchRetriever
from langchain.chains import RetrievalQAWithSourcesChain

In [None]:
from dotenv import load_dotenv

# Pull environment variables in via python-dotenv before any client is built
load_dotenv()

In [None]:
# Vector store: a Chroma instance that embeds with OpenAIEmbeddings and
# uses "./chroma_db_oai" as its persist directory.
vectorstore = Chroma(
    persist_directory="./chroma_db_oai",
    embedding_function=OpenAIEmbeddings(),
)

In [None]:
# Chat model shared by the memory, the retriever and the QA chain below:
# gpt-3.5-turbo with temperature 0 and streaming turned on.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
    streaming=True,
)


In [None]:
# Summary-buffer conversation memory driven by the same LLM. It reads the
# 'question' input key and stores the 'answer' output key, returning messages.
# NOTE(review): no cell visible in this notebook passes `memory` to a chain or
# retriever -- confirm whether it is meant to be wired in or can be removed.
memory = ConversationSummaryBufferMemory(
    llm=llm,
    input_key='question',
    output_key='answer',
    return_messages=True,
)

In [None]:
# Search: Google search wrapper handed to the web research retriever below.
# NOTE(review): presumably picks up its credentials from the environment
# populated by load_dotenv() -- confirm the required variable names.
search = GoogleSearchAPIWrapper()

In [None]:
# Web research retriever assembled from the three components built above:
# the chat model, the Google search wrapper and the Chroma vector store.
web_research_retriever = WebResearchRetriever.from_llm(
    llm=llm,
    search=search,
    vectorstore=vectorstore,
)

In [None]:
# Define the User Input
# Example value: brand = "CocaCola"
brand = input("Brand to research: ").strip()

# Fail fast on unusable input. `brand` is interpolated into every question
# sent to the chain and into the output file paths (output/<brand>.md and
# output/<brand>.pdf), so an empty name or one containing a path separator
# would otherwise only surface after all the web/LLM queries have been run.
if not brand:
    raise ValueError("A brand name is required.")
if "/" in brand or "\\" in brand:
    raise ValueError(f"Brand name must not contain path separators: {brand!r}")

# Initialize question-answering chain with sources retrieval
qa_chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm,
    retriever=web_research_retriever,
)

In [None]:
# Ask the QA chain for an introduction to the brand.
# `invoke` replaces the deprecated direct call on the chain object; the
# returned dict still carries the "answer" and "sources" keys printed below.
result_introduction = qa_chain.invoke({"question": f"Write an introduction to {brand}"})
print(result_introduction["answer"])
print(result_introduction["sources"])

In [None]:
# Query the QA chain for the brand's history.
# `invoke` replaces the deprecated direct call on the chain object; the
# returned dict still carries the "answer" and "sources" keys printed below.
result_history = qa_chain.invoke({"question": f"Explain the History of {brand} in detail"})
print(result_history["answer"])
print(result_history["sources"])

In [None]:
# Query the QA chain for the brand's demographics.
# `invoke` replaces the deprecated direct call on the chain object; the
# returned dict still carries the "answer" and "sources" keys printed below.
result_demographics = qa_chain.invoke({"question": f"Explain the Demographics of {brand} in detail"})
print(result_demographics["answer"])
print(result_demographics["sources"])

In [None]:
# Query the QA chain for the brand's social presence.
# `invoke` replaces the deprecated direct call on the chain object; the
# returned dict still carries the "answer" and "sources" keys printed below.
result_social_presence = qa_chain.invoke({"question": f"Explain the Social presence of {brand} in detail"})
print(result_social_presence["answer"])
print(result_social_presence["sources"])

In [None]:
# Query the QA chain for the brand's market activity.
# `invoke` replaces the deprecated direct call on the chain object; the
# returned dict still carries the "answer" and "sources" keys printed below.
result_market_activity = qa_chain.invoke({"question": f"Explain the Market Activity  of {brand}"})
print(result_market_activity["answer"])
print(result_market_activity["sources"])

In [None]:
# Create a Markdown file Article
#
# Assembles the five chain results into output/<brand>.md: a level-1 title
# with the introduction, one level-2 section per topic, and a closing list
# of the source references reported by the chain.

from mdutils.mdutils import MdUtils
from mdutils import Html

# The file is written below output/; create the directory up front so
# create_md_file() does not fail on a fresh checkout.
os.makedirs("output", exist_ok=True)

md = MdUtils(file_name=f"output/{brand}")
md.new_header(level=1, title=brand)
md.new_paragraph(f'{result_introduction["answer"]}\n')

# One level-2 section per researched topic, in reading order.
for section_title, section_result in (
    ("History", result_history),
    ("Demographics", result_demographics),
    ("Social presence", result_social_presence),
    ("Market activity", result_market_activity),
):
    md.new_header(level=2, title=section_title)
    md.new_paragraph(f'{section_result["answer"]}\n')

md.new_header(level=2, title="Reference Articles")

# Each "sources" value is a comma-separated string. Strip the whitespace
# left around each entry, drop empty entries (an empty sources string would
# otherwise yield a blank bullet), and list each reference only once while
# keeping first-seen order.
references = []
for section_result in (
    result_introduction,
    result_history,
    result_demographics,
    result_social_presence,
    result_market_activity,
):
    for source in section_result["sources"].split(','):
        source = source.strip()
        if source and source not in references:
            references.append(source)

# Only emit the bullet list when there is something to list.
if references:
    md.new_list(references)
md.create_md_file()

In [None]:
from md2pdf.core import md2pdf

# Convert the generated Markdown article into a PDF next to it.
# Both paths are derived from the brand name entered earlier.
markdown_file = f"output/{brand}.md"
output_pdf = f"output/{brand}.pdf"

# Styling comes from "md.css", given as a relative path.
# NOTE(review): assumes md.css sits in the notebook's working directory -- confirm.
md2pdf(
    pdf_file_path=output_pdf,
    md_file_path=markdown_file,
    css_file_path="md.css",
)
