In [1]:
# ability to give a gift to a friend on instagram
# ability to create tweets-like feature on instagram
# create a marketplace for educators to sell their courses on instagram

In [2]:
import langchain
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain, ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.callbacks import get_openai_callback

import wandb
from wandb.integration.langchain import WandbTracer
from serpapi import GoogleSearch
import requests
from bs4 import BeautifulSoup
from IPython.display import display, Markdown, clear_output
from http.client import responses as http_responses
import os, sys, datetime


class HiddenPrints:
    def __enter__(self):
        self._original_stdout = sys.stdout
        sys.stdout = open(os.devnull, 'w')

    def __exit__(self, exc_type, exc_val, exc_tb):
        sys.stdout.close()
        sys.stdout = self._original_stdout


In [3]:
string_template = """\
You are a tech product manager. You have to help the user create a Product Requirement Document based on the questions the user asks you. The user will ask you specific questions about each topic they want to be included in the PRD. 

Do not repeat the same information again and again. Answers to each question should be unique and not repetitive. By this I mean do not repeat any ideas or sentences. Do not copy statements and ideas from previous sections. Any ideas or examples should only be in accordance to the particular section.

Format your responses in Markdown mode with each topic being the ##Heading, and your answer being the content. Highlight important points in **bold**. Give the PRD a suitable #Title.

For reference, let us say there are 3 people - A, B, and C belonging to different age groups, professions, and geographies. A is a 20-year-old college student from India. B is a 40-year-old working professional from the US. C is a 60-year-old retired person from the UK.
If required, for that particular section, you can use any of these people as examples to explain your point. The user does not know anything about these people.

You do not need to include these 3 people in every section. You can use them as examples only if required. You can also use other examples if you want to. You can also use yourself as an example if you want to.

Current conversation:
{history}
Human: {input}
AI: """

prompt_template = PromptTemplate(
    template=string_template,
    input_variables=["history", "input"],
)

prompts_list = [
    """Product Overview:
Define the Purpose and Scope of this product. It should include how different groups of users across ages, genders, and geographies can use this product. Include an overview of the product. Why should one use this product? Define the target audience and stakeholders in detail. Also, include the rationale behind having the particular group as the target audience. Explain the gap it is trying to fill as well - how it is different from and better than other similar products?""",
    """Product Objectives:
First, analyze whether the product objectives align with the company objectives if the company and company objectives are mentioned. Else, talk about the objectives of the product, what it will help achieve, and how it will assist customers. Think aloud. Explain your reasoning. Also, talk about why and how the business models of the product and company match. What company goals can the product help achieve - be it attracting customers, generating profits, or promoting the goodwill of the company? Also, explain how it would do this.""",
#     """Market Research:
# First, list out current and potential competitors. Current competitors should include already established businesses/products. Potential competitors should include products and businesses that aren’t yet popular or are still under development/ beta version. Also include major or minor differences between our product and the competitor products you have identified. Analyze how aspects of our product or competitor products are better for that particular aspect. How do the target customers different? Does our product better cater to current trends and expectations of the users? How? What should the product include to meet those trends and expectations.""",
#     """Competitive Analysis Table:
# Use all the above competitors to create a competitive analysis of these applications in a tabular form using the following points - user base, user region, different features supported, and pricing tiers. Don't limit yourself to these categories and think of other categories yourself. Return the output in a well-structured Markdown table""",
    """Feature Requirements:
What are some of the important features that should be implemented? Follow the MoSCoW format (Must have, Should have, Could have, Won’t have, along with why). How are we going to collect user inputs and use user data that we collect to make the product better and add other features?""",
    """Launch Strategy:
Compare US vs International markets for this product. Also, analyze this product and figure out what customer demographic is this product for. Based on these things, come up with a detailed launch strategy for the product. List the TAM vs SAM vs SOM. TAM or Total Available Market is the total market demand for a product or service. SAM or Serviceable Available Market is the segment of the TAM targeted by your products and services which is within your geographical reach. SOM or Serviceable Obtainable Market is the portion of SAM that you can capture.""",
    """User Stories:
Create user stories for the product. User stories are short, simple descriptions of a feature told from the perspective of the person who desires the new capability, usually a user or customer of the system. They typically follow a simple template: As a < type of user >, I want < some goal > so that < some reason >. For example, As a college student, I want to be able to share my notes with my friends so that I can help them with their studies.""",
    """Acceptance Criteria:
Define the quality of completeness required to be able to get to the MVP stage of this product.""",
    """Success Metrics:
How do we define success in this product? What are the KPIs to look out for? How are they measured? Why do those KPIs matter? How are we going to use these KPIs to make the product better?""",
    """Technical Feasibilities:
Outline the technical roadmap for this product. What mobile devices should this application be available for? What is a scalable and reliable tech stack which can be used for the frontend and the backend for this application?""",
    """Timeline:
Define the timeline for the product development. In addition to the timeline, what are the resources required to complete this project. Think about the resources required for each stage of the project, the number of employees required for each stage, and the time required for each stage."""
]

product_names = ["DateSmart", "TalentScout", "MusicMate"]
product_descriptions = [
    "A dating app that encourages users to have a conversation with each other before deciding whether they want to match. While some dating apps allow direct messages, it is only for plus users, and only to a limited number of people. Our app’s focus is to encourage conversation first. The app ensures strict verification to prevent fraud, scamsters and fake accounts.",
    "A Sports Analytics based product that allows coaches, analysts, team managers and owners scout for talent based on performances in the domestic and lower division circuits. The app will take lower division statistics, team information, team-mates’ information of players, along with ground information and weather information on match days, to predict how a player will fare in higher level of sports (national and international). State-of-the-art models such as Graph Neural Networks will be used to generate highly accurate predictions.",
    "An app that recommends music to you based on your preferences, time of the day, and what activity you did before, and what you plan to do after. The app will use predictive modelling, frequency modelling, and NLP techniques. The app will also have a social media aspect to it, where you can share your music with your friends, and see what they are listening to."
]

In [4]:
for product_name, product_description in zip(product_names[0:1], product_descriptions[0:1]):

    chat = ChatOpenAI(
    model="gpt-4",
    temperature=0,
    openai_api_key=os.environ["OPENAI_API_KEY"],
    )

    wandb.init(
        project="chat-prd-gpt-4",
        config={
            "model": "gpt-4",
            "temperature": 0
        },
        entity="arihantsheth",
        name=f"{product_name}_gpt-4",
    )

    memory = ConversationBufferMemory()

    chain = LLMChain(
        llm=chat,
        memory=memory,
        prompt=prompt_template,
        verbose=False
    )

    with get_openai_callback() as callback:

        initial_output = chain.predict(
            input=f"""\
I want to create the following new product:
{product_name}.

Product description: {product_description}

DO NOT START WRITING. WAIT FOR THE HUMAN TO WRITE "Start generating the PRD" BEFORE YOU START WRITING.
""", 
            callbacks=[WandbTracer()]
)

        output = ""
        for i, prompt in enumerate(prompts_list):

            # with get_openai_callback() as callback:

            output += chain.predict(
                input=prompt,
                callbacks=[WandbTracer()]
            )

            print(f"Prompt {i+1} of {len(prompts_list)}")

            output += "\n\n"

        # if not os.path.exists(f"../generated_prds/{product_name}"):
        #     os.makedirs(f"../generated_prds/{product_name}")

        # with open(f"../generated_prds/{product_name}/{product_name} 2023 Chat gpt-4.md", "w") as f:
        #     f.write(output)
    
    wandb.log({"prd": output})
    clear_output()

print(f"Product name: {product_name}")
print(f"Total tokens used: {callback.total_tokens}")
print(f"Prompt Tokens: {callback.prompt_tokens}")    
print(f"Completion Tokens: {callback.completion_tokens}")
print(f"Successful Requests: {callback.successful_requests}")
print(f"Total cost to generate: ${callback.total_cost:.2f}")

Product name: DateSmart
Total tokens used: 27370
Prompt Tokens: 23597
Completion Tokens: 3773
Successful Requests: 10
Total cost to generate: $0.93


In [5]:
def search_and_embed(search_query, vectordb):
    search = GoogleSearch({
    "q": search_query,
    "location": "Mumbai, Maharashtra, India",
    "api_key": os.environ["SERPAPI_API_KEY"],
    })

    results = search.get_dict()

    if "error" in results:
        return f"Error: {results['error']}"
            
    else:
        print(f"Number of organic results: {len(results['organic_results'])}")

    results_condensed = [(result['title'], result['link']) for result in results['organic_results'][:3]]
    content_p = ""
    count_p = 0
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)

    for title, link in results_condensed:
        print(f"Title: {title}")
        # print(f"Link: {link}")

        try:
            response = requests.get(link)
        except requests.exceptions.ConnectionError:
            print("Connection timed out... Moving to next link")
            continue
        # print(f"Response code: {response.status_code}")
        # print(f"Reponse Message: {http_responses[response.status_code]}")
        if response.status_code != 200:
            print()
            continue

        soup = BeautifulSoup(response.text, 'html.parser')
        webpage = ""
        webpage += f'## {title}' + "\n"

        content_p += f'## {title}' + "\n"
        for p in soup.find_all('p'):
            paragraph = p.get_text(separator=' ')

            if len(paragraph) > 100:
                webpage += paragraph
                content_p += paragraph
                content_p += "\n\n"
                count_p += 1

        doc = text_splitter.create_documents(texts=[content_p], metadatas=[{"source": link, "title": title}])
        ids = vectordb.add_documents(documents=[*doc])
        print(f"Added {len(ids)} documents to the database")
        print()

        content_p += "\n-------------------------------------------------------------------------------------\n"

    clear_output(wait=True)
    return "Success"

def update_qa_chain(vectordb):
    retriever = vectordb.as_retriever()

    qa_chain_chat = ConversationalRetrievalChain.from_llm(llm=ChatOpenAI(model="gpt-4"),
                                                        chain_type="stuff",
                                                        retriever=retriever,
                                                        return_source_documents=True,
                                                        )
    
    return qa_chain_chat

def search_competitors_info(competitors, competitor_queries, vectordb):
    for competitor in competitors:
        print(competitor)
        for query in competitor_queries:
            query = query.format(competitor=competitor)
            search_and_embed(search_query=query, vectordb=vectordb)

    return "Success"

def query_competitors_db(competitors, qa_chain_chat):
    comp_analysis_results = {competitor: {} for competitor in competitors}

    for competitor in competitors:
        print(f"Competitor: {competitor}")

        competitor_queries = [
        "What is the user base of {competitor}?",
        "What is the revenue of {competitor}?",
        "What are new features of {competitor}?",
        ]

        for query in competitor_queries:
            db_res = qa_chain_chat(
                {
                    "question": query.format(competitor=competitor),
                    "chat_history": [],
                }
            )
            comp_analysis_results[competitor][query.format(competitor=competitor)] = db_res["answer"]

    return comp_analysis_results

In [10]:
today = datetime.datetime.now().strftime("%Y-%m-%d")

get_google_search_query_prompt = f"""Your training cutoff date is September 2021 while today is {today}.
Generate a Google search query to find the names of competitor apps
Only return the Google search query without the follwing things:
- Double quotes
- Current Year
- A period at the end of the sentence

"""

with get_openai_callback() as google_search_query_callback:
    search_query = chain.predict(
        input=get_google_search_query_prompt,
        callbacks=[WandbTracer()]
    )

print(f"Goolge Search query: {search_query}")

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-sRS5hauWsP4QYcQcLLz6SCCw on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-sRS5hauWsP4QYcQcLLz6SCCw on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-sRS5hauWsP4QYcQcLLz6SCCw on tokens p

Goolge Search query: best dating apps


In [7]:
# retrieve_apps_from_db_prompt = """\
# The competition apps are stored in a vector database.
# The vector database understands natural language, conversational based queries.
# Generate a query to retrieve the names of competitor apps.
# Only return the query without double quotes and period and include nothing else.
# Do not specify the current date in the query.
# """

# retrieve_apps_from_db_query = chain.predict(input=retrieve_apps_from_db_prompt)
# print(retrieve_apps_from_db_query)

In [11]:
# search_query = "What are the top dating apps in the market?"
vectordb = Chroma(embedding_function=OpenAIEmbeddings())

# 1. Search for competitors and embed them into VectorDB
search_and_embed(search_query=search_query, vectordb=vectordb)
print("Finished embedding list of competitors into VectorDB.")

# 2. Load the QA Chain
qa_chain_chat = update_qa_chain(vectordb=vectordb)

# 3. Retrieve the list of competitors from VectorDB
competitors = qa_chain_chat(
        {
            "question": f"{search_query}. Only return the names of the competitors in a comma separated list.",
            "chat_history": []
        }
    )['answer'].replace(" ", "").split(",")

print("Finished retrieving list of competitors from VectorDB:")
print(competitors)

# 4. List the queries to search for competitor information
competitor_queries = [
        "What is the user base of the {competitor}?",
        "What is the revenue of the {competitor}?",
        "What are new features of the {competitor}?",
    ]

# 5. Search for competitor information on the web and embed them into VectorDB
search_competitors_info(competitors=competitors, competitor_queries=competitor_queries, vectordb=vectordb)
print("Finished embedding competitor information into VectorDB.")

# 6. Update the QA Chain
qa_chain_chat = update_qa_chain(vectordb=vectordb)

# 7. Retrieve the competitor information from VectorDB
competitive_analysis_results = query_competitors_db(competitors=competitors, qa_chain_chat=qa_chain_chat)
print("Finished retrieving competitor information from VectorDB and parsed the results into a dictionary.")

eHarmony
Zoosk
Finished embedding competitor information into VectorDB.
Competitor: Tinder
Competitor: Bumble
Competitor: OkCupid
Competitor: Hinge
Competitor: CoffeeMeetsBagel
Competitor: Happn
Competitor: PlentyofFish
Competitor: Match.com
Competitor: eHarmony
Competitor: Zoosk


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-sRS5hauWsP4QYcQcLLz6SCCw on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-sRS5hauWsP4QYcQcLLz6SCCw on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-sRS5hauWsP4QYcQcLLz6SCCw on tokens p

Finished retrieving competitor information from VectorDB and parsed the results into a dictionary.


In [12]:
market_analysis_prompt = f"""\
The following nested JSON formatted object contains details of competitor apps. Use this information to support your analysis of the market and the product if required:
{competitive_analysis_results}

Now, let us get continue generating the PRD using the same Markdown format as before.

Market Analysis:
Include major or minor differences between our product and the competitor products. \
Analyze how aspects of our product or competitor products are better for that particular aspect. \
How do the target customers different? \
Does our product better cater to current trends and expectations of the users? How? \
What should the product include to meet those trends and expectations.
"""

competitive_table_prompt = """\
Competitive Analysis:
Use all the above competitors to create a competitive analysis of these applications in a tabular form using the following points - user base, user region, different features supported, and pricing tiers. \
Don't limit yourself to these categories and think of other categories yourself. 
Return the output in a well-structured Markdown table. Use the competitor app details from the JSON object if required.
"""

final_prompt = """\
Conclusion:
Include any final thoughts or comments about the product or the market. \
Include any other information that you think is important to get across to the reader. \
Include any information that is not present in the PRD but is important to the product.
"""

with get_openai_callback() as callback_market_analysis:
    market_analysis_output = chain.predict(
        input=market_analysis_prompt,
        callbacks=[WandbTracer()]
    )

    output += market_analysis_output + "\n\n"

    competitive_table_output = chain.predict(
        input=competitive_table_prompt,
        callbacks=[WandbTracer()]
    )

    output += competitive_table_output + "\n\n"

    final_output = chain.predict(
        input=final_prompt,
        callbacks=[WandbTracer()]
    )

    output += final_output + "\n\n"

wandb.finish()

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-sRS5hauWsP4QYcQcLLz6SCCw on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-sRS5hauWsP4QYcQcLLz6SCCw on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-sRS5hauWsP4QYcQcLLz6SCCw on tokens p

0,1
prd,# DateSmart Product ...


In [13]:
total_tokens = callback.total_tokens + google_search_query_callback.total_tokens + callback_market_analysis.total_tokens
prompt_tokens = callback.prompt_tokens + google_search_query_callback.prompt_tokens + callback_market_analysis.prompt_tokens
completion_tokens = callback.completion_tokens + google_search_query_callback.completion_tokens + callback_market_analysis.completion_tokens
successful_requests = callback.successful_requests + google_search_query_callback.successful_requests + callback_market_analysis.successful_requests
total_cost = callback.total_cost + google_search_query_callback.total_cost + callback_market_analysis.total_cost

print(f"Product name: {product_name}")
print(f"Total tokens used: {total_tokens}")
print(f"Prompt Tokens: {prompt_tokens}")    
print(f"Completion Tokens: {completion_tokens}")
print(f"Successful Requests: {successful_requests}")
print(f"Total cost to generate: ${total_cost:.2f}")

Product name: DateSmart
Total tokens used: 53218
Prompt Tokens: 48402
Completion Tokens: 4816
Successful Requests: 14
Total cost to generate: $1.74


In [None]:
display(Markdown(output))

In [None]:
#     """Market Research:
# First, list out current and potential competitors. Current competitors should include already established businesses/products. Potential competitors should include products and businesses that aren’t yet popular or are still under development/ beta version. Also include major or minor differences between our product and the competitor products you have identified. Analyze how aspects of our product or competitor products are better for that particular aspect. How do the target customers different? Does our product better cater to current trends and expectations of the users? How? What should the product include to meet those trends and expectations.""",
#     """Competitive Analysis Table:
# Use all the above competitors to create a competitive analysis of these applications in a tabular form using the following points - user base, user region, different features supported, and pricing tiers. Don't limit yourself to these categories and think of other categories yourself. Return the output in a well-structured Markdown table""",

In [15]:
if not os.path.exists(f"../generated_prds/{product_name}"):
    os.makedirs(f"../generated_prds/{product_name}")

with open(f"../generated_prds/{product_name}/{product_name} with internet Chat gpt-4.md", "w") as f:
    f.write(output)