In [1]:
import langchain
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain, ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.schema import HumanMessage, AIMessage
from langchain.memory import ChatMessageHistory
from langchain.callbacks import get_openai_callback

import wandb
from wandb.integration.langchain import WandbTracer
from serpapi import GoogleSearch
import requests
from bs4 import BeautifulSoup
from IPython.display import display, Markdown, clear_output
from http.client import responses as http_responses
import os
import sys
import time

class HiddenPrints:
    """Context manager that discards everything printed inside a ``with`` block.

    On entry ``sys.stdout`` is replaced by a write handle on ``os.devnull``.
    On exit the stream that was active before entry is restored and the
    devnull handle opened by this instance is closed. Exceptions raised inside
    the block are not suppressed.

    Returns (from ``__enter__``):
        The ``HiddenPrints`` instance, so ``with HiddenPrints() as hp:`` works.
    """

    def __enter__(self):
        self._original_stdout = sys.stdout
        # Keep our own reference to the handle: __exit__ must close exactly the
        # file opened here, not whatever sys.stdout happens to point at later.
        self._devnull = open(os.devnull, 'w')
        sys.stdout = self._devnull
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore first so stdout is usable again even if close() fails.
        sys.stdout = self._original_stdout
        self._devnull.close()


# Usage: anything printed inside the `with` block is sent to os.devnull,
# so this cell should produce no output.
with HiddenPrints():
    print("This will not be printed")

# Cached conversation: contains the 7 prompts for the PRD (and the model's replies) so we don't have to re-run the model each time
chat_history = ChatMessageHistory(
    messages=[
        HumanMessage(content='I want to create the following new product:\nDateSmart.\n\nProduct description: A dating app that encourages users to have a conversation with each other before deciding whether they want to match. While some dating apps allow direct messages, it is only for plus users, and only to a limited number of people. Our app’s focus is to encourage conversation first. The app ensures strict verification to prevent fraud, scamsters and fake accounts.\n\nDO NOT START WRITING. WAIT FOR THE HUMAN TO WRITE "Start generating the PRD" BEFORE YOU START WRITING.\n', additional_kwargs={}, example=False), 
        AIMessage(content="Sure, I understand. I'm ready to help you create a Product Requirement Document for your new product, DateSmart. Please let me know when you want to start.", additional_kwargs={}, example=False), 
        HumanMessage(content='Product Overview:\nDefine the Purpose and Scope of this product. It should include how different groups of users across ages, genders, and geographies can use this product. Include an overview of the product. Why should one use this product? Define the target audience and stakeholders in detail. Also, include the rationale behind having the particular group as the target audience. Explain the gap it is trying to fill as well - how it is different from and better than other similar products?', additional_kwargs={}, example=False), 
        AIMessage(content='# DateSmart Product Requirement Document\n\n## Product Overview\n\n**DateSmart** is a revolutionary dating app designed to foster meaningful connections by encouraging users to engage in conversations before deciding to match. The app is designed to cater to a wide range of users across different ages, genders, and geographies. \n\nThe purpose of DateSmart is to provide a platform where users can have a more authentic and meaningful dating experience. Unlike other dating apps that focus on superficial aspects, DateSmart emphasizes the importance of conversation and getting to know the person behind the profile. \n\nThe scope of this product extends to anyone looking for a more genuine connection in the dating scene. This includes but is not limited to, college students like A from India who are looking for a meaningful relationship, working professionals like B from the US who are too busy for traditional dating methods, and even retired individuals like C from the UK who are looking to find companionship.\n\nThe rationale behind targeting such a diverse audience is that everyone, regardless of their age, gender, or location, deserves a chance at finding a meaningful connection. \n\nThe product fills a significant gap in the market by focusing on conversation-first approach. While other dating apps offer direct messaging as a premium feature, DateSmart makes it a core part of the user experience. This not only sets DateSmart apart from its competitors but also makes it a more appealing choice for users who value substance over superficiality.\n\nMoreover, DateSmart ensures strict verification to prevent fraud, scamsters, and fake accounts, providing a safe and secure environment for its users. This is a significant improvement over other dating apps where fake profiles and scams are prevalent.\n\nIn terms of stakeholders, they include the app users, the development team, the marketing team, potential investors, and partners. 
Each stakeholder plays a crucial role in the success of the product, from development to user acquisition and retention.', additional_kwargs={}, example=False), 
        HumanMessage(content='Product Objectives:\nFirst, analyze whether the product objectives align with the company objectives if the company and company objectives are mentioned. Else, talk about the objectives of the product, what it will help achieve, and how it will assist customers. Think aloud. Explain your reasoning. Also, talk about why and how the business models of the product and company match. What company goals can the product help achieve - be it attracting customers, generating profits, or promoting the goodwill of the company? Also, explain how it would do this.', additional_kwargs={}, example=False), 
        AIMessage(content="## Product Objectives\n\nThe primary objective of **DateSmart** is to revolutionize the online dating experience by fostering genuine connections through meaningful conversations. This aligns with the broader company objective of creating innovative solutions that enhance human interactions and relationships in the digital age.\n\nBy focusing on conversation-first approach, DateSmart aims to provide a more authentic and meaningful dating experience. This will help users like A, B, and C to find potential partners based on shared interests and values, rather than just physical appearance or location. \n\nThe product also aims to provide a safe and secure environment for its users. By implementing strict verification processes, DateSmart will help prevent fraud, scamsters, and fake accounts, thereby increasing user trust and satisfaction.\n\nFrom a business perspective, DateSmart aligns with the company's business model of generating revenue through in-app purchases and premium subscriptions. By offering a unique and appealing product, DateSmart will attract a large user base, which in turn will increase the potential for revenue generation.\n\nMoreover, by providing a safe and secure platform for online dating, DateSmart will also contribute to the company's goal of promoting goodwill and maintaining a positive brand image. This will not only attract more users but also potential investors and partners, thereby contributing to the overall growth and success of the company.\n\nIn summary, the objectives of DateSmart are not only aligned with the company's goals but also designed to provide significant value to its users and stakeholders. By focusing on meaningful conversations and user safety, DateSmart will redefine the online dating experience and set a new standard in the industry.", additional_kwargs={}, example=False), 
        HumanMessage(content='Launch Strategy:\nCompare US vs International markets for this product. Also, analyze this product and figure out what customer demographic is this product for. Based on these things, come up with a detailed launch strategy for the product. List the TAM vs SAM vs SOM. TAM or Total Available Market is the total market demand for a product or service. SAM or Serviceable Available Market is the segment of the TAM targeted by your products and services which is within your geographical reach. SOM or Serviceable Obtainable Market is the portion of SAM that you can capture.', additional_kwargs={}, example=False), 
        AIMessage(content="## Launch Strategy\n\nWhen launching **DateSmart**, it's crucial to consider the differences between the US and international markets. The US market is mature and highly competitive with numerous established dating apps. However, it also has a large user base that is open to trying new products, especially those that offer unique features like DateSmart. \n\nOn the other hand, international markets, especially emerging markets like India, present a significant growth opportunity. These markets have a large, tech-savvy youth population that is increasingly embracing online dating. However, these markets may also present challenges such as cultural differences and varying levels of internet penetration.\n\nGiven these considerations, the launch strategy for DateSmart should be a phased approach. Initially, the product should be launched in the US market. This will allow the team to test the product, gather user feedback, and make necessary improvements. Once the product has been validated and refined, it can then be rolled out to international markets, starting with emerging markets like India.\n\nIn terms of customer demographics, DateSmart is designed for a broad demographic that includes anyone looking for a more genuine connection in the dating scene. This includes college students, working professionals, and even retired individuals. \n\nNow, let's look at the TAM, SAM, and SOM:\n\n- **TAM (Total Available Market)**: This would be all single adults who use smartphones globally. According to Statista, there were about 3.5 billion smartphone users worldwide in 2020. Assuming that about half of them are single and open to online dating, the TAM would be approximately 1.75 billion.\n\n- **SAM (Serviceable Available Market)**: This would be all single adults who use smartphones in the US and India, which are the initial target markets. 
According to Statista, there were about 260 million smartphone users in the US and 500 million in India in 2020. Assuming that about half of them are single and open to online dating, the SAM would be approximately 380 million.\n\n- **SOM (Serviceable Obtainable Market)**: This would be the portion of the SAM that DateSmart can realistically capture. Given the competitive nature of the online dating market, a realistic target might be to capture 1% of the SAM in the first year. This would give a SOM of about 3.8 million users.\n\nIn conclusion, the launch strategy for DateSmart should be a phased approach, starting with the US market and then expanding to international markets. The product is designed for a broad demographic, and there is a significant market opportunity both in the US and internationally.", additional_kwargs={}, example=False), 
        # HumanMessage(content='User Stories:\nCreate user stories for the product. User stories are short, simple descriptions of a feature told from the perspective of the person who desires the new capability, usually a user or customer of the system. They typically follow a simple template: As a < type of user >, I want < some goal > so that < some reason >. For example, As a college student, I want to be able to share my notes with my friends so that I can help them with their studies.', additional_kwargs={}, example=False), 
        # AIMessage(content='## User Stories\n\n1. **As a college student (like A from India)**, I want to engage in meaningful conversations with potential matches, so that I can find a partner who shares my interests and values.\n\n2. **As a working professional (like B from the US)**, I want a dating app that prioritizes conversation over superficial aspects, so that I can find a genuine connection despite my busy schedule.\n\n3. **As a retired individual (like C from the UK)**, I want to use a dating app that ensures strict verification, so that I can feel safe while looking for companionship.\n\n4. **As a user of DateSmart**, I want to have the option to report or block any suspicious or inappropriate behavior, so that I can contribute to maintaining a safe and respectful community.\n\n5. **As a user of DateSmart**, I want to have access to a user-friendly interface, so that I can easily navigate through the app and have a pleasant user experience.\n\n6. **As a user of DateSmart**, I want to receive notifications when someone sends me a message, so that I can promptly respond and keep the conversation going.\n\n7. **As a user of DateSmart**, I want to have the option to upgrade to a premium subscription, so that I can access additional features and enhance my user experience. \n\n8. **As a user of DateSmart**, I want to have the option to customize my profile, so that I can express my personality and interests to potential matches. \n\n9. **As a user of DateSmart**, I want to have the option to set preferences for potential matches, so that I can find people who meet my specific criteria. \n\n10. **As a user of DateSmart**, I want to have the option to delete or deactivate my account, so that I can have control over my data and privacy.', additional_kwargs={}, example=False),
        HumanMessage(content='Acceptance Criteria:\nDefine the quality of completeness required to be able to get to the MVP stage of this product.', additional_kwargs={}, example=False), 
        AIMessage(content='## Acceptance Criteria\n\nTo reach the Minimum Viable Product (MVP) stage for **DateSmart**, the product must meet the following acceptance criteria:\n\n1. **User Registration and Verification**: The app should have a user-friendly registration process that includes a strict verification system to prevent fraud, scamsters, and fake accounts.\n\n2. **Profile Creation**: Users should be able to create and customize their profiles, including uploading photos, writing a bio, and setting preferences for potential matches.\n\n3. **Messaging Feature**: The app should have a messaging feature that allows users to engage in conversations with potential matches before deciding to match. This feature should be accessible to all users, not just premium users.\n\n4. **Search and Matching Algorithm**: The app should have a robust search and matching algorithm that takes into account user preferences and conversation quality to suggest potential matches.\n\n5. **User Safety Features**: The app should have features that allow users to report or block any suspicious or inappropriate behavior. There should also be a clear and easily accessible privacy policy and terms of service.\n\n6. **User Interface**: The app should have a user-friendly interface that is easy to navigate. It should also be visually appealing and consistent with the brand image.\n\n7. **Performance**: The app should be fast and responsive, with minimal lag or loading times. It should also be stable, with no crashes or major bugs.\n\n8. **Compatibility**: The app should be compatible with both iOS and Android platforms. It should also be optimized for different screen sizes and orientations.\n\n9. **In-app Purchases and Premium Subscriptions**: The app should offer in-app purchases and premium subscriptions that provide additional features or benefits to users.\n\n10. **Feedback Mechanism**: The app should have a mechanism for users to provide feedback or report issues. 
This will be crucial for continuous improvement and user satisfaction.\n\nBy meeting these acceptance criteria, DateSmart will be able to provide a unique and valuable product that meets the needs of its target audience and sets a new standard in the online dating industry.', additional_kwargs={}, example=False), 
        HumanMessage(content='Technical Feasibilities:\nOutline the technical roadmap for this product. What mobile devices should this application be available for? What is a scalable and reliable tech stack which can be used for the frontend and the backend for this application?', additional_kwargs={}, example=False), 
        AIMessage(content="## Technical Feasibilities\n\nThe technical roadmap for **DateSmart** should focus on developing a robust, scalable, and secure application that provides a seamless user experience. Here's a high-level outline of the technical roadmap:\n\n1. **Phase 1 - MVP Development**: Develop the core features of the app, including user registration, profile creation, messaging feature, search and matching algorithm, and user safety features. This phase should also include rigorous testing to ensure the app is stable and bug-free.\n\n2. **Phase 2 - Platform Compatibility**: Ensure the app is compatible with both iOS and Android platforms. This will involve developing and testing the app on both platforms to ensure a consistent user experience.\n\n3. **Phase 3 - Performance Optimization**: Optimize the app's performance to ensure it is fast and responsive. This may involve refining the code, optimizing images, and implementing caching strategies.\n\n4. **Phase 4 - Scalability**: Prepare the app for scalability to handle a growing number of users. This may involve implementing a scalable database and server architecture, as well as load balancing techniques.\n\n5. **Phase 5 - Security Enhancements**: Implement additional security measures to protect user data and prevent fraud. This may involve implementing encryption, secure APIs, and regular security audits.\n\n6. **Phase 6 - Continuous Improvement**: Implement a feedback mechanism and continuously improve the app based on user feedback and analytics data. This may involve adding new features, refining existing features, and fixing bugs.\n\nIn terms of mobile devices, DateSmart should be available for both iOS and Android devices, as these platforms cover the majority of the smartphone market.\n\nFor the tech stack, a reliable and scalable combination could be:\n\n- **Frontend**: React Native for building the mobile app. 
It allows for cross-platform development, which means you can write one codebase that works on both Android and iOS.\n\n- **Backend**: Node.js for the server-side application. It's lightweight, efficient, and works well with real-time applications.\n\n- **Database**: MongoDB, a NoSQL database that can handle large amounts of data and is highly scalable.\n\n- **Cloud Services**: AWS or Google Cloud for hosting the application and storing data. Both provide scalable and reliable cloud services.\n\n- **Security**: Implement JWT (JSON Web Tokens) for secure transmission of information between parties and bcrypt for hashing and securing user passwords.\n\nThis tech stack is not only modern and efficient but also highly scalable, which is crucial for a dating app like DateSmart that is expected to handle a large number of users.", additional_kwargs={}, example=False), 
        HumanMessage(content='Timeline:\nDefine the timeline for the product development. In addition to the timeline, what are the resources required to complete this project. Think about the resources required for each stage of the project, the number of employees required for each stage, and the time required for each stage.', additional_kwargs={}, example=False), 
        AIMessage(content="## Timeline and Resources\n\nThe development of **DateSmart** can be divided into several stages, each with its own timeline and resource requirements. Here's a high-level overview:\n\n1. **Phase 1 - MVP Development (3 months)**: This phase involves developing the core features of the app. It will require a team of developers (both frontend and backend), a UX/UI designer, a product manager, and a QA tester. \n\n2. **Phase 2 - Platform Compatibility (1 month)**: This phase involves ensuring the app is compatible with both iOS and Android platforms. It will require the same team as Phase 1, with the addition of an iOS and Android specialist.\n\n3. **Phase 3 - Performance Optimization (1 month)**: This phase involves optimizing the app's performance. It will require the development team and the QA tester.\n\n4. **Phase 4 - Scalability (1 month)**: This phase involves preparing the app for scalability. It will require a backend developer with experience in scalable architecture and a database administrator.\n\n5. **Phase 5 - Security Enhancements (1 month)**: This phase involves implementing additional security measures. It will require a security specialist in addition to the development team.\n\n6. **Phase 6 - Continuous Improvement (Ongoing)**: This phase involves continuously improving the app based on user feedback and analytics data. It will require the entire team on an ongoing basis.\n\nIn terms of non-human resources, the project will require software and tools for development (e.g., IDEs, version control system), design (e.g., design software), project management (e.g., project management software), and testing (e.g., testing tools). It will also require cloud services for hosting the application and storing data.\n\nIn summary, the development of DateSmart is expected to take around 7 months from start to finish, not including the ongoing improvement phase. 
The project will require a diverse team of professionals, as well as various software and tools.", additional_kwargs={}, example=False), 
    ]
)

# Assemble the PRD text from the AI replies in the cached history. The first
# AI reply is dropped by the [1:] slice; the remaining replies are separated
# by blank lines and the result ends with a trailing blank line.
ai_replies = [msg.content for msg in chat_history.messages if msg.type == 'ai']
output = "\n\n".join(ai_replies[1:]) + "\n\n"

In [1]:
# to get the number of tokens in a string: 
# chain.llm.get_num_tokens('string')

# to get the number of tokens in a list of `messages` (chat_history.messages):
# chain.llm.get_num_tokens_from_messages(messages)

# to get a list of all documents in the database:
# vectordb._client.get_collection(name='langchain').get()

# create a .env file in the root directory of the project with the following variables:
# OPENAI_API_KEY=<openai api key>
# SERPAPI_API_KEY=<serpapi api key> 
# WANDB_API_KEY=<wandb api key> (optional)

In [2]:
# Raw prompt text for the PRD assistant. The backslash after the opening quotes
# keeps the string from starting with a newline. `{history}` and `{input}` are
# the only two placeholders; the text ends with an open "AI: " line that follows
# the "Human: {input}" line.
string_template = """\
You are a tech product manager. You have to help the user create a Product Requirement Document based on the questions the user asks you. The user will ask you specific questions about each topic they want to be included in the PRD. 

Do not repeat the same information again and again. Answers to each question should be unique and not repetitive. By this I mean do not repeat any ideas or sentences. Do not copy statements and ideas from previous sections. Any ideas or examples should only be in accordance to the particular section.

Format your responses in Markdown mode with each topic being the ##Heading, and your answer being the content. Highlight important points in **bold**. Give the PRD a suitable #Title.

For reference, let us say there are 3 people - A, B, and C belonging to different age groups, professions, and geographies. A is a 20-year-old college student from India. B is a 40-year-old working professional from the US. C is a 60-year-old retired person from the UK.
If required, for that particular section, you can use any of these people as examples to explain your point. The user does not know anything about these people.

You do not need to include these 3 people in every section. You can use them as examples only if required. You can also use other examples if you want to. You can also use yourself as an example if you want to.

Current conversation:
{history}
Human: {input}
AI: """

# Wrap the raw prompt text in a PromptTemplate, declaring the two placeholders
# ("history" and "input") that appear in the string.
prompt_template = PromptTemplate(
    input_variables=["history", "input"],
    template=string_template,
)

# One prompt per PRD section. Each string is a section title line ending in ":"
# followed by the instructions for that section. Entries that are commented out
# are sections currently left out of the list.
prompts_list = [
    """Product Overview:
Define the Purpose and Scope of this product. It should include how different groups of users across ages, genders, and geographies can use this product. Include an overview of the product. Why should one use this product? Define the target audience and stakeholders in detail. Also, include the rationale behind having the particular group as the target audience. Explain the gap it is trying to fill as well - how it is different from and better than other similar products?""",
    """Product Objectives:
First, analyze whether the product objectives align with the company objectives if the company and company objectives are mentioned. Else, talk about the objectives of the product, what it will help achieve, and how it will assist customers. Think aloud. Explain your reasoning. Also, talk about why and how the business models of the product and company match. What company goals can the product help achieve - be it attracting customers, generating profits, or promoting the goodwill of the company? Also, explain how it would do this.""",
#     """Market Research:
# First, list out current and potential competitors. Current competitors should include already established businesses/products. Potential competitors should include products and businesses that aren’t yet popular or are still under development/ beta version. Also include major or minor differences between our product and the competitor products you have identified. Analyze how aspects of our product or competitor products are better for that particular aspect. How do the target customers different? Does our product better cater to current trends and expectations of the users? How? What should the product include to meet those trends and expectations.""",
#     """Competitive Analysis Table:
# Use all the above competitors to create a competitive analysis of these applications in a tabular form using the following points - user base, user region, different features supported, and pricing tiers. Don't limit yourself to these categories and think of other categories yourself. Return the output in a well-structured Markdown table""",
#     """Feature Requirements:
# What are some of the important features that should be implemented? Follow the MoSCoW format (Must have, Should have, Could have, Won’t have, along with why). How are we going to collect user inputs and use user data that we collect to make the product better and add other features?""",
    """Launch Strategy:
Compare US vs International markets for this product. Also, analyze this product and figure out what customer demographic is this product for. Based on these things, come up with a detailed launch strategy for the product. List the TAM vs SAM vs SOM. TAM or Total Available Market is the total market demand for a product or service. SAM or Serviceable Available Market is the segment of the TAM targeted by your products and services which is within your geographical reach. SOM or Serviceable Obtainable Market is the portion of SAM that you can capture.""",
#     """User Stories:
# Create user stories for the product. User stories are short, simple descriptions of a feature told from the perspective of the person who desires the new capability, usually a user or customer of the system. They typically follow a simple template: As a < type of user >, I want < some goal > so that < some reason >. For example, As a college student, I want to be able to share my notes with my friends so that I can help them with their studies.""",
    """Acceptance Criteria:
Define the quality of completeness required to be able to get to the MVP stage of this product.""",
#     """Success Metrics:
# How do we define success in this product? What are the KPIs to look out for? How are they measured? Why do those KPIs matter? How are we going to use these KPIs to make the product better?""",
    """Technical Feasibilities:
Outline the technical roadmap for this product. What mobile devices should this application be available for? What is a scalable and reliable tech stack which can be used for the frontend and the backend for this application?""",
    """Timeline:
Define the timeline for the product development. In addition to the timeline, what are the resources required to complete this project. Think about the resources required for each stage of the project, the number of employees required for each stage, and the time required for each stage."""
]

# Sample products. The two lists are parallel: product_descriptions[i] is the
# description that goes with product_names[i].
product_names = ["DateSmart", "TalentScout", "MusicMate"]
product_descriptions = [
    "A dating app that encourages users to have a conversation with each other before deciding whether they want to match. While some dating apps allow direct messages, it is only for plus users, and only to a limited number of people. Our app’s focus is to encourage conversation first. The app ensures strict verification to prevent fraud, scamsters and fake accounts.",
    "A Sports Analytics based product that allows coaches, analysts, team managers and owners scout for talent based on performances in the domestic and lower division circuits. The app will take lower division statistics, team information, team-mates’ information of players, along with ground information and weather information on match days, to predict how a player will fare in higher level of sports (national and international). State-of-the-art models such as Graph Neural Networks will be used to generate highly accurate predictions.",
    "An app that recommends music to you based on your preferences, time of the day, and what activity you did before, and what you plan to do after. The app will use predictive modelling, frequency modelling, and NLP techniques. The app will also have a social media aspect to it, where you can share your music with your friends, and see what they are listening to."
]

In [3]:
# GPT-4 chat model at temperature 0. The API key is read from the
# OPENAI_API_KEY environment variable; a missing variable raises KeyError here.
chat = ChatOpenAI(
    openai_api_key=os.environ["OPENAI_API_KEY"],
    model="gpt-4",
    temperature=0,
    # NOTE(review): the original comment gives the retry waits as 2, 4, 8, 16,
    # 32, and 60 seconds - confirm against the installed langchain version.
    max_retries=6,
)

# wandb.init(
#     project="chat-prd-gpt-4",
#     config={
#         "model": "gpt-4",
#         "temperature": 0
#     },
#     entity="arihantsheth",
#     name=f"{product_name}_gpt-4",
# )

# Back the conversation memory with the cached transcript, then build the chain
# from the chat model, that memory and the PRD prompt template.
memory = ConversationBufferMemory(chat_memory=chat_history)

chain = LLMChain(
    prompt=prompt_template,
    llm=chat,
    memory=memory,
    verbose=False,
)

In [4]:
# # Not required to run this cell, it will only incur additional cost. 
# # All the results are already saved in `output` and `chat_history` variables.

# for product_name, product_description in zip(product_names[0:1], product_descriptions[0:1]):

#     chat = ChatOpenAI(
#     model="gpt-4",
#     temperature=0,
#     openai_api_key=os.environ["OPENAI_API_KEY"],
#     request_timeout=120,
#     )

#     wandb.init(
#         project="chat-prd-gpt-4",
#         config={
#             "model": "gpt-4",
#             "temperature": 0
#         },
#         entity="arihantsheth",
#         name=f"{product_name}_gpt-4",
#     )

#     memory = ConversationBufferMemory()

#     chain = LLMChain(
#         llm=chat,
#         memory=memory,
#         prompt=prompt_template,
#         verbose=False
#     )

#     with get_openai_callback() as callback:

#         initial_output = chain.predict(
#             input=f"""\
# I want to create the following new product:
# {product_name}.

# Product description: {product_description}

# DO NOT START WRITING. WAIT FOR THE HUMAN TO WRITE "Start generating the PRD" BEFORE YOU START WRITING.
# """, 
#             callbacks=[WandbTracer()]
# )

#         output = ""
#         for i, prompt in enumerate(prompts_list):

#             # with get_openai_callback() as callback:

#             output += chain.predict(
#                 input=prompt,
#                 callbacks=[WandbTracer()]
#             )

#             print(f"Prompt {i+1} of {len(prompts_list)}")

#             output += "\n\n"

#         # if not os.path.exists(f"../generated_prds/{product_name}"):
#         #     os.makedirs(f"../generated_prds/{product_name}")

#         # with open(f"../generated_prds/{product_name}/{product_name} 2023 Chat gpt-4.md", "w") as f:
#         #     f.write(output)
    
#     wandb.log({"prd": output})
#     clear_output()

# print(f"Product name: {product_name}")
# print(f"Total tokens used: {callback.total_tokens}")
# print(f"Prompt Tokens: {callback.prompt_tokens}")    
# print(f"Completion Tokens: {callback.completion_tokens}")
# print(f"Successful Requests: {callback.successful_requests}")
# print(f"cost to generate: ${callback.total_cost:.2f}")

In [5]:
def search_and_embed(search_query, vectordb, num_results=3, request_timeout=10):
    """Search Google through SerpAPI and embed the top result pages into ``vectordb``.

    For each of the first ``num_results`` organic results the page is downloaded,
    every ``<p>`` element whose text is longer than 100 characters is kept, and the
    resulting text is split into documents that are added to ``vectordb`` with
    ``source`` (the URL) and ``title`` metadata. Pages that cannot be fetched, or
    that do not answer with HTTP 200, are skipped.

    Args:
        search_query: Query string sent to SerpAPI.
        vectordb: Vector store; only ``add_documents(documents=...)`` is called on it.
        num_results: Maximum number of organic results to download (default 3).
        request_timeout: Seconds to wait for each page before skipping it (default 10).

    Returns:
        ``"Success"`` once all candidate pages were processed, or
        ``"Error: <message>"`` when the SerpAPI response contains an ``error`` key.

    Raises:
        KeyError: If ``SERPAPI_API_KEY`` is missing from the environment, or an
            organic result has no ``title``/``link`` entry.
    """
    search = GoogleSearch({
        "q": search_query,
        "location": "Mumbai, Maharashtra, India",
        "api_key": os.environ["SERPAPI_API_KEY"],
    })

    results = search.get_dict()

    if "error" in results:
        return f"Error: {results['error']}"

    # The key may be absent from the response; fall back to an empty list
    # instead of raising KeyError.
    organic_results = results.get("organic_results", [])
    print(f"Number of organic results: {len(organic_results)}")

    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)

    for result in organic_results[:num_results]:
        title, link = result['title'], result['link']
        print(f"Title: {title}")

        try:
            print(f"Requesting {link}")
            # Without a timeout a single unresponsive host would block the notebook forever.
            response = requests.get(link, timeout=request_timeout)
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
            print("Connection timed out... Moving to next link")
            continue
        except Exception as e:
            print(f"Error: {e}")
            continue

        if response.status_code != 200:
            print()
            continue

        soup = BeautifulSoup(response.text, 'html.parser')
        paragraphs = [p.get_text(separator=' ') for p in soup.find_all('p')]

        # Join with blank lines: CharacterTextSplitter splits on "\n\n" by default, so
        # gluing paragraphs together directly would leave each page as one oversized chunk.
        sections = [f'## {title}'] + [text for text in paragraphs if len(text) > 100]
        webpage = "\n\n".join(sections)

        docs = text_splitter.create_documents(texts=[webpage], metadatas=[{"source": link, "title": title}])
        ids = vectordb.add_documents(documents=docs)
        print(f"Added {len(ids)} documents to the database")
        print()

    clear_output(wait=True)
    return "Success"

def update_qa_chain(vectordb):
    """Create a conversational retrieval chain backed by ``vectordb``.

    The retriever is asked for the 2 best-matching documents per question, the
    LLM is GPT-4 at temperature 0, retrieved documents are combined with the
    "stuff" chain type, and source documents are returned with each answer.

    Args:
        vectordb: Vector store exposing ``as_retriever(search_kwargs=...)``.

    Returns:
        The object produced by ``ConversationalRetrievalChain.from_llm``.
    """
    top_two_retriever = vectordb.as_retriever(search_kwargs={"k": 2})
    gpt4 = ChatOpenAI(model="gpt-4", temperature=0)

    chain_options = {
        "llm": gpt4,
        "chain_type": "stuff",
        "retriever": top_two_retriever,
        "return_source_documents": True,
    }
    return ConversationalRetrievalChain.from_llm(**chain_options)

def search_competitors_info(competitors, competitor_queries, vectordb):
    """Run every query template for every competitor and embed the search results.

    Args:
        competitors: Iterable of competitor names; each name is printed before
            its searches start.
        competitor_queries: Iterable of templates containing a ``{competitor}``
            placeholder.
        vectordb: Vector store handed through to ``search_and_embed``.

    Returns:
        Always the string ``"Success"``.
    """
    for name in competitors:
        print(name)
        for template in competitor_queries:
            search_and_embed(
                search_query=template.format(competitor=name),
                vectordb=vectordb,
            )

    return "Success"

def query_competitors_db(competitors, qa_chain_chat):
    """Ask the QA chain for user base, revenue and new features of each competitor.

    Args:
        competitors: Iterable of competitor names.
        qa_chain_chat: Callable taking ``{"question": ..., "chat_history": []}`` and
            returning a mapping with an ``"answer"`` string and a
            ``"source_documents"`` list whose items carry a ``metadata`` dict.

    Returns:
        ``{competitor: {"User Base": str, "Revenue": str, "New Features": str}}``.
        Every value is the chain's answer followed by ``"\\n Web Source: <url>"``;
        ``N/A`` is used as the URL when the chain returned no source document.
    """
    # The templates do not depend on the competitor, so build them once.
    question_templates = {
        "User Base": "What is the user base of {competitor}?",
        "Revenue": "What is the revenue of {competitor}?",
        "New Features": "What are new features of {competitor}?",
    }

    comp_analysis_results = {}

    for competitor in competitors:
        print(f"Competitor: {competitor}")

        answers = {}
        for dict_key, template in question_templates.items():
            db_res = qa_chain_chat(
                {
                    "question": template.format(competitor=competitor),
                    "chat_history": [],
                }
            )
            # Retrieval can come back empty; do not crash on source_documents[0].
            sources = db_res.get("source_documents") or []
            source = sources[0].metadata.get("source", "N/A") if sources else "N/A"
            answers[dict_key] = db_res["answer"] + "\n Web Source: " + source

        comp_analysis_results[competitor] = answers

    return comp_analysis_results

def query_metrics_db(metrics_search_query, qa_chain_chat):
    """Ask the QA chain a single metrics question and append the first web source.

    Args:
        metrics_search_query: Question passed to the chain.
        qa_chain_chat: Callable taking ``{"question": ..., "chat_history": []}`` and
            returning a mapping with an ``"answer"`` string and a
            ``"source_documents"`` list whose items carry a ``metadata`` dict.

    Returns:
        The answer followed by ``"\\n Web Source: <url>"``; ``N/A`` is used as the
        URL when the chain returned no source document.
    """
    db_res = qa_chain_chat(
        {
            "question": metrics_search_query,
            "chat_history": [],
        }
    )

    # Retrieval can come back empty; do not crash on source_documents[0].
    sources = db_res.get("source_documents") or []
    source = sources[0].metadata.get("source", "N/A") if sources else "N/A"

    return db_res["answer"] + "\n Web Source: " + source

In [6]:
# Prompt asking the LLM for a Google search query that finds the names of top apps.
# It contains no placeholders, so it is a plain string rather than an f-string.
get_google_search_query_prompt = """\
Generate a Google search query to find the names of top apps 
Do not include the following in the search query:
- Double quotes
- Current Date or Year
- A period at the end of the sentence
- Location (e.g. `in the US`, `in the world`)

Only return the query nothing else.
"""

# `chain` comes from an earlier cell; the callback captures token usage for this call.
with get_openai_callback() as callback_google_search_query:
    search_query = chain.predict(
        input=get_google_search_query_prompt,
        # callbacks=[WandbTracer()]
    )

print(f"Google Search query: {search_query}")

Goolge Search query: top dating apps


In [7]:
# Prompt asking the LLM to phrase the query later used against the vector DB.
# This is a plain (non-f) string, so the `{...}` parts below are not interpolated here.
get_db_retrieval_query_prompt = """\
Generate a natural language query for the database retrieval system. \
It best understands query in the following format: \
Top {category} {apps/products/companies}
"""

# `chain` comes from an earlier cell; the callback captures token usage for this call.
with get_openai_callback() as callback_db_retrieval_query:
    db_retrieval_query = chain.predict(
        input=get_db_retrieval_query_prompt,
        # callbacks=[WandbTracer()]
    )

print(f"DB Retrieval query: {db_retrieval_query}")

DB Retrieval query: Top dating apps


In [8]:
# Prompt asking the LLM for a Google search query about success metrics.
get_metrics_query_prompt = """\
Generate a Google search query to find the best metrics to measure \
how well a product in our category is doing.
Only return the query nothing else.
"""

# Use a dedicated callback name: the original reused `callback_db_retrieval_query`,
# which overwrote the token usage captured for the DB retrieval query cell.
with get_openai_callback() as callback_metrics_query:
    metrics_search_query = chain.predict(
        input=get_metrics_query_prompt,
        # callbacks=[WandbTracer()]
    )

print(f"Metrics Retrieval query: {metrics_search_query}")

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-<redacted> on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-<redacted> on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for 10KTPM-200RPM in organization org-<redacted> on tokens p

Metrics Retrieval query: best metrics to measure success of a dating app


In [9]:
# search_query = "What are the top dating apps in the market?"
# Fresh vector store that receives every page embedded by the searches below.
vectordb = Chroma(embedding_function=OpenAIEmbeddings())

# Search for competitors and embed them into VectorDB
search_and_embed(search_query=search_query, vectordb=vectordb)
print("Finished embedding list of competitors into VectorDB.")

# Search for metrics and embed them into VectorDB
search_and_embed(search_query=metrics_search_query, vectordb=vectordb)
print("Finished embedding list of metrics into VectorDB.")

# Load the QA Chain
qa_chain_chat = update_qa_chain(vectordb=vectordb)

# Retrieve info on metrics from VectorDB
metrics_result = query_metrics_db(metrics_search_query, qa_chain_chat)

# time.sleep(120)
# Retrieve the list of competitors from VectorDB
competitors_answer = qa_chain_chat(
        {
            "question": f"{db_retrieval_query}. Only return the names in a comma separated list (maximum 5 names).",
            "chat_history": []
        }
    )['answer']

# Trim whitespace around each name instead of deleting every space, so multi-word
# names (e.g. "Coffee Meets Bagel") stay intact; drop empty entries left by stray commas.
competitors = [name.strip() for name in competitors_answer.split(",") if name.strip()]

print("Finished retrieving list of competitors from VectorDB:")
print(competitors)

# List the queries to search for competitor information
competitor_queries = [
        "What is the user base of the {competitor}?",
        "What is the revenue of the {competitor}?",
        "What are new features of the {competitor}?",
    ]

# Search for competitor information on the web and embed them into VectorDB
search_competitors_info(competitors=competitors, competitor_queries=competitor_queries, vectordb=vectordb)
print("Finished embedding competitor information into VectorDB.")

# Update the QA Chain
qa_chain_chat = update_qa_chain(vectordb=vectordb)

# time.sleep(120)
# Retrieve the competitor information from VectorDB
competitive_analysis_results = query_competitors_db(competitors=competitors, qa_chain_chat=qa_chain_chat)
print("Finished retrieving competitor information from VectorDB and parsed the results into a dictionary.")

Finished embedding competitor information into VectorDB.
Competitor: Tinder
Competitor: Bumble
Competitor: OkCupid
Competitor: Hinge
Competitor: Match.com
Finished retrieving competitor information from VectorDB and parsed the results into a dictionary.


In [10]:
from prettytable import PrettyTable, MARKDOWN

# Column headers: the app name plus the three answers collected per competitor.
query_names = ["App Name", "User Base", "Revenue", "New Features"]

# Left-aligned table rendered in Markdown so it can be pasted into the prompts.
table = PrettyTable(field_names=query_names, header=True, align="l")
table.set_style(MARKDOWN)

# One row per competitor; blank lines in the features answer are collapsed.
for company, val in competitive_analysis_results.items():
    row = [company, val["User Base"], val["Revenue"], val["New Features"].replace("\n\n", "\n")]
    table.add_row(row)

# Two projections of the same table: features only, and user base + revenue.
app_features_table = table.get_string(fields=["App Name", "New Features"])
app_details_table = table.get_string(fields=["App Name", "User Base", "Revenue"])

In [11]:
# time.sleep(60) # workaround for RateLimitExceeded error, does not work for me

In [12]:
# Prompt for the "Market Analysis" PRD section. The f-string embeds
# `app_details_table` (competitor user base and revenue, with web sources).
market_analysis_prompt = f"""\
The following tabular formatted object contains details of competitor apps. Use this information to support your analysis of the market and the product if required:
{app_details_table}

Cite the sources of the information you use to support your analysis. \
The sources can be found in the above table.
Now, let us get continue generating the PRD using the same Markdown format as before.

Market Analysis:
Include major or minor differences between our product and the competitor products. \
Analyze how aspects of our product or competitor products are better for that particular aspect. \
How do the target customers different? \
Does our product better cater to current trends and expectations of the users? How? \
What should the product include to meet those trends and expectations.
"""

# Append the generated section to the running PRD text in `output`
# (`output` and `chain` come from earlier cells); the callback captures token usage.
with get_openai_callback() as callback_market:
    output += chain.predict(
        input=market_analysis_prompt,
        # callbacks=[WandbTracer()]
    )
    output += "\n\n"

In [13]:
# time.sleep(60) # workaround for RateLimitExceeded error, does not work for me

In [14]:
# Prompt for the "Competitive Analysis" PRD section; it embeds no data itself and
# relies on what the chain has already been sent.
# NOTE(review): the text refers to an "above JSON object", but the prompts visible in
# this part of the notebook pass competitor data as Markdown tables — confirm the wording.
competitive_table_prompt = """\
Competitive Analysis:
Use all the above competitors to create a competitive analysis of these applications \
in a tabular form using the following points - user base, user region, \
different features supported, and pricing tiers. \
Don't limit yourself to these categories and think of other categories yourself.
Cite the sources of the information you use to support your analysis. \
The sources can be found in the above JSON object.
Return the output in a well-structured Markdown table. Use the competitor app details from the JSON object if required.
"""

# Append the generated section to the running PRD text in `output`;
# the callback captures token usage for this call.
with get_openai_callback() as callback_competitive_table:
    output += chain.predict(
        input=competitive_table_prompt,
        # callbacks=[WandbTracer()]
    )
    output += "\n\n"

In [15]:
# time.sleep(60) # workaround for RateLimitExceeded error, does not work for me

In [None]:
# Prompt for the "Feature Requirements" PRD section. The f-string embeds
# `app_features_table` (competitors' new features, with web sources).
# NOTE(review): the text calls the data both a "table" and a "JSON" object, while
# `app_features_table` is a Markdown table string — confirm the wording.
feature_req_prompt = f"""\
Feature Requirements:
The following table contains the features of the competitor apps. \
Use this information to support your analysis of the features if required:
{app_features_table}

Cite the sources of the information you use to support your analysis. The sources can be found in the above table.
What are some of the important features that should be implemented? \
Follow the MoSCoW format (Must have, Should have, Could have, Won’t have, along with why). \
How are we going to collect user inputs and use user data that we collect to make \
the product better and add other features?

The JSON above has new features of competitors. Based on the competitors’ new features, talk about what we can do better, and build on their features? What features apart from the those can we also include to stand out?
Cite the sources of the information you use to support your analysis. The sources can be found in the above JSON object.
"""

# Wrapped in a callback like the other PRD-section cells so this call's
# token usage is captured as well.
with get_openai_callback() as callback_feature_req:
    output += chain.predict(
        input=feature_req_prompt,
        # callbacks=[WandbTracer()]
    )
    output += "\n\n"

In [None]:
# Token estimate: every message currently in the chain's memory plus the
# feature-requirements prompt text.
# NOTE(review): if this cell runs after the feature-requirements call, the memory
# presumably already contains that prompt, so it would be counted twice — confirm
# the intended cell order.
feature_req_prompt_tokens = chain.llm.get_num_tokens_from_messages(chain.memory.chat_memory.messages) + chain.llm.get_num_tokens(feature_req_prompt)
print(f"feature_req_prompt_tokens: {feature_req_prompt_tokens}")

6653

In [None]:
# time.sleep(60) # workaround for RateLimitExceeded error, does not work for me

In [None]:
# Prompt for the "Success Metrics" PRD section. The f-string embeds `metrics_result`,
# the answer (with its web source) returned by `query_metrics_db`.
success_metrics_prompt = f"""\
Success Metrics:

The following are the metrics suggested by a few websites. Use this information to support your analysis of the success metrics if required:
{metrics_result}

Cite the source of the information you use to support your analysis. The source can be found above.
How do we define success in this product? What are the KPIs to look out for? \
How are they measured? Why do those KPIs matter? \
How are we going to use these KPIs to make the product better?
"""

# Append the generated section to the running PRD text in `output`;
# the callback captures token usage for this call.
with get_openai_callback() as callback_metrics:
    output += chain.predict(
        input=success_metrics_prompt,
        # callbacks=[WandbTracer()]
    )
    output += "\n\n"

In [None]:
# time.sleep(60) # workaround for RateLimitExceeded error, does not work for me

In [None]:
# Prompt for the closing "Conclusion" PRD section.
conclusion_prompt = """\
Conclusion:
Include any final thoughts or comments about the product or the market. \
Include any other information that you think is important to get across to the reader. \
Include any information that is not present in the PRD but is important to the product.
"""

# Append the generated section to the running PRD text in `output`;
# the callback captures token usage for this call.
with get_openai_callback() as callback_conclusion:
    output += chain.predict(
        input=conclusion_prompt,
        # callbacks=[WandbTracer()]
    )
    output += "\n\n"

# Backward-compatible alias for the original, misspelled variable name.
callback_conlusion = callback_conclusion

In [None]:
# # Do not run this cell, it is not updated to the latest version of the code

# total_tokens = callback.total_tokens + google_search_query_callback.total_tokens + callback_web_prompts.total_tokens
# prompt_tokens = callback.prompt_tokens + google_search_query_callback.prompt_tokens + callback_web_prompts.prompt_tokens
# completion_tokens = callback.completion_tokens + google_search_query_callback.completion_tokens + callback_web_prompts.completion_tokens
# successful_requests = callback.successful_requests + google_search_query_callback.successful_requests + callback_web_prompts.successful_requests
# total_cost = callback.total_cost + google_search_query_callback.total_cost + callback_web_prompts.total_cost

# print(f"Product name: {product_name}")
# print(f"Total tokens used: {total_tokens}")
# print(f"Prompt Tokens: {prompt_tokens}")    
# print(f"Completion Tokens: {completion_tokens}")
# print(f"Successful Requests: {successful_requests}")
# print(f"Total cost to generate: ${total_cost:.2f}")

In [None]:
# Render the accumulated PRD text in `output` as Markdown in the notebook.
display(Markdown(output)) # to view the output, check generated_prds/ folder

In [None]:
# # To save the generated prd to a file

# if not os.path.exists(f"../generated_prds/{product_name}"):
#     os.makedirs(f"../generated_prds/{product_name}")

# with open(f"../generated_prds/{product_name}/{product_name} with internet (features) Chat gpt-4.md", "w") as f:
#     f.write(output)