# Input Query
Provide the input query for the competitor analysis (e.g., "chatbot for customer support").


In [None]:
# Product or product category whose competitors the rest of the notebook researches.
input_query = "chatbot for customer support" # Change this to suit your needs
print(input_query)

chatbot for customer support


# Install Dependencies
Install the libraries needed for LLM inference (vLLM, PyTorch) and for PDF report generation (pdfkit, wkhtmltopdf).


In [None]:
%%capture
!pip install vllm
!pip install torch torchvision torchaudio
!pip install pdfkit
!sudo apt-get install wkhtmltopdf

# Set Device
We need CUDA for vLLM. If CUDA is not available, change the runtime to T4 GPU.


In [None]:
import torch
# Pick the GPU when one is visible; otherwise fall back to CPU and tell the user
# how to fix the runtime.
device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cpu":
    print("Opps... CUDA not available, change the runtime to GPU")

print(device)

cuda


# Find Competitors for the Product
Using the Google Gemini API, this step identifies competitors for the input product.


In [None]:
import google.generativeai as genai
from typing import List

class CompetitorFinderAgent:
    """Ask Google Gemini for the names of products competing with a given product."""

    def __init__(self, api_key: str):
        """
        Configure the Gemini SDK and create the model handle used for queries.
        :param api_key: API key for Gemini.
        """
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel("gemini-pro")

    def find_competitors(self, product_name: str, num_results: int = 3) -> List[str]:
        """
        Find competitors for a given product using Gemini API.

        The reply is expected to contain a numbered list ("1. Name"); every other
        line (headings, blank lines, commentary) is ignored.

        :param product_name: Name of the product to search competitors for.
        :param num_results: Number of competitor names to return.
        :return: List of competitor names, at most ``num_results`` long; empty when
                 the model returned no usable text.
        """
        prompt = f"List {num_results} competitors for {product_name} in the market."

        response = self.model.generate_content(prompt)

        try:
            text = response.text if response else ""
        except ValueError:
            # NOTE(review): the SDK is documented to raise ValueError from `.text`
            # when the reply holds no usable candidate (e.g. a blocked prompt);
            # that case is treated the same as an empty answer.
            return []
        if not text:
            return []

        competitors = []
        for raw_line in text.splitlines():
            line = raw_line.strip()
            # `line[:1]` is safe on blank lines, where `line[0]` would raise IndexError.
            if not line[:1].isdigit() or ". " not in line:
                continue
            # Drop the "N. " prefix, then the markdown emphasis asterisks around the name.
            name = line.split(". ", 1)[1].strip().strip("*").strip()
            if name:
                competitors.append(name)

        return competitors[:num_results]

# Placeholder credential: substitute a real Gemini API key before running.
api_key = "Your_Gemini_API"
finder = CompetitorFinderAgent(api_key=api_key)
competitors = finder.find_competitors(product_name=input_query)
# One call prints the heading and the list on separate lines.
print(f"Competitors for '{input_query}':", competitors, sep="\n")


Competitors for 'chatbot for customer support':
['Zendesk', 'Salesforce', 'Oracle']


In [None]:
# The Gemini client is not used by any later cell; drop the reference.
del finder

# Scrape Competitor Information
This step scrapes relevant information about competitors from specified websites using Google's Custom Search Engine.


In [None]:
import requests
from bs4 import BeautifulSoup

class WebScraper:
    """Find pages through the Google Custom Search JSON API and extract their paragraph text."""

    def __init__(self, api_key, cx, user_agent=None, timeout=10):
        """
        :param api_key: Google API key used for the Custom Search requests.
        :param cx: Custom Search Engine ID.
        :param user_agent: User-Agent header sent when downloading pages; a desktop
                           Chrome string is used when omitted.
        :param timeout: Seconds to wait on any single HTTP request before giving up.
        """
        self.api_key = api_key
        self.cx = cx  # Custom Search Engine ID
        self.base_url = "https://www.googleapis.com/customsearch/v1"
        self.user_agent = user_agent or "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        self.headers = {"User-Agent": self.user_agent}
        self.timeout = timeout

    def google_search_scrape(self, query, websites, num_results=10):
        """
        Run one Custom Search request per entry in ``websites`` and collect the hits.

        :param query: Search text.
        :param websites: Extra search terms appended to the query, one request each
                         (e.g. "" for no restriction, "site:g2.com").
        :param num_results: Results requested per search; clamped to 1-10, the range
                            the API accepts for ``num``.
        :return: List of dicts with "website", "title" and "link" keys. Failed
                 searches are reported on stdout and skipped.
        """
        num_results = max(1, min(int(num_results), 10))

        search_results = []
        for website in websites:
            search_query = f"{query} {website}".strip()
            params = {
                'key': self.api_key,
                'cx': self.cx,
                'q': search_query,
                'num': num_results
            }
            try:
                response = requests.get(self.base_url, params=params, timeout=self.timeout)
            except requests.RequestException as exc:
                # A network failure on one search must not abort the remaining ones.
                print(f"Error fetching search results for {search_query}: {exc}")
                continue

            if response.status_code != 200:
                print(f"Error fetching search results for {search_query}: {response.status_code}")
                continue

            try:
                items = response.json().get('items', [])
            except ValueError as exc:
                # The body was not valid JSON.
                print(f"Error fetching search results for {search_query}: {exc}")
                continue

            for item in items:
                title = item.get('title', 'No Title')
                link = item.get('link')
                # Separate name so the loop variable `website` is not overwritten.
                display_link = item.get('displayLink', 'No Website')

                # A hit without a URL cannot be scraped later.
                if not link:
                    continue

                # Filter out results with irrelevant website links (empty or default links)
                if display_link.strip() and title.strip() != "No Title":
                    search_results.append({"website": display_link, "title": title, "link": link})

        return search_results

    def scrape_page_content(self, url):
        """
        Download ``url`` and return the text of all its <p> elements joined by spaces.

        :return: The paragraph text, or "" when the page cannot be fetched (network
                 error, timeout, or a non-200 status), in which case a message is printed.
        """
        try:
            response = requests.get(url, headers=self.headers, timeout=self.timeout)
        except requests.RequestException:
            print(f"Failed to fetch {url}")
            return ""

        if response.status_code != 200:
            print(f"Failed to fetch {url}")
            return ""

        soup = BeautifulSoup(response.text, 'html.parser')
        return " ".join(p.text for p in soup.find_all('p'))

    def get_content_from_query(self, query, websites, num_results=5):
        """
        Search for ``query`` and return the concatenated text of the pages found.

        :param query: Search text.
        :param websites: Extra search terms, forwarded to ``google_search_scrape``.
        :param num_results: Results requested per search.
        :return: Page texts, each followed by a newline; "" when nothing usable was found.
        """
        search_results = self.google_search_scrape(query, websites, num_results)

        pages = []
        seen_links = set()
        for result in search_results:
            link = result["link"]
            # Different site filters can return the same URL; scrape it only once.
            if link in seen_links:
                continue
            seen_links.add(link)

            content = self.scrape_page_content(link)
            if len(content) < 20:  # Avoid empty or irrelevant content
                continue
            pages.append(content)

        return "".join(page + "\n" for page in pages)

# Placeholder credentials: replace with a Google API key and the ID of the
# Custom Search Engine it should query.
api_key, cx = "Your_Google_API", "Your_ID"
scraper = WebScraper(api_key=api_key, cx=cx)


In [None]:
# One unrestricted search plus one limited to g2.com for every competitor.
websites = ["", "site:g2.com"]
separator = "=" * 50
scapped_data = []
for competitor in competitors:
    query = f"information about {competitor}"
    content = scraper.get_content_from_query(query, websites)
    scapped_data.append(content)
    # Scraped text first, then a divider line followed by blank lines.
    print(content, f"{separator} \n\n", sep="\n")

print(scapped_data)

Failed to fetch https://www.g2.com/products/zendesk-support-suite/reviews
Failed to fetch https://www.g2.com/products/zendesk-sell/pricing
Failed to fetch https://www.g2.com/products/zendesk-sell/reviews
Failed to fetch https://www.g2.com/compare/hubspot-service-hub-vs-zendesk-support-suite
Failed to fetch https://www.g2.com/products/zendesk-qa/pricing
Zendesk for customer service The complete service solution Zendesk for employee service Treat employees like customers Zendesk for sales The modern sales solution Sunshine platform Fast, open and flexible Marketplace Apps, integrations and partners Zendesk for service The complete service solution Zendesk for sales The modern sales solution Enterprise Have conversations at scale Small & medium businesses Grow without growing pains Startups Start off on the right foot Industries Meet your market’s needs Conversational CRM Create lasting customer relationships Blog Guides, reports and best practice Events and webinars Learn from wherever y

# Extract Competitor Information
This step uses a language model to extract and refine relevant information about each competitor from the scraped data. It ensures the data is accurate, removing any irrelevant or conflicting details.


In [None]:
from vllm import LLM, SamplingParams
import torch

class CompetitorInfoExtractor:
    """Condense scraped text into per-competitor summaries with a vLLM-hosted chat model."""

    def __init__(self, model_name="Qwen/Qwen2.5-0.5B-Instruct", tensor_parallel_size=1, max_model_len=32768, max_tokens=16384, temperature=0.7, top_p=0.9, device="cuda"):
        """Create the vLLM engine, fetch its tokenizer and fix the sampling settings."""
        # Release PyTorch's unused cached GPU memory before the engine is created.
        torch.cuda.empty_cache()

        engine_options = dict(
            tensor_parallel_size=tensor_parallel_size,
            gpu_memory_utilization=0.95,
            trust_remote_code=True,
            dtype="half",
            enforce_eager=True,
            max_model_len=max_model_len,
            device=device,
        )
        self.llm = LLM(model_name, **engine_options)
        self.tokenizer = self.llm.get_tokenizer()
        self.sampling_params = SamplingParams(temperature=temperature, top_p=top_p, max_tokens=max_tokens)

    def extract_info(self, competitors, texts, max_len=20000):
        """
        Ask the model to distil each competitor's scraped text.

        :param competitors: Competitor names.
        :param texts: Scraped text per competitor, paired with ``competitors`` by position.
        :param max_len: Number of leading characters of each text placed in the prompt.
        :return: One generated string per (competitor, text) pair, in input order.
        """
        system_prompt = (
            "You are a helpful assistant capable of analyzing data and extracting relevant information. "
            "Extract information from the provided text, removing any unnecessary or irrelevant details. "
            "Handle data conflicts by selecting the most reliable and accurate details."
        )

        def build_chat(competitor, text):
            # System + user turns for a single competitor.
            user_prompt = (
                f"Extract relevant information about {competitor}, from the provided text. "
                "Remove unnecessary data and handle any conflicts by choosing the most reliable and relevant details. "
                "Ensure high data accuracy.\n\n"
                f"Text:\n{text[:max_len]}\n\n"
            )
            return [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ]

        chat_prompts = [
            self.tokenizer.apply_chat_template(build_chat(name, scraped), tokenize=False, add_generation_prompt=True)
            for name, scraped in zip(competitors, texts)
        ]

        # All prompts go to the engine in a single batched call.
        results = self.llm.generate(chat_prompts, sampling_params=self.sampling_params)

        # Only the first completion of each request is kept.
        return [result.outputs[0].text for result in results]


# Checkpoint used for the extraction step, loaded onto the GPU.
model_name = "Qwen/Qwen2.5-0.5B-Instruct"
extractor = CompetitorInfoExtractor(model_name=model_name, device="cuda")


INFO 02-08 19:31:24 __init__.py:190] Automatically detected platform cuda.


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


INFO 02-08 19:31:43 config.py:542] This model supports multiple tasks: {'generate', 'score', 'classify', 'reward', 'embed'}. Defaulting to 'generate'.
INFO 02-08 19:31:43 llm_engine.py:234] Initializing a V0 LLM engine (v0.7.2) with config: model='Qwen/Qwen2.5-0.5B-Instruct', speculative_config=None, tokenizer='Qwen/Qwen2.5-0.5B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=32768, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='xgrammar'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=Qwen/Qwen2.5-0.5B-Instruct, num_sc

Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


INFO 02-08 19:31:50 model_runner.py:1115] Loading model weights took 0.9277 GB
INFO 02-08 19:31:52 worker.py:267] Memory profiling takes 2.04 seconds
INFO 02-08 19:31:52 worker.py:267] the current vLLM instance can use total_gpu_memory (14.74GiB) x gpu_memory_utilization (0.95) = 14.00GiB
INFO 02-08 19:31:52 worker.py:267] model weights take 0.93GiB; non_torch_memory takes 0.05GiB; PyTorch activation peak memory takes 1.44GiB; the rest of the memory reserved for KV Cache is 11.59GiB.
INFO 02-08 19:31:53 executor_base.py:110] # CUDA blocks: 63290, # CPU blocks: 21845
INFO 02-08 19:31:53 executor_base.py:115] Maximum concurrency for 32768 tokens per request: 30.90x
INFO 02-08 19:31:57 llm_engine.py:431] init engine (profile, create kv cache, warmup model) took 6.83 seconds


In [None]:
# Summarise every competitor's scraped text, then print each summary under a rule.
competitor_info_list = extractor.extract_info(competitors, scapped_data, max_len=20000)
rule = "==" * 50
for competitor_info in competitor_info_list:
    print("\n", rule, "\n", competitor_info, "\n\n")


Processed prompts: 100%|██████████| 3/3 [00:13<00:00,  4.45s/it, est. speed input: 936.80 toks/s, output: 82.93 toks/s]


 Zendesk is an American company that provides software-as-a-service products related to customer support, sales, and other customer communications. It was founded in Copenhagen, Denmark in 2007 and raised about $86 million in venture capital investments before going public in 2014. Zendesk has grown internationally and has offices in Ireland, Denmark, and Australia. It was acquired by Hellman & Friedman and Permira for approximately $10.2 billion in 2014. The company provides customer service, employee service, sales, and service solutions. It offers a customer support platform, a sales platform, and a service platform. 



 Here is the extracted relevant information about Salesforce from the provided text, along with handling of any conflicts:

1. **Company Information**:
   - Salesforce is the world's leading customer relationship management (CRM) technology company.

2. **Purpose and Nature of Data**:
   - Salesforce's primary purpose is to help businesses succeed by using business




In [None]:
import gc

# Removing the name alone does not guarantee the engine's GPU memory is returned
# before the next cell loads another model: collect lingering reference cycles,
# then release PyTorch's unused cached blocks.
del extractor
gc.collect()
torch.cuda.empty_cache()

# Generate Competitor Profiles
In this step, the model analyzes the information about each competitor and generates detailed profiles. These profiles include an overview, SWOT analysis, and actionable insights, providing a comprehensive look at each competitor's strengths, weaknesses, opportunities, and threats.


In [None]:
from vllm import LLM, SamplingParams
import torch

class CompetitorProfileAgent:
    """Write a structured profile (overview, SWOT, insights) per competitor with a vLLM-hosted chat model."""

    def __init__(self, model_name="Qwen/Qwen2.5-0.5B-Instruct", tensor_parallel_size=1, max_model_len=32768, max_tokens=16384, temperature=0.7, top_p=0.9, device="cuda"):
        """Create the vLLM engine, fetch its tokenizer and fix the sampling settings."""
        # Release PyTorch's unused cached GPU memory before the engine is created.
        torch.cuda.empty_cache()

        engine_options = dict(
            tensor_parallel_size=tensor_parallel_size,
            gpu_memory_utilization=0.95,
            trust_remote_code=True,
            dtype="half",
            enforce_eager=True,
            max_model_len=max_model_len,
            device=device,
        )
        self.llm = LLM(model_name, **engine_options)
        self.tokenizer = self.llm.get_tokenizer()
        self.sampling_params = SamplingParams(temperature=temperature, top_p=top_p, max_tokens=max_tokens)

    def generate_profile(self, competitors, competitor_info_list):
        """
        Generate one profile per competitor.

        :param competitors: Competitor names.
        :param competitor_info_list: Extracted information per competitor, paired
                                     with ``competitors`` by position.
        :return: List of dicts with "competitor_name" and "profile" keys, in input order.
        """
        system_turn = {"role": "system", "content": "You are a helpful assistant capable of analyzing and structuring data."}

        chat_prompts = []
        for competitor_name, competitor_info in zip(competitors, competitor_info_list):
            request = (
                f"Analyze the following information about {competitor_name} and create a structured profile. "
                "Include an overview, SWOT analysis (Strengths, Weaknesses, Opportunities, Threats), and actionable insights.\n\n"
                f"Competitor Information:\n{competitor_info}"
            )
            conversation = [system_turn, {"role": "user", "content": request}]
            chat_prompts.append(
                self.tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
            )

        # All prompts go to the engine in a single batched call.
        results = self.llm.generate(chat_prompts, sampling_params=self.sampling_params)

        # Results come back in prompt order, so they pair up with the names by position.
        return [
            {"competitor_name": name, "profile": result.outputs[0].text}
            for name, result in zip(competitors, results)
        ]

# Checkpoint used for the profiling step, loaded onto the GPU.
model_name = "Qwen/Qwen2.5-0.5B-Instruct"
profileAgent = CompetitorProfileAgent(model_name=model_name, device="cuda")

INFO 02-08 19:32:15 config.py:542] This model supports multiple tasks: {'generate', 'score', 'classify', 'reward', 'embed'}. Defaulting to 'generate'.
INFO 02-08 19:32:15 llm_engine.py:234] Initializing a V0 LLM engine (v0.7.2) with config: model='Qwen/Qwen2.5-0.5B-Instruct', speculative_config=None, tokenizer='Qwen/Qwen2.5-0.5B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=32768, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='xgrammar'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=Qwen/Qwen2.5-0.5B-Instruct, num_sc

Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


INFO 02-08 19:32:23 model_runner.py:1115] Loading model weights took 0.9238 GB
INFO 02-08 19:32:27 worker.py:267] Memory profiling takes 2.47 seconds
INFO 02-08 19:32:27 worker.py:267] the current vLLM instance can use total_gpu_memory (14.74GiB) x gpu_memory_utilization (0.95) = 14.00GiB
INFO 02-08 19:32:27 worker.py:267] model weights take 0.92GiB; non_torch_memory takes 0.00GiB; PyTorch activation peak memory takes 1.43GiB; the rest of the memory reserved for KV Cache is 11.65GiB.
INFO 02-08 19:32:29 executor_base.py:110] # CUDA blocks: 63611, # CPU blocks: 21845
INFO 02-08 19:32:29 executor_base.py:115] Maximum concurrency for 32768 tokens per request: 31.06x
INFO 02-08 19:32:30 llm_engine.py:431] init engine (profile, create kv cache, warmup model) took 6.02 seconds


In [None]:
# Build a profile for every competitor and print each one under its name.
profiles = profileAgent.generate_profile(competitors, competitor_info_list)

divider = "=" * 50
for profile in profiles:
    print(f"\nCompetitor: {profile['competitor_name']}", profile['profile'], sep="\n")
    print(divider, "\n\n")

Processed prompts: 100%|██████████| 3/3 [00:19<00:00,  6.37s/it, est. speed input: 68.25 toks/s, output: 120.33 toks/s]


Competitor: Zendesk
Overview:
Zendesk is a leading provider of software-as-a-service solutions for customer support, sales, and other customer communications. The company was founded in 2007 in Copenhagen, Denmark and has since grown internationally. Zendesk offers a range of services including customer service, sales, and other customer communications. The company is known for its strong customer base and innovative product offerings. Zendesk is a well-known company and has a strong reputation in the industry.

SWOT Analysis:

Strengths:
1. Strong customer base: Zendesk has a large and loyal customer base, which provides a significant source of revenue.
2. Innovative product offerings: Zendesk offers a range of products that cater to different customer needs, including customer support, sales, and other customer communications.
3. Strong reputation: Zendesk has a strong reputation for providing innovative and effective solutions to customer issues.

Weaknesses:
1. Competition: The in




In [None]:
import gc

# Removing the name alone does not guarantee the engine's GPU memory is returned
# before the next cell loads another model: collect lingering reference cycles,
# then release PyTorch's unused cached blocks.
del profileAgent
gc.collect()
torch.cuda.empty_cache()

# Generate Competitor Analysis Report and Save
In this step, the model creates a detailed competitor analysis report based on the competitor profiles. The report includes an introduction, an overview of each competitor, feature comparisons, and strategic recommendations for the business.

After generating the report, it is saved both as a text file and as a PDF for easy sharing and presentation.


In [None]:
from vllm import LLM, SamplingParams
import torch

class ReportGeneratorAgent:
    """Turn competitor profiles into a single written analysis report with a vLLM-hosted chat model."""

    def __init__(self, model_name="Qwen/Qwen2.5-0.5B-Instruct", tensor_parallel_size=1, max_model_len=32768, max_tokens=32768, temperature=0.7, top_p=0.9, device="cuda"):
        """Create the vLLM engine, fetch its tokenizer and fix the sampling settings."""
        # Release PyTorch's unused cached GPU memory before the engine is created.
        torch.cuda.empty_cache()

        self.llm = LLM(
            model_name,
            tensor_parallel_size=tensor_parallel_size,
            gpu_memory_utilization=0.95,
            trust_remote_code=True,
            dtype="half",
            enforce_eager=True,
            max_model_len=max_model_len,
            device=device
        )
        self.tokenizer = self.llm.get_tokenizer()

        self.sampling_params = SamplingParams(
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_tokens
        )

    def generate_report(self, competitors, profiles):
        """
        Generate the competitor analysis report.

        :param competitors: Competitor names, paired with ``profiles`` by position.
        :param profiles: One entry per competitor: either the profile text itself or
                         a dict holding the text under the "profile" key (the shape
                         returned by ``CompetitorProfileAgent.generate_profile``).
        :return: The generated report text.
        """
        header = (
            "Based on the following competitor profiles, generate a detailed competitor analysis report.\n"
            "Include an introduction, an overview of each competitor, feature comparisons, and strategic recommendations.\n"
            "The report should be structured as follows:\n"
            "1. Introduction\n"
            "2. Competitor Overview\n"
            "3. Feature Comparisons\n"
            "4. Strategic Recommendations\n\n"
            "Competitor Profiles:\n"
        )

        sections = []
        for competitor, profile in zip(competitors, profiles):
            # Interpolating the dict itself would put its repr (braces, key names,
            # escaped newlines) into the prompt; send only the profile text.
            profile_text = profile.get("profile", "") if isinstance(profile, dict) else profile
            sections.append(f"\n{competitor}: {profile_text}")

        prompt = header + "".join(sections)

        messages = [
            {"role": "system", "content": "You are an expert in competitor analysis and report generation."},
            {"role": "user", "content": prompt}
        ]

        text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

        response = self.llm.generate([text], sampling_params=self.sampling_params)

        # A single prompt was sent; keep its first completion.
        return response[0].outputs[0].text

# Checkpoint used for the report step, loaded onto the GPU.
model_name = "Qwen/Qwen2.5-0.5B-Instruct"
reportAgent = ReportGeneratorAgent(model_name=model_name, device="cuda")

INFO 02-08 19:32:54 config.py:542] This model supports multiple tasks: {'generate', 'score', 'classify', 'reward', 'embed'}. Defaulting to 'generate'.
INFO 02-08 19:32:54 llm_engine.py:234] Initializing a V0 LLM engine (v0.7.2) with config: model='Qwen/Qwen2.5-0.5B-Instruct', speculative_config=None, tokenizer='Qwen/Qwen2.5-0.5B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=32768, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='xgrammar'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=Qwen/Qwen2.5-0.5B-Instruct, num_sc

Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


INFO 02-08 19:32:58 model_runner.py:1115] Loading model weights took 0.9238 GB
INFO 02-08 19:33:00 worker.py:267] Memory profiling takes 1.68 seconds
INFO 02-08 19:33:00 worker.py:267] the current vLLM instance can use total_gpu_memory (14.74GiB) x gpu_memory_utilization (0.95) = 14.00GiB
INFO 02-08 19:33:00 worker.py:267] model weights take 0.92GiB; non_torch_memory takes 0.00GiB; PyTorch activation peak memory takes 1.43GiB; the rest of the memory reserved for KV Cache is 11.65GiB.
INFO 02-08 19:33:00 executor_base.py:110] # CUDA blocks: 63611, # CPU blocks: 21845
INFO 02-08 19:33:00 executor_base.py:115] Maximum concurrency for 32768 tokens per request: 31.06x
INFO 02-08 19:33:00 llm_engine.py:431] init engine (profile, create kv cache, warmup model) took 2.93 seconds


In [None]:
report = reportAgent.generate_report(competitors, profiles)
print(report)
# Write as UTF-8 explicitly: the text can contain non-ASCII characters, and the
# platform's default encoding is not guaranteed to represent them.
with open("competitor_analysis_report.txt", "w", encoding="utf-8") as f:
    f.write(report)

Processed prompts: 100%|██████████| 1/1 [01:18<00:00, 78.51s/it, est. speed input: 32.28 toks/s, output: 31.45 toks/s]

### Introduction

The competition landscape for Oracle Corporation is extensive, encompassing various players in the database software industry. This report aims to provide a comprehensive analysis of Oracle Corporation's competitors, highlighting their strengths, weaknesses, and opportunities. The analysis will also offer strategic recommendations to help Oracle Corporation optimize its strategy and expand its market share.

### Overview of Each Competitor

#### Zendesk
- **Overview**: Zendesk is a leading provider of customer support, sales, and other customer communications. Founded in 2007 in Copenhagen, Denmark, the company has since grown internationally. Zendesk offers a range of services including customer service, sales, and other customer communications. The company is known for its strong customer base and innovative product offerings.
- **SWOT Analysis**: 
  - **Strengths**:
    1. **Strong customer base**: Zendesk has a large and loyal customer base, which provides a signi




In [None]:
import pdfkit
import markdown

def save_to_pdf(report_markdown, pdf_file_name="competitor_analysis_report.pdf", input_query=None):
    """
    Render a markdown report to a PDF file.

    :param report_markdown: Report text in markdown.
    :param pdf_file_name: Output file name; used as a suffix when ``input_query`` is given.
    :param input_query: Optional query text prepended to the file name. Every
                        character other than a word character, "." or "-" is
                        replaced by "_".
    """
    import re  # local import: only needed for the file-name clean-up below

    if input_query:
        # Replacing only spaces would let "/" or other illegal characters from the
        # query end up in the output path.
        safe_query = re.sub(r"[^\w.-]", "_", input_query)
        pdf_file_name = f"{safe_query}_{pdf_file_name}"

    report_markdown = preprocess_markdown(report_markdown)

    report_html = markdown.markdown(report_markdown)

    # Options forwarded to pdfkit for the conversion.
    options = {
        'page-size': 'A4',
        'encoding': "UTF-8",
    }

    pdfkit.from_string(report_html, pdf_file_name, options=options)
    print(f"Report saved as {pdf_file_name}")

def preprocess_markdown(markdown_text):
    """
    Put an empty line in front of every list item that starts a line.

    A line counts as such an item when its stripped form begins with "-" and the
    line itself does not begin with a space; all other lines pass through unchanged.

    :param markdown_text: Markdown source.
    :return: The adjusted markdown, lines joined by "\\n".
    """
    def _with_leading_gap(raw):
        # Items indented with spaces (nested ones) are left untouched.
        starts_item = raw.strip().startswith("-") and not raw.startswith(" ")
        return "\n" + raw if starts_item else raw

    return "\n".join(map(_with_leading_gap, markdown_text.splitlines()))

In [None]:
# Render the report to PDF; the query text is prefixed to the output file name.
save_to_pdf(report, input_query=input_query)

Report saved as chatbot_for_customer_support_competitor_analysis_report.pdf


In [None]:
import gc

# Removing the name alone does not guarantee the engine's GPU memory is returned:
# collect lingering reference cycles, then release PyTorch's unused cached blocks.
del reportAgent
gc.collect()
torch.cuda.empty_cache()