In [1]:
!pip install vllm
!pip install torch torchvision torchaudio
!pip install pdfkit
!sudo apt-get install wkhtmltopdf

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
wkhtmltopdf is already the newest version (0.12.6-2).
0 upgraded, 0 newly installed, 0 to remove and 18 not upgraded.


In [2]:
import torch
if torch.cuda.is_available():from vllm import LLM, SamplingParams
import torch

class CompetitorFinderAgent:
    """Asks a vLLM-hosted chat model to name competitors for a product, service, or industry.

    The constructor builds a ``vllm.LLM`` engine and a ``SamplingParams`` object;
    ``find_competitors`` sends one chat-formatted prompt and parses the numbered list
    found in the reply.
    """

    def __init__(self, model_name="Qwen/Qwen2.5-0.5B-Instruct", tensor_parallel_size=1, max_model_len=2048, max_tokens=512, temperature=0.7, top_p=0.9, device="cuda"):
        """Create the engine, its tokenizer, and the sampling configuration.

        Args:
            model_name: Model identifier passed as the first argument to ``LLM``.
            tensor_parallel_size: Forwarded to ``LLM``.
            max_model_len: Forwarded to ``LLM``.
            max_tokens: Forwarded to ``SamplingParams``.
            temperature: Forwarded to ``SamplingParams``.
            top_p: Forwarded to ``SamplingParams``.
            device: Forwarded to ``LLM``.
        """
        # Release cached CUDA blocks before the engine is created.
        torch.cuda.empty_cache()
        self.llm = LLM(
            model_name,
            tensor_parallel_size=tensor_parallel_size,
            gpu_memory_utilization=0.95,
            trust_remote_code=True,
            dtype="half",
            enforce_eager=True,
            max_model_len=max_model_len,
            device=device
        )
        self.tokenizer = self.llm.get_tokenizer()
        self.sampling_params = SamplingParams(
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_tokens
        )

    def find_competitors(self, input_query, N=3):
        """Return the competitor names the model lists for ``input_query``.

        Args:
            input_query: Product, service, or industry to find competitors for.
            N: Number of competitors requested in the prompt. The reply is not
                truncated, so the returned list may be shorter or longer.

        Returns:
            list[str]: The text after the ``"<number>. "`` prefix of every numbered
            line in the reply, in order. Empty if no such line is found.
        """
        prompt = (
            f"Identify {N} major competitors for the following product, service, or industry: \"{input_query}\". "
            "Provide only the names of the competitors in a numbered list."
        )

        messages = [
            {"role": "system", "content": "You are a market research expert."},
            {"role": "user", "content": prompt}
        ]

        text = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )

        response = self.llm.generate([text], sampling_params=self.sampling_params)
        raw_output = response[0].outputs[0].text

        print("\n\nModel response:")
        print(raw_output)
        print("="*50, "\n\n")

        competitors = []
        for line in raw_output.splitlines():
            line = line.strip()
            # Replies routinely contain blank lines; indexing line[0] on one raises IndexError,
            # so empty lines are rejected before the digit check.
            if not line or not line[0].isdigit() or ". " not in line:
                continue
            name = line.split(". ", 1)[1].strip()
            if name:
                competitors.append(name)
        return competitors

model_name = "Qwen/Qwen2.5-0.5B-Instruct"

# Only build the agent when a GPU is present: vLLM's LLM/SamplingParams are imported
# solely under the CUDA check at the top of this cell, so constructing the agent
# without CUDA would fail on an undefined name.
if torch.cuda.is_available():
    device = "cuda"
    finder = CompetitorFinderAgent(model_name, device=device)
else:
    print("Opps... CUDA not available, change the runtime to GPU")
    device = "cpu"

print(device)

Opps... CUDA not available, change the runtime to GPU
cpu


In [3]:
# Product, service, or industry to analyse. It is passed to the competitor finder and
# later used as the prefix of the generated PDF's file name.
input_query = "LLM"
print(input_query)

LLM


In [5]:
# Ask the finder agent for competitor names. `competitors` is the input to the
# scraping, extraction, profiling, and report cells that follow.
competitors = finder.find_competitors(input_query)
print(f"Competitors for '{input_query}':")
print(competitors)

In [5]:
import gc

# `del` only removes the name. Run a collection pass and hand PyTorch's cached CUDA
# blocks back to the driver so the next engine (created with
# gpu_memory_utilization=0.95) has memory to allocate.
del finder
gc.collect()
torch.cuda.empty_cache()

In [None]:
import requests
from bs4 import BeautifulSoup

class WebScraper:
    """Runs Google searches and collects paragraph text from the result pages.

    All HTTP requests share one ``User-Agent`` header and one timeout. Network
    failures are reported with ``print`` and treated as "no content" so that a
    single bad URL does not abort a whole scraping run.
    """

    def __init__(self, user_agent=None, timeout=10):
        """
        Args:
            user_agent: ``User-Agent`` header value; a desktop Chrome string is used
                when omitted.
            timeout: Seconds passed as ``timeout`` to every ``requests.get`` call, so
                an unresponsive host cannot hang the notebook indefinitely.
        """
        self.user_agent = user_agent or "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        self.headers = {"User-Agent": self.user_agent}
        self.timeout = timeout

    def google_search_scrape(self, query, websites, num_results=10):
        """Search Google once per entry in ``websites`` and collect result titles and links.

        Args:
            query: Search terms.
            websites: Iterable of strings appended to ``query`` (for example ``""`` or
                ``"site:g2.com"``).
            num_results: Value sent as Google's ``num`` query parameter.

        Returns:
            list[dict]: One ``{"website", "title", "link"}`` dict per parsed result.
            Missing titles and links are reported as ``"No Title"`` / ``"No Link"``.
        """
        search_results = []
        for website in websites:
            search_query = f"{query} {website}".strip()
            try:
                # Passing `params` lets requests URL-encode the query, so characters
                # such as '&' or '#' in a competitor name cannot corrupt the URL.
                response = requests.get(
                    "https://www.google.com/search",
                    params={"q": search_query, "num": num_results},
                    headers=self.headers,
                    timeout=self.timeout,
                )
            except requests.RequestException as exc:
                print(f"Error fetching search results for {search_query}: {exc}")
                continue

            if response.status_code != 200:
                print(f"Error fetching search results for {search_query}: {response.status_code}")
                continue

            soup = BeautifulSoup(response.text, "html.parser")
            # NOTE: results are located through the 'tF2Cxc' CSS class of Google's result
            # markup; nothing is collected if the page does not use that class.
            for g in soup.find_all('div', class_='tF2Cxc'):
                heading = g.find('h3')
                anchor = g.find('a')
                title = heading.text if heading else "No Title"
                # An <a> without href would raise KeyError with ['href'].
                link = anchor.get('href', "No Link") if anchor else "No Link"
                search_results.append({"website": website, "title": title, "link": link})

        return search_results

    def scrape_page_content(self, url):
        """Return the text of all ``<p>`` elements at ``url`` joined by spaces.

        Returns ``""`` (after printing a message) when the request raises a
        ``requests`` exception or the response status is not 200.
        """
        try:
            response = requests.get(url, headers=self.headers, timeout=self.timeout)
        except requests.RequestException as exc:
            # Covers timeouts, connection errors, and malformed URLs alike.
            print(f"Failed to fetch {url}: {exc}")
            return ""

        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')
            paragraphs = soup.find_all('p')
            return " ".join([p.text for p in paragraphs])
        else:
            print(f"Failed to fetch {url}")
            return ""

    def get_content_from_query(self, query, websites, min_content_len=20):
        """Search for ``query`` and concatenate the text of every result page.

        Args:
            query: Search terms.
            websites: Forwarded to ``google_search_scrape``.
            min_content_len: Pages whose extracted text is shorter than this many
                characters are skipped.

        Returns:
            str: Page texts in result order, each followed by a newline.
        """
        search_results = self.google_search_scrape(query, websites, num_results=5)

        pages = []
        for result in search_results:
            link = result["link"]
            if link == "No Link":
                # Placeholder written by google_search_scrape; there is nothing to fetch.
                continue

            content = self.scrape_page_content(link)
            if len(content) < min_content_len:
                continue

            pages.append(content + "\n")

        return "".join(pages)

# Shared scraper instance; with no argument it sends the built-in desktop Chrome User-Agent.
scraper = WebScraper()

In [None]:
# For every competitor, search with no site filter ("") and restricted to g2.com, then keep
# one concatenated text blob per competitor, in the same order as `competitors`.
websites = ["", "site:g2.com"]
scapped_data = []  # name kept as-is (sic): the extraction cell reads it
for competitor in competitors:
    query = f"information about {competitor}"
    content = scraper.get_content_from_query(query, websites)
    scapped_data.append(content)
    print(content)
    print("="*50, "\n\n")












In [None]:
from vllm import LLM, SamplingParams
import torch

class CompetitorInfoExtractor:
    """Condenses raw text about each competitor into model-written summaries.

    Wraps a ``vllm.LLM`` engine: one chat prompt is built per (competitor, text)
    pair and the whole batch is generated in a single ``generate`` call.
    """

    def __init__(self, model_name="Qwen/Qwen2.5-0.5B-Instruct", tensor_parallel_size=1, max_model_len=32768, max_tokens=16384, temperature=0.7, top_p=0.9, device="cuda"):
        """Build the engine, fetch its tokenizer, and store the sampling settings."""
        torch.cuda.empty_cache()

        engine_options = dict(
            tensor_parallel_size=tensor_parallel_size,
            gpu_memory_utilization=0.95,
            trust_remote_code=True,
            dtype="half",
            enforce_eager=True,
            max_model_len=max_model_len,
            device=device,
        )
        self.llm = LLM(model_name, **engine_options)
        self.tokenizer = self.llm.get_tokenizer()
        self.sampling_params = SamplingParams(temperature=temperature, top_p=top_p, max_tokens=max_tokens)

    def extract_info(self, competitors, texts, max_len=20000):
        """Return one extracted summary per (competitor, text) pair.

        Args:
            competitors: Competitor names, paired positionally with ``texts``.
            texts: Source text for each competitor; only the first ``max_len``
                characters of each are placed in the prompt.
            max_len: Cut-off applied to each text by slicing.

        Returns:
            list[str]: Generated text for each pair, in input order. Pairing uses
            ``zip``, so surplus entries of the longer input are ignored.
        """
        system_text = (
            "You are a helpful assistant capable of analyzing data and extracting relevant information. "
            "Extract information from the provided text, removing any unnecessary or irrelevant details. "
            "Handle data conflicts by selecting the most reliable and accurate details."
        )

        # Build every user prompt first, then wrap each one in a chat conversation.
        user_prompts = []
        for name, source_text in zip(competitors, texts):
            clipped = source_text[:max_len]
            user_prompts.append(
                f"Extract relevant information about {name}, from the provided text. "
                "Remove unnecessary data and handle any conflicts by choosing the most reliable and relevant details. "
                "Ensure high data accuracy.\n\n"
                f"Text:\n{clipped}\n\n"
            )

        chat_inputs = []
        for user_prompt in user_prompts:
            conversation = [
                {"role": "system", "content": system_text},
                {"role": "user", "content": user_prompt},
            ]
            rendered = self.tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
            chat_inputs.append(rendered)

        generations = self.llm.generate(chat_inputs, sampling_params=self.sampling_params)

        return [generation.outputs[0].text for generation in generations]


# Same checkpoint as the finder stage; the extractor's own defaults (max_model_len=32768,
# max_tokens=16384) apply because only the model name and device are passed.
model_name = "Qwen/Qwen2.5-0.5B-Instruct"
extractor = CompetitorInfoExtractor(model_name, device="cuda")


INFO 02-04 08:09:00 config.py:526] This model supports multiple tasks: {'generate', 'score', 'embed', 'classify', 'reward'}. Defaulting to 'generate'.
INFO 02-04 08:09:00 llm_engine.py:232] Initializing a V0 LLM engine (v0.7.1) with config: model='Qwen/Qwen2.5-0.5B-Instruct', speculative_config=None, tokenizer='Qwen/Qwen2.5-0.5B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=32768, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='xgrammar'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=Qwen/Qwen2.5-0.5B-Instruct, num_sc

Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


INFO 02-04 08:09:07 model_runner.py:1116] Loading model weights took 0.9228 GB
INFO 02-04 08:09:09 worker.py:266] Memory profiling takes 1.73 seconds
INFO 02-04 08:09:09 worker.py:266] the current vLLM instance can use total_gpu_memory (14.74GiB) x gpu_memory_utilization (0.95) = 14.00GiB
INFO 02-04 08:09:09 worker.py:266] model weights take 0.92GiB; non_torch_memory takes 0.00GiB; PyTorch activation peak memory takes 1.43GiB; the rest of the memory reserved for KV Cache is 11.65GiB.
INFO 02-04 08:09:10 executor_base.py:108] # CUDA blocks: 63616, # CPU blocks: 21845
INFO 02-04 08:09:10 executor_base.py:113] Maximum concurrency for 32768 tokens per request: 31.06x
INFO 02-04 08:09:11 llm_engine.py:429] init engine (profile, create kv cache, warmup model) took 3.33 seconds


In [None]:
# Presumably a manual override for trying this stage on a single competitor — TODO confirm or remove:
# competitors = ["Intercom"]
# competitor_info_list = [content]

# Summarise each competitor's scraped text; only the first `max_len` characters of each
# text are sent to the model.
competitor_info_list = extractor.extract_info(competitors, scapped_data, max_len = 20000)
for competitor_info in competitor_info_list:
    print("\n", "=="*50, "\n", competitor_info, "\n\n")


Processed prompts: 100%|██████████| 3/3 [00:02<00:00,  1.29it/s, est. speed input: 112.41 toks/s, output: 130.93 toks/s]


 Apple is a multinational technology company headquartered in Cupertino, California, USA. The company produces and sells a wide range of consumer electronics, including smartphones, tablets, laptops, and other devices. Apple is known for its user-friendly interface, innovative products, and strong brand identity. The company is also involved in various industries, including software development, healthcare, and renewable energy. Apple has been a leader in the technology industry for over 50 years and is known for its commitment to innovation and customer satisfaction. 



 Amazon is a multinational technology and retail company that operates as a publicly traded company on the NASDAQ Stock Market. The company's primary focus is on the creation, production, and sale of electronic products, including computers, televisions, and other electronic devices. Amazon is headquartered in Seattle, Washington, and has branches in various other locations around the world. The company has a wide ra




In [None]:
import gc

# `del` only removes the name. Run a collection pass and hand PyTorch's cached CUDA
# blocks back to the driver so the next engine (created with
# gpu_memory_utilization=0.95) has memory to allocate.
del extractor
gc.collect()
torch.cuda.empty_cache()




In [None]:
from vllm import LLM, SamplingParams
import torch

class CompetitorProfileAgent:
    """Turns per-competitor information into structured, model-written profiles.

    Wraps a ``vllm.LLM`` engine: one chat prompt is built per competitor and the
    whole batch is generated in a single ``generate`` call.
    """

    def __init__(self, model_name="Qwen/Qwen2.5-0.5B-Instruct", tensor_parallel_size=1, max_model_len=32768, max_tokens=16384, temperature=0.7, top_p=0.9, device="cuda"):
        """Build the engine, fetch its tokenizer, and store the sampling settings."""
        torch.cuda.empty_cache()

        engine_options = dict(
            tensor_parallel_size=tensor_parallel_size,
            gpu_memory_utilization=0.95,
            trust_remote_code=True,
            dtype="half",
            enforce_eager=True,
            max_model_len=max_model_len,
            device=device,
        )
        self.llm = LLM(model_name, **engine_options)
        self.tokenizer = self.llm.get_tokenizer()
        self.sampling_params = SamplingParams(temperature=temperature, top_p=top_p, max_tokens=max_tokens)

    def generate_profile(self, competitors, competitor_info_list):
        """Generate a profile (overview, SWOT, insights) for each competitor.

        Args:
            competitors: Competitor names; indexed by position when labelling results.
            competitor_info_list: Information text for each competitor, paired
                positionally with ``competitors`` via ``zip``.

        Returns:
            list[dict]: ``{"competitor_name": ..., "profile": ...}`` per generated reply.
        """
        system_text = "You are a helpful assistant capable of analyzing and structuring data."

        # Build every user prompt first, then wrap each one in a chat conversation.
        user_prompts = []
        for name, info in zip(competitors, competitor_info_list):
            user_prompts.append(
                f"Analyze the following information about {name} and create a structured profile. "
                "Include an overview, SWOT analysis (Strengths, Weaknesses, Opportunities, Threats), and actionable insights.\n\n"
                f"Competitor Information:\n{info}"
            )

        chat_inputs = []
        for user_prompt in user_prompts:
            conversation = [
                {"role": "system", "content": system_text},
                {"role": "user", "content": user_prompt},
            ]
            rendered = self.tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
            chat_inputs.append(rendered)

        generations = self.llm.generate(chat_inputs, sampling_params=self.sampling_params)

        # Results are labelled by position in `competitors`, matching the prompt order.
        return [
            {"competitor_name": competitors[position], "profile": generation.outputs[0].text}
            for position, generation in enumerate(generations)
        ]

# Same checkpoint again; the profile agent's own defaults (max_model_len=32768,
# max_tokens=16384) apply because only the model name and device are passed.
model_name = "Qwen/Qwen2.5-0.5B-Instruct"
profileAgent = CompetitorProfileAgent(model_name, device="cuda")

INFO 01-10 09:37:24 config.py:510] This model supports multiple tasks: {'embed', 'score', 'reward', 'generate', 'classify'}. Defaulting to 'generate'.
INFO 01-10 09:37:24 llm_engine.py:234] Initializing an LLM engine (v0.6.6.post1) with config: model='Qwen/Qwen2.5-0.5B-Instruct', speculative_config=None, tokenizer='Qwen/Qwen2.5-0.5B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=32768, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='xgrammar'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=Qwen/Qwen2.5

Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


INFO 01-10 09:37:27 model_runner.py:1099] Loading model weights took 0.9228 GB
INFO 01-10 09:37:29 worker.py:241] Memory profiling takes 1.79 seconds
INFO 01-10 09:37:29 worker.py:241] the current vLLM instance can use total_gpu_memory (14.75GiB) x gpu_memory_utilization (0.95) = 14.01GiB
INFO 01-10 09:37:29 worker.py:241] model weights take 0.92GiB; non_torch_memory takes 0.04GiB; PyTorch activation peak memory takes 1.43GiB; the rest of the memory reserved for KV Cache is 11.62GiB.
INFO 01-10 09:37:29 gpu_executor.py:76] # GPU blocks: 63454, # CPU blocks: 21845
INFO 01-10 09:37:29 gpu_executor.py:80] Maximum concurrency for 32768 tokens per request: 30.98x
INFO 01-10 09:37:30 llm_engine.py:431] init engine (profile, create kv cache, warmup model) took 2.83 seconds


In [None]:
# Build one profile per competitor from the extracted summaries. Each entry of `profiles`
# is a dict with "competitor_name" and "profile" keys.
profiles = profileAgent.generate_profile(competitors, competitor_info_list)

for profile in profiles:
    print(f"\nCompetitor: {profile['competitor_name']}")
    print(profile['profile'])
    print("="*50, "\n\n")

Processed prompts: 100%|██████████| 3/3 [00:15<00:00,  5.06s/it, est. speed input: 1121.34 toks/s, output: 116.60 toks/s]


Competitor: Dialogflow
Overview:
Dialogflow is a cloud-based conversational interface platform that allows developers to create and deploy conversational interfaces for their mobile, web, and device applications. It is part of the Google Cloud Platform and offers a complete development suite with code editor, library, and tools. The platform supports multiple languages and integrations with popular conversation platforms like Google Assistant, Amazon Alexa, and Facebook Messenger.

Strengths:
1. A comprehensive development suite with code editor, library, and tools.
2. A complete development suite that includes features like code completion, code refactoring, and code analysis.
3. Multiple integrations with popular conversation platforms like Google Assistant, Amazon Alexa, and Facebook Messenger.
4. A simple and easy-to-use platform for developers to create and deploy conversational interfaces.

Weaknesses:
1. It is not a single-purpose platform, and the platform is not suitable for 




In [None]:
import gc

# `del` only removes the name. Run a collection pass and hand PyTorch's cached CUDA
# blocks back to the driver so the next engine (created with
# gpu_memory_utilization=0.95) has memory to allocate.
del profileAgent
gc.collect()
torch.cuda.empty_cache()

In [None]:
from vllm import LLM, SamplingParams
import torch

class ReportGeneratorAgent:
    """Writes a single competitor-analysis report from a set of competitor profiles.

    Wraps a ``vllm.LLM`` engine; ``generate_report`` sends one chat prompt that
    contains every profile and returns the generated text.
    """

    def __init__(self, model_name="Qwen/Qwen2.5-0.5B-Instruct", tensor_parallel_size=1, max_model_len=32768, max_tokens=32768, temperature=0.7, top_p=0.9, device="cuda"):
        """Create the engine, its tokenizer, and the sampling configuration.

        Args:
            model_name: Model identifier passed as the first argument to ``LLM``.
            tensor_parallel_size: Forwarded to ``LLM``.
            max_model_len: Forwarded to ``LLM``.
            max_tokens: Forwarded to ``SamplingParams``.
            temperature: Forwarded to ``SamplingParams``.
            top_p: Forwarded to ``SamplingParams``.
            device: Forwarded to ``LLM``.
        """
        # Release cached CUDA blocks before the engine is created.
        torch.cuda.empty_cache()

        self.llm = LLM(
            model_name,
            tensor_parallel_size=tensor_parallel_size,
            gpu_memory_utilization=0.95,
            trust_remote_code=True,
            dtype="half",
            enforce_eager=True,
            max_model_len=max_model_len,
            device=device
        )
        self.tokenizer = self.llm.get_tokenizer()

        self.sampling_params = SamplingParams(
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_tokens
        )

    @staticmethod
    def _profile_text(profile):
        """Return the profile body, unwrapping ``{"profile": ...}`` dicts when given one."""
        if isinstance(profile, dict) and "profile" in profile:
            return profile["profile"]
        return profile

    def generate_report(self, competitors, profiles):
        """Generate the report text for the given competitors.

        Args:
            competitors: Competitor names, paired positionally with ``profiles``.
            profiles: One profile per competitor. Each may be plain text or a dict
                with a ``"profile"`` key (the shape produced by
                ``CompetitorProfileAgent.generate_profile``).

        Returns:
            str: The model's generated report.
        """
        prompt_parts = [
            "Based on the following competitor profiles, generate a detailed competitor analysis report.\n"
            "Include an introduction, an overview of each competitor, feature comparisons, and strategic recommendations.\n"
            "The report should be structured as follows:\n"
            "1. Introduction\n"
            "2. Competitor Overview\n"
            "3. Feature Comparisons\n"
            "4. Strategic Recommendations\n\n"
            "Competitor Profiles:\n"
        ]

        for competitor, profile in zip(competitors, profiles):
            # Formatting the dict itself would embed its repr (keys, quotes, and escaped
            # "\n" sequences) in the prompt, so only the profile text is interpolated.
            prompt_parts.append(f"\n{competitor}: {self._profile_text(profile)}")

        prompt = "".join(prompt_parts)

        messages = [
            {"role": "system", "content": "You are an expert in competitor analysis and report generation."},
            {"role": "user", "content": prompt}
        ]

        text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

        response = self.llm.generate([text], sampling_params=self.sampling_params)

        report = response[0].outputs[0].text

        return report

# Same checkpoint again; the report agent's own defaults (max_model_len=32768,
# max_tokens=32768) apply because only the model name and device are passed.
model_name = "Qwen/Qwen2.5-0.5B-Instruct"
reportAgent = ReportGeneratorAgent(model_name, device="cuda")

INFO 01-10 09:37:49 config.py:510] This model supports multiple tasks: {'embed', 'score', 'reward', 'generate', 'classify'}. Defaulting to 'generate'.
INFO 01-10 09:37:49 llm_engine.py:234] Initializing an LLM engine (v0.6.6.post1) with config: model='Qwen/Qwen2.5-0.5B-Instruct', speculative_config=None, tokenizer='Qwen/Qwen2.5-0.5B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=32768, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='xgrammar'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=Qwen/Qwen2.5

Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


INFO 01-10 09:37:52 model_runner.py:1099] Loading model weights took 0.9228 GB
INFO 01-10 09:37:54 worker.py:241] Memory profiling takes 1.77 seconds
INFO 01-10 09:37:54 worker.py:241] the current vLLM instance can use total_gpu_memory (14.75GiB) x gpu_memory_utilization (0.95) = 14.01GiB
INFO 01-10 09:37:54 worker.py:241] model weights take 0.92GiB; non_torch_memory takes 0.04GiB; PyTorch activation peak memory takes 1.43GiB; the rest of the memory reserved for KV Cache is 11.62GiB.
INFO 01-10 09:37:54 gpu_executor.py:76] # GPU blocks: 63454, # CPU blocks: 21845
INFO 01-10 09:37:54 gpu_executor.py:80] Maximum concurrency for 32768 tokens per request: 30.98x
INFO 01-10 09:37:55 llm_engine.py:431] init engine (profile, create kv cache, warmup model) took 2.78 seconds


In [None]:
# Generate the final report and keep a plain-text copy next to the notebook.
report = reportAgent.generate_report(competitors, profiles)
print(report)
# An explicit encoding keeps the write independent of the platform's default locale,
# which may not be able to represent every character the model emits.
with open("competitor_analysis_report.txt", "w", encoding="utf-8") as f:
    f.write(report)

Processed prompts: 100%|██████████| 1/1 [00:16<00:00, 16.06s/it, est. speed input: 124.00 toks/s, output: 52.75 toks/s]

### Competitor Analysis Report

#### Introduction

Dialogflow is a cloud-based conversational interface platform developed by Google that offers a comprehensive development suite with code editor, library, and tools. It supports multiple languages and integrations with popular conversation platforms like Google Assistant, Amazon Alexa, and Facebook Messenger. The platform has a strong development suite with features like code completion, code refactoring, and code analysis, making it suitable for developers to create and deploy conversational interfaces.

Alexa, on the other hand, is a virtual assistant technology developed by Amazon that provides voice commands and voice-based services. The company has released several new devices, including the Echo, Echo Dot, Echo Studio, and Echo Show. Alexa offers a wide range of features and personalization options, making it a popular choice for voice assistants.

Microsoft Cortana, on the other hand, is a digital assistant that performs consume




In [None]:
import pdfkit
import markdown

def save_to_pdf(report_markdown, pdf_file_name="competitor_analysis_report.pdf", input_query=None):
    """Render a Markdown report to a PDF file via HTML.

    Args:
        report_markdown: Report text in Markdown.
        pdf_file_name: Output path of the PDF.
        input_query: Optional label prepended (followed by ``_``) to the file name.
            Characters other than letters, digits, ``.``, ``_`` and ``-`` are replaced
            with ``_`` so the label cannot introduce path separators or other
            characters that are invalid in file names.

    Side effects:
        Writes the PDF through ``pdfkit.from_string`` and prints the path used.
    """
    import os

    if input_query:
        safe_prefix = "".join(
            ch if (ch.isalnum() or ch in "._-") else "_" for ch in input_query
        )
        # Prefix only the base name so a directory component of pdf_file_name is preserved.
        directory, base_name = os.path.split(pdf_file_name)
        pdf_file_name = os.path.join(directory, f"{safe_prefix}_{base_name}")

    report_markdown = preprocess_markdown(report_markdown)

    report_html = markdown.markdown(report_markdown)

    options = {
        'page-size': 'A4',
        'encoding': "UTF-8",
    }

    pdfkit.from_string(report_html, pdf_file_name, options=options)
    print(f"Report saved as {pdf_file_name}")

def preprocess_markdown(markdown_text):
    """Insert an empty line ahead of every unindented ``-`` line.

    A line qualifies when its stripped form begins with ``-`` and the raw line does
    not begin with a space. All other lines pass through unchanged.

    Args:
        markdown_text: Markdown source.

    Returns:
        str: The lines re-joined with ``"\\n"``.
    """
    def _with_spacing(row):
        # Space-indented dashes are left alone; only unindented ones get the extra blank line.
        is_unindented_dash = row.strip().startswith("-") and not row.startswith(" ")
        return "\n" + row if is_unindented_dash else row

    return "\n".join(_with_spacing(row) for row in markdown_text.splitlines())

In [None]:
# Render the report to PDF; `input_query` becomes the prefix of the output file name.
save_to_pdf(report, input_query=input_query)

Report saved as chatbot_for_customer_suppor_competitor_analysis_report.pdf


In [None]:
import gc

# `del` only removes the name. Run a collection pass and hand PyTorch's cached CUDA
# blocks back to the driver so the GPU memory held by the engine is released.
del reportAgent
gc.collect()
torch.cuda.empty_cache()