# Setting up the environment

In [None]:
!pip install -q crewai
!pip install -q crewai.tools
from crewai import Agent, Task, Process, Crew
!pip install -q langchain-google-genai
!pip install -q matplotlib
!pip install -q fpdf
!pip install -q firecrawl-py
!pip install -q transformers accelerate bitsandbytes langchain-huggingface


# Setting up a model

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_huggingface.llms import HuggingFacePipeline
import torch
from huggingface_hub import login
from google.colab import userdata
# Authenticate against the Hugging Face Hub with the token stored in Colab secrets.
login(userdata.get('HF-TOKEN'))

model_id = "mistralai/Mistral-7B-Instruct-v0.3"
print("Loading Tokenizer")
tokenizer = AutoTokenizer.from_pretrained(model_id)

print("Loading the Model in 8bit, this could take a while")
# NOTE(review): recent transformers releases prefer
# `quantization_config=BitsAndBytesConfig(load_in_8bit=True)` over the bare
# `load_in_8bit` flag -- confirm against the installed version.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    load_in_8bit=True,
    device_map="auto",
)
print("Model Loaded")

# Sampling-based generation pipeline around the quantized model.
text_pipeline = pipeline(
    # The task identifier is hyphenated; "text_generation" is not a known
    # pipeline task and makes pipeline() fail before the model is ever used.
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    temperature=0.5,
    do_sample=True,
    top_k=50,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    # The EOS token id is reused as the padding id.
    pad_token_id=tokenizer.eos_token_id,
)

# LangChain-compatible wrapper around the local pipeline.
# (The variable name is kept as-is so any later cell that references it still works.)
loca_mistral_llm = HuggingFacePipeline(pipeline=text_pipeline)
print("Model's ready for use")

# Creating Tools


In [None]:
from typing import Type, List, Dict, Union
from pydantic import BaseModel, Field
from crewai.tools import BaseTool
from fpdf import FPDF
import matplotlib.pyplot as plt
import os



# creating the input schema for the plot tool
class PlotInput(BaseModel):
    # Argument schema for the plot tool (used as GeneratePlotTool.args_schema).
    # All six fields are required (`...` as the Field default). The description
    # strings are runtime text attached to the schema, so they are kept verbatim.

    # Chart payload; per the description it carries 'labels' and 'values' lists.
    data: Dict[str, Union[List[str], List[float], List[int]]] = Field(
        ...,
        description="""A dictionary containing Plot data.
                  Must have 'labels' (List[str] for x-axis categories)
                  and 'values' (List[float] or List[int] for y_axis values)
                  Example: {'labels':['Jan', 'Feb'], 'values': [100,150]}""",
    )

    # Chart kind selector.
    plot_type: str = Field(..., description="Typle of plot to generate: 'bar' or 'line'")

    # Text decorations for the figure.
    title: str = Field(..., description="The title of the plot")
    x_label: str = Field(..., description="Label for the x-axis")
    y_label: str = Field(..., description="Label for the y-axis")

    # File name (not a full path) under which the image is stored.
    output_filename: str = Field(
        ...,
        description="Desired filename for the generated plot image (e.g., 'revenue_chart.png').",
    )

# The tool itself: responsible for creating visualizations with matplotlib, using PlotInput as its argument schema
class GeneratePlotTool(BaseTool):
    # CrewAI tool that draws a bar or line chart with matplotlib and saves it
    # as an image under the "output_charts" directory.
    name: str = "Generate Plot Image"
    description: str = (
        """Generates a plot (bar or line chart) from given data and saves it as a PNG
      Useful for visualizing numerical trends and comparisons within reports.
      Requires data, plot type, title, x_label, y_label, and an output filename.
      Example usage: tool.generate_plot_image(data={'labels':['A', 'B'], 'values':[10, 20]}, plot_type='bar', title='Sales', x_label='Category', y_label='Amount', output_filename='sales_chart.png')"""
    )
    args_schema: Type[BaseModel] = PlotInput

    def _run(self, data: Dict[str, Union[List[str], List[float], List[int]]], plot_type: str,
             title: str, x_label: str, y_label: str, output_filename: str) -> str:
        """Render the requested chart and save it to disk.

        This method is called by the agent ('Nexus' in this case) with
        arguments shaped by ``PlotInput``.

        Args:
            data: Mapping with a 'labels' list and a 'values' list of equal length.
            plot_type: Either 'bar' or 'line'.
            title: Title drawn above the chart.
            x_label: Label for the x-axis.
            y_label: Label for the y-axis.
            output_filename: File name of the image inside ``output_charts``.

        Returns:
            A status message: the saved path on success, otherwise a message
            starting with "Error:" or "Failed to generate plot:". Problems are
            returned as text instead of raised so the calling agent can read
            them and retry.
        """
        try:
            labels = data.get('labels')
            values = data.get('values')

            if not labels or not values or len(labels) != len(values):
                return "Error: Data must contain 'labels' and 'values' of equal length."

            # Reject unknown plot types before a figure is allocated, so the
            # error path does not leave an open figure behind.
            if plot_type not in ('bar', 'line'):
                return f"Error: Unsupported plot_type '{plot_type}'. Must be 'bar' or 'line'."

            output_dir = "output_charts"  # change with specific directory
            os.makedirs(output_dir, exist_ok=True)
            plot_path = os.path.join(output_dir, output_filename)

            fig = plt.figure(figsize=(10, 6))
            try:
                if plot_type == 'bar':
                    plt.bar(labels, values)
                else:
                    plt.plot(labels, values, marker='o')

                plt.title(title)
                plt.xlabel(x_label)
                plt.ylabel(y_label)
                plt.grid(True, linestyle='--', alpha=0.6)
                plt.tight_layout()
                plt.savefig(plot_path)
            finally:
                # Always release the figure, even when drawing or saving fails.
                plt.close(fig)

            return f"Plot image successfully saved to: {plot_path}"
        except Exception as e:
            # Tool boundary: report the failure to the agent as text.
            return f"Failed to generate plot: {e}"

# creating the input schema for the pdf maker tool
class PDFInput(BaseModel):
    # Argument schema for the PDF tool (used as CreatePDFReportTool.args_schema).
    # Every field is required; the description strings are runtime text and
    # are reproduced verbatim.

    # Path of the text/markdown file whose content becomes the report body.
    report_text_file: str = Field(
        ...,
        description="""The file path to the main text context of the report
                   (e.g., 'output/report_text.md)""",
    )

    # Paths of chart images to append to the report.
    image_paths: List[str] = Field(
        ...,
        description="""A list of file paths to previously generated chart images
                   (e.g., ['output/charts/chat1.png', 'output/charts/chart2.png'])""",
    )

    # File name (not a full path) of the PDF to produce.
    output_pdf_filename: str = Field(
        ...,
        description="""The desired filename for the PDF report (e.g., 'Executive_Summary_Report.pdf')""",
    )

    # Heading printed at the top of the first page.
    title_text: str = Field(
        ...,
        description="""The title that's going to be displayed in the first page of the PDF
                   (e.g., Business Research Report for MostlyOpenAI)""",
    )
# creating the pdf tool
class CreatePDFReportTool(BaseTool):
    # CrewAI tool that assembles a PDF from a text/markdown file plus chart
    # images and writes it under the "output/reports" directory.
    name: str = "Create PDF Report"
    description: str = (
        """Assembles a professional PDF report from a given markdown/text file and a list of image paths
      Each image will be placed on a new page after the text context.
      Requires the path to the report's text content, a list of image file paths,
      the desired output PDF filename, and a main title for the report.
      Useful for generating final, shareable executive docuemnts"""
    )
    args_schema: Type[BaseModel] = PDFInput

    def _run(self, report_text_file: str, image_paths: List[str],
             output_pdf_filename: str, title_text: str) -> str:
        """Build the PDF report and write it to ``output/reports``.

        This tool is used by 'Nexus' to aggregate the report text and charts.

        Args:
            report_text_file: Path to the UTF-8 text/markdown file for the body.
            image_paths: Image files to append, one per page, after the text.
                Paths that do not exist are skipped with a printed warning.
            output_pdf_filename: File name of the PDF inside ``output/reports``.
            title_text: Heading printed at the top of the first page.

        Returns:
            A status message: the PDF path on success, otherwise a message
            starting with "Failed to create PDF report:". Problems are
            returned as text instead of raised so the calling agent can read
            them.
        """

        def latin1_safe(text: str) -> str:
            # NOTE(review): FPDF's built-in fonts (such as the "Arial" used
            # here) are expected to handle Latin-1 text only; characters
            # outside that range (smart quotes, dashes, bullets in LLM output)
            # would otherwise abort PDF generation. Unsupported characters
            # are replaced with '?'.
            return text.encode("latin-1", "replace").decode("latin-1")

        try:
            output_dir = "output/reports"
            os.makedirs(output_dir, exist_ok=True)
            output_pdf_path = os.path.join(output_dir, output_pdf_filename)

            # Read the body first so the file handle is closed before rendering.
            with open(report_text_file, 'r', encoding='utf-8') as f:
                report_content = f.read()

            pdf = FPDF()
            pdf.set_auto_page_break(auto=True, margin=15)
            pdf.add_page()
            pdf.set_font("Arial", size=24)
            pdf.cell(0, 20, latin1_safe(title_text), ln=True, align='C')
            pdf.ln(10)

            pdf.set_font("Arial", size=12)
            pdf.multi_cell(0, 8, txt=latin1_safe(report_content))

            image_counter = 0
            for img_path in image_paths:
                # `os.path.exists` (the original called the non-existent
                # `os.path_exists`, which failed for every image).
                if not os.path.exists(img_path):
                    print(f"Warning: Image File not found at {img_path}. Skipping")
                    continue

                image_counter += 1
                # One page per image, as the tool description promises. The
                # caption is written first and the image is placed at the
                # current cursor position (no explicit y), so it neither
                # overlaps the caption nor the report text.
                pdf.add_page()
                pdf.cell(0, 8, f"Image number: {image_counter}", ln=True)
                pdf.image(img_path, x=10, w=190)

            pdf.output(output_pdf_path)
            return f"PDF report successfully created at: {output_pdf_path}."
        except Exception as e:
            # Tool boundary: report the failure to the agent as text.
            return f"Failed to create PDF report: {e}"



# Creating Agents


In [None]:
import os

from crewai import LLM
from crewai_tools import (
    FileWriterTool,
    FirecrawlScrapeWebsiteTool,
    FirecrawlSearchTool,
    SerperDevTool,
)
from google.colab import userdata
from langchain_google_genai import ChatGoogleGenerativeAI

# Copy the API keys from Colab secrets into the process environment.
for secret_name in ("SERPER_API_KEY", "GOOGLE_API_KEY", "FIRECRAWL_API_KEY"):
    os.environ[secret_name] = userdata.get(secret_name)

# Tools for web related tasks (search, crawl, scrape).
web_search_tool = SerperDevTool()
web_crawl_tool = FirecrawlSearchTool()
web_crawl_scrape_tool = FirecrawlScrapeWebsiteTool()

# Tools for charting the findings and assembling the final PDF.
generate_plot_tool = GeneratePlotTool()
create_pdf_tool = CreatePDFReportTool()

# One LLM handle per agent: the Archivist uses the "flash" model, the other
# three agents use the "pro" model.
gemini_llm_archivist = LLM(
    model='gemini/gemini-2.5-flash',
    api_key=userdata.get("GOOGLE_API_KEY"),
)
gemini_llm_shadow = LLM(
    model='gemini/gemini-2.5-pro',
    api_key=userdata.get("GOOGLE_API_KEY"),
)
gemini_llm_seer = LLM(
    model='gemini/gemini-2.5-pro',
    api_key=userdata.get("GOOGLE_API_KEY"),
)
gemini_llm_nexus = LLM(
    model='gemini/gemini-2.5-pro',
    api_key=userdata.get("GOOGLE_API_KEY"),
)
# gemini_llm_archivist = ChatGoogleGenerativeAI(
#     model='gemini/gemini-2.5-flash',
#     temperature=0.3
# )
# gemini_llm_shadow = ChatGoogleGenerativeAI(
#     model='gemini/gemini-2.5-pro',
#     temperature=0.4
# )
# gemini_llm_seer = ChatGoogleGenerativeAI(
#     model='gemini/gemini-2.5-pro',
#     temperature=0.5
# )
# gemini_llm_nexus = ChatGoogleGenerativeAI(
#     model='gemini/gemini-2.5-pro',
#     temperature=0.3
# )

# Archivist: gathers raw market data; the only agent with search, crawl and scrape tools.
Archivist = Agent(
    role="Expert in finding relevant market data",
    goal="Efficiently collect comprehensive, relevant and up-to-date information, industry reports and news, from reliable sources",
    backstory="You are 'Archivist', a world-renowned, AI & Tech Intelligence Specialist from a top-tier global market research and technology analysis firm. Your unparalleled skill lies in meticulously extracting and verifying raw market data, cutting-edge research papers, industry reports, and real-time news from sources you consider trustworthy, reliable, and important within the rapidly evolving AI and LLM landscape. You pride yourself on your speed, accuracy, and ability to unearth the most relevant, granular information that others overlook. You are currently serving 'MostlyOpenAI,' a leading developer of enterprise-grade, highly customizable LLMs, providing them with the foundational intelligence they need.",
    llm=gemini_llm_archivist,
    tools=[web_search_tool, web_crawl_tool, web_crawl_scrape_tool],
    verbose=True
)
# Shadow: competitor analysis; limited to the website scrape tool.
Shadow = Agent(
    role="Expert in dissecting competitor strategies",
    goal='Provide detailed, relevant and useful insights into competitor products, pricing, marketing, general business strategies and market position',
    backstory="You are 'Shadow', a highly decorated Competitive Intelligence Strategist, formerly leading the CI division for a Fortune 100 tech giant. Your expertise is in dissecting competitor moves, product launches, pricing models, marketing campaigns and business strategies with surgical precision. Currently, your mission is to provide 'MostlyOpenAI,' a specialist in enterprise-grade, customizable LLMs, with a clear understanding of its rivals. You think like a rival CEO, anticipating their next move and providing actionable insights into their vulnerabilities and strengths.",
    llm=gemini_llm_shadow,
    tools=[web_crawl_scrape_tool],
    verbose=True
)
# Seer: trend and regulatory analysis; limited to the website scrape tool.
Seer = Agent(
    role='Expert analyst in identifying critical shifts',
    goal='With a strong foundation on ground truths, detect emerging market trends, technological advancements and changes in consumer behavior',
    backstory="You are 'Seer', an innovative Futures and Trends Forecaster with a track record of predicting significant market shifts years in advance for leading consultancies. Your methods combine deep pattern recognition with an intuitive grasp of socio-economic and technological currents. You excel at identifying nascent trends and disruptive innovations that will shape tomorrow's markets. Your current focus is exclusively on the rapidly evolving landscape of enterprise-grade LLMs, aiming to guide 'MostlyOpenAI' toward future opportunities.",
    llm=gemini_llm_seer,
    tools=[web_crawl_scrape_tool],
    verbose=True
)
# Nexus: writes the final report. Besides the chart and PDF tools it needs a
# file-writing tool, because the PDF tool reads the report body from a file
# on disk and the report task asks the agent to save the markdown text first.
Nexus = Agent(
    role='Expert in concise and actionable reporting',
    goal='Consolidate all gathered relevant insights into clear, summarized reports with actionable recommendations for business strategy',
    backstory="You are 'Nexus', the Chief Insights Architect for an exclusive executive advisory board. Your unique talent is transforming vast, complex datasets and disparate analyses into crisp, compelling, and actionable strategic reports. You possess an unparalleled ability to synthesize information, highlight key takeaways, and craft narratives that directly inform C-suite decisions. Your current mandate is to deliver these critical strategic insights directly to the leadership of 'MostlyOpenAI,' enabling them to make informed product and market decisions for their enterprise LLM offerings.",
    llm=gemini_llm_nexus,
    verbose=True,
    tools=[generate_plot_tool, create_pdf_tool, FileWriterTool()]
)


# Defining Tasks

In [None]:
# research tasks
# Both tasks run on the Archivist. Each task is labelled through `name`; the
# original passed `output_key`, which is not a CrewAI Task option
# (NOTE(review): confirm against the installed CrewAI version).
identify_key_market_segments = Task(
    description="Identify and list the primary market segments within the enterprise-grade LLM industry (e.g., finance, healthcare, legal, specialized customer service, R&D). For each segment, provide a brief overview of its specific LLM needs and growth potential.",
    expected_output="Format JSON, A structured list of 3-5 key enterprise LLM market segments, with a 1-2 paragraph summary for each, including estimated market size and growth rates if available. Name the output ‘key_market_segments’",
    agent=Archivist,
    name='key_market_segments'
)
# Builds on the market segments identified above (passed in via `context`).
collect_reports_news = Task(
    description="- Conduct an exhaustive search for the most recent and highly relevant industry reports, whitepapers, research papers, academic studies, and significant news articles (published within the last 4 months) pertaining to the enterprise-grade LLM market. Focus specifically on: 1. Adoption rates and success stories within the previously identified market segments. 2. Technological advancements and new model architectures relevant to enterprise deployment (e.g., efficiency, fine-tuning, security). 3. Specific use cases and implementation challenges faced by businesses within these segments. 4. Market dynamics, investment trends, and significant partnerships within the LLM ecosystem. Prioritize information from top-tier technology research firms, leading academic institutions, and highly reputable industry news sources. The goal is to gather a diverse and comprehensive collection of foundational documents.",
    expected_output="Format JSON, A curated, categorized list of high-quality external resources (links to PDFs or web articles). Each link must be accompanied by a concise summary highlighting its main findings or key contribution to understanding the enterprise LLM market within the specified segments. The collection should be sufficiently broad and deep to inform subsequent detailed analysis by other agents. Name the output: ‘key_market_research’",
    context=[identify_key_market_segments],
    agent=Archivist,
    name='key_market_research'
)
# business analysis tasks
# Both tasks run on Shadow and return JSON with 'plot_data' and 'summary_text'.
# Changes from the original:
#   * The descriptions no longer contain `{key_market_research}` /
#     `{competitor_profiles}`. Curly-brace names are template variables that
#     are only filled from inputs given to `kickoff()`, and the crew is
#     started without inputs; the upstream results arrive through `context`.
#   * `paralell=True` was dropped: it is misspelled and is not a CrewAI Task
#     option. NOTE(review): CrewAI's concurrency flag is `async_execution`;
#     enable it deliberately after checking the crew's task ordering.
#   * `output_key` is not a Task option either; the label is passed as `name`.
profile_competitor = Task(
    description="Based on the key market research provided in your context and its analysis of relevant market segments, identify and create a detailed profile for the top 3-5 direct competitors to 'MostlyOpenAI' in the enterprise-grade LLM space (e.g., OpenAI's enterprise offerings, Google Cloud AI, Anthropic). For each competitor, focus on their key enterprise LLM products, reported pricing models (if public), target industries, strategic partnerships and actions, and recent significant announcements.",
    expected_output="""A JSON object structured with two main keys: 'plot_data' and 'summary_text'.
    The 'plot_data' key should contain a dictionary suitable for plotting, with 'labels' (List[str] of competitor names)
    and 'values' (List[float] of a key comparable metric like 'average reported pricing' or 'number of features', whichever is most appropriate for a single plot).
    The 'summary_text' key should contain a comprehensive markdown string detailing each competitor's profile, including
    product names, features, pricing, clients/industries, and collaborations.

    Example:
    {
      "plot_data": {
        "labels": ["CompA", "CompB", "CompC"],
        "values": [50000.0, 75000.0, 60000.0]
      },
      "summary_text": "## Competitor Profiles\\n\\n### CompA\\n- Product: AlphaLLM\\n- Features: Scalability, Customization, Fine-tuning\\n- Pricing: ~$50,000/year (reported)\\n- Clients: Finance, Healthcare\\n- Collaborations: CloudCorp\\n\\n### CompB\\n- Product: BetaAI\\n- Features: Multimodal, Real-time processing\\n- Pricing: ~$75,000/year (reported)\\n- Clients: Retail, Automotive\\n- Collaborations: DataSolutions\\n\\n### CompC\\n- Product: GammaGen\\n- Features: Security, On-premise deployment\\n- Pricing: ~$60,000/year (reported)\\n- Clients: Government, Defense\\n- Collaborations: CyberSec Inc."
    }
    """,
    context=[collect_reports_news],
    agent=Shadow,
    name='competitor_profiles'
)
analyze_comp_markt_position = Task(
    description="Examine the marketing messaging, public statements, and positioning strategies of the competitors whose profiles are provided in your context. Identify their unique selling propositions (USPs) for enterprise clients, their ethical AI stances, and how they address concerns like data privacy, model explainability, and compliance in their public communications and product documentation.",
    expected_output="""A JSON object structured with two main keys: 'plot_data' and 'summary_text'.
    The 'plot_data' key should contain a dictionary suitable for plotting, with 'labels' (List[str] of competitor names)
    and 'values' (List[float] representing a quantifiable comparative metric like 'score for data privacy focus',
    'number of stated USPs', or 'overall ethical emphasis score').
    The 'summary_text' key should contain a comprehensive markdown string detailing the comparative marketing analysis,
    including specific examples of USPs, ethical principles, and data privacy/explainability approaches for each competitor.

    Example:
    {
      "plot_data": {
        "labels": ["CompA", "CompB", "CompC"],
        "values": [8.5, 7.0, 9.2]
      },
      "summary_text": "## Competitor Marketing & Positioning Analysis\\n\\n### CompA\\n- **USP:** 'Hyper-Scalable AI for Enterprise Growth'\\n- **Ethical Stance:** Emphasizes 'Responsible AI deployment with human oversight'.\\n- **Data Privacy:** Strong focus on 'on-premise deployment options and certified data isolation'.\\n\\n### CompB\\n- **USP:** 'AI for Seamless Multimodal Interaction'\\n- **Ethical Stance:** Highlights 'AI fairness and bias mitigation through audited datasets'.\\n- **Data Privacy:** Advocates for 'federated learning to protect sensitive client data'."
    }
    """,
    context=[profile_competitor],
    agent=Shadow,
    name='competitor_marketing_analysis'
)
# trends and shifts analysis tasks
# Both tasks run on Seer and return JSON with 'plot_data' and 'summary_text'.
# Changes from the original:
#   * The `{key_market_research}` template variable was replaced with plain
#     wording; the crew is started without inputs, so the placeholder was
#     never filled. The research itself arrives through `context`.
#   * `paralell=True` was dropped: it is misspelled and is not a CrewAI Task
#     option. NOTE(review): CrewAI's concurrency flag is `async_execution`.
#     Both tasks share the same Seer agent instance, so confirm that running
#     one agent on two tasks at once is safe before enabling it.
#   * `output_key` is not a Task option either; the label is passed as `name`.
identify_trends = Task(
    description="Analyze the key market research provided in your context to pinpoint 3-5 cutting-edge technological advancements, new research paradigms (e.g., novel architectures, multimodal LLMs, efficient training methods), or significant shifts in LLM development that hold the highest potential to disrupt or redefine the enterprise LLM market in the next 1-3 years. Focus on trends with clear implications for 'MostlyOpenAI's' product roadmap or strategic direction.",
    expected_output="""A JSON object structured with two main keys: 'plot_data' and 'summary_text'.
    The 'plot_data' key should contain a dictionary suitable for plotting, with 'labels' (List[str] of trend names)
    and 'values' (List[float] representing a quantifiable priority score, impact score, or adoption rate potential).
    The 'summary_text' key should contain a comprehensive markdown string detailing each emerging technology/research trend,
    including its explanation, potential enterprise impact, and specific source citations.

    Example:
    {
      "plot_data": {
        "labels": ["RAG", "MoE", "Small LLMs"],
        "values": [9.0, 8.5, 7.5]
      },
      "summary_text": "## Emerging LLM Technologies & Trends\\n\\n### 1. Retrieval-Augmented Generation (RAG)\\n- **Explanation:** Combines LLMs with external knowledge bases to provide up-to-date and factual responses.\\n- **Impact:** Reduces hallucinations, provides citations, ideal for enterprise knowledge management.\\n- **Sources:** Smith et al. (2023) 'RAG in Practice'.\\n\\n### 2. Mixture-of-Experts (MoE) Architectures\\n- **Explanation:** LLMs with specialized 'expert' subnetworks activated selectively per query, improving efficiency and scalability.\\n- **Impact:** More cost-effective, faster inference for large models, enabling larger scale enterprise deployments.\\n- **Sources:** Google DeepMind (2024) 'Scaling MoE Models'."
    }
    """,
    context=[collect_reports_news],
    agent=Seer,
    name='emerging_tech_trends'
)
identify_reg_ethic_shift = Task(
    description="Research and identify 2-3 significant emerging regulatory frameworks (e.g., AI Acts, data governance laws, industry-specific compliance standards) or evolving ethical considerations specifically impacting enterprise LLM deployment. Analyze their potential implications for 'MostlyOpenAI' and its clients, considering both challenges and opportunities.",
    expected_output="""A JSON object structured with two main keys: 'plot_data' and 'summary_text'.
    The 'plot_data' key should contain a dictionary suitable for plotting, with 'labels' (List[str] of regulatory/ethical trend names)
    and 'values' (List[float] representing a quantifiable impact score, urgency level, or perceived risk).
    The 'summary_text' key should contain a comprehensive markdown string detailing each regulatory or ethical trend,
    including its nature, potential impact on LLM development/deployment, and direct implications for enterprise LLM providers.

    Example:
    {
      "plot_data": {
        "labels": ["AI Act (EU)", "Data Sovereignty", "Bias Audits"],
        "values": [9.5, 8.0, 7.0]
      },
      "summary_text": "## Regulatory & Ethical Shifts in LLMs\\n\\n### 1. EU AI Act\\n- **Nature:** Comprehensive regulation classifying AI systems by risk level.\\n- **Impact:** Stricter compliance, increased development costs, potential market fragmentation.\\n- **Implications for MostlyOpenAI:** Requires extensive legal review, robust risk assessment frameworks, and potential re-design of high-risk components.\\n\\n### 2. Data Sovereignty Movement\\n- **Nature:** Demand for data to be processed and stored within national borders.\\n- **Impact:** Challenges global cloud LLM deployments, necessitates regional data centers.\\n- **Implications for MostlyOpenAI:** Need for localized infrastructure, data governance compliance for specific regions."
    }
    """,
    context=[collect_reports_news],
    agent=Seer,
    name='regulatory_ethical_shifts'
)
# summary tasks
# Final task, run by Nexus: turns every upstream result into charts, a
# markdown report file and finally a PDF.
# Changes from the original:
#   * Adjacent description fragments now end with a space, so sentences are
#     no longer glued together ("report.Each JSON ...").
#   * Step 4 names the 'File Writer Tool' (the original said 'File Write Tool').
#     NOTE(review): the Nexus agent must actually be given a file-writing
#     tool for this step to be possible.
#   * `output_file` used to end in ".pdf", but it stores this task's plain-text
#     answer (the path string), which would leave a text file posing as a PDF
#     next to the real report. It now points at a ".txt" file.
#   * `output_key` is not a CrewAI Task option; the label is passed as `name`.
compile_all = Task(
    description="Compile all insights and findings from the provided JSON contexts (competitor profiles, marketing analysis, "
        "emerging trends, and regulatory shifts) into a single, comprehensive C-level executive report. "
        "Each JSON contains both 'plot_data' for charts and 'summary_text' for content."
        "\n\n**Report Generation Process:**"
        "\n1. **Data Extraction & Chart Planning:** Iterate through each provided JSON context. Extract all 'plot_data' and 'summary_text' components. Identify key data points and suitable chart types (bar or line) for visualization based on the analysis."
        "\n2. **Chart Creation:** For each identified 'plot_data', use the **'Generate Plot Image' tool** to create high-quality, relevant charts. Ensure each chart has a clear, descriptive title and appropriate axis labels. Save each chart image, making sure to track and remember all generated image file paths for later use."
        "\n3. **Comprehensive Report Text Synthesis:** Consolidate ALL the extracted 'summary_text' components into one cohesive, professional, and concise markdown report. Structure the report to include:"
        "\n   - An Executive Summary (1-2 paragraphs)."
        "\n   - Dedicated sections for: "
        "\n     a. Competitor Landscape Analysis (integrating competitor profiles and marketing analysis, including strategic takeaways for 'MostlyOpenAI')."
        "\n     b. Emerging Technologies & Trends (highlighting opportunities and potential disruptions from the emerging tech trends analysis)."
        "\n     c. Regulatory & Ethical Implications (outlining compliance needs and ethical positioning from the regulatory/ethical shifts analysis)."
        "\n   Explicitly reference each generated chart within the relevant sections (e.g., 'As shown in Figure 1, the Q3 growth trend...'). Ensure all relevant sources are clearly mentioned."
        "\n4. **Intermediate Text Output:** Save this complete, synthesized markdown report text to a file using the **'File Writer Tool'**. Name it 'executive_report_content.md' in the 'output/' directory."
        "\n5. **Final PDF Assembly:** Utilize the **'Create PDF Report' tool**. Provide it with the path to the 'executive_report_content.md' file and the complete list of all chart image file paths generated. Title the final PDF 'Enterprise LLM Landscape: Executive Summary'. Save the final PDF as 'Enterprise_LLM_Report.pdf' in the 'output/reports/' directory."
        "\n\n**The final output of this task MUST be the absolute file path to the completed PDF report.**",
    expected_output="A string representing the absolute file path to the completed, visually rich PDF executive report, which includes all synthesized analysis, trends, shifts, and embedded charts. Example: 'output/reports/Enterprise_LLM_Report.pdf'",
    context=[identify_key_market_segments, collect_reports_news, profile_competitor, analyze_comp_markt_position, identify_trends, identify_reg_ethic_shift],
    agent=Nexus,
    output_file='output/reports/MostlyOpenAI_Market_Report_path.txt',
    name='final_strategic_report'
)

# Setting up the crew

In [None]:
# The four agents taking part in the run.
crew_agents = [Archivist, Shadow, Seer, Nexus]

# Tasks in execution order: research first, then competitor and trend
# analysis, and the report compilation last.
crew_tasks = [
    identify_key_market_segments,
    collect_reports_news,
    profile_competitor,
    analyze_comp_markt_position,
    identify_trends,
    identify_reg_ethic_shift,
    compile_all,
]

market_researcher_mostlyopenai_crew = Crew(
    agents=crew_agents,
    tasks=crew_tasks,
    process=Process.sequential,
    verbose=True,
)

# Start the run.
# NOTE(review): the variable holds whatever kickoff() returns -- presumably the
# crew's output object rather than a bare path string; confirm before using it
# as a file path.
final_report_pdf_path = market_researcher_mostlyopenai_crew.kickoff()