In [1]:
print("ok")

ok


## System Overview
The user submits a single natural-language request, for example:
- “Generate a strategic report on the electric vehicle (EV) market and its key players”

The system then carries out the whole pipeline automatically:
- research → analyze → generate → output final report.

## System Architecture

```
                 ┌──────────────────────────┐
                 │  User Query Interface    │
                 └────────────┬─────────────┘
                              ▼
                 ┌──────────────────────────┐
                 │     Task Orchestrator    │ ← (delegates steps)
                 └────────────┬─────────────┘
        ┌─────────────────────┼──────────────────────┐
        ▼                     ▼                      ▼
┌─────────────┐      ┌────────────────┐     ┌────────────────────┐
│ Data Fetcher│      │  NLP Analyzer  │     │ Report Generator   │
│ (Web/API)   │      │  (Trends/Comp) │     │ (Formatted Output) │
└─────────────┘      └────────────────┘     └────────────────────┘
        ▼                                          ▼
                       ┌────────────────────────┐
                       │ PDF / HTML Final Report│
                       └────────────────────────┘


```
### 🛠 Tools/Stack Recommendation

| Module | Tool/Library |
| --- | --- |
| Web UI (optional) | Streamlit / React |
| Backend | FastAPI / Flask |
| Data Fetching | SerpAPI / NewsAPI |
| Analysis | OpenAI / HuggingFace |
| Agent Chain | LangChain |
| Report Formatting | Jinja2 + WeasyPrint |
| Output | PDF/HTML |

In [1]:
import os 
from dotenv import load_dotenv
from langchain.llms import OpenAI

In [2]:
# Load variables from a local .env file (python-dotenv) so the key does not
# have to be hardcoded in the notebook.
load_dotenv()
# os.getenv returns None when the variable is not defined anywhere.
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

In [3]:
# Fail early with a readable message: when the key is missing, os.getenv gave
# None and assigning None into os.environ raises an opaque TypeError.
if not OPENAI_API_KEY:
    raise EnvironmentError(
        "OPENAI_API_KEY is not set; define it in the environment or in a .env file."
    )
# Re-export the key so libraries that read it from the environment can find it.
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

In [4]:
from langchain.document_loaders import PyPDFLoader

In [5]:
%pwd

'f:\\ML for NLP\\Class\\Web scrapping task\\Electrical_car'

In [9]:
import PyPDF2

# Load the source PDF. The path is relative, so the file must sit in the
# notebook's working directory.
file_path = "ev_articles.pdf"
loader = PyPDFLoader(file_path)
# `data` is a sequence of documents; the cells below read `.page_content`
# from each entry and report its length.
data = loader.load()

In [10]:
print(data[0].page_content[:500])

The Biggest Winners From Tesla's Sales Slump - Business Insider
Date: None
Tesla is losing ground at home and globallyas its sales continue a downward slide, ceding market
share to automakers that have flooded the zone with new models in recent years.
General Motors, Ford, and Volkswagen were some of the biggest winners in the US in the first
quarter of 2025, during which the EV market grew by 11% to nearly 300,000 cars sold, according to
first-quarter data from Cox Automotive.
In Europe, homegr


In [12]:
len(data)

5

In [13]:
# Merge the text of every loaded page into one string.
# Pages are joined with a newline so the last word of one page does not fuse
# with the first word of the next; str.join also avoids rebuilding the string
# on every iteration the way `+=` in a loop does.
question_gen = "\n".join(page.page_content for page in data)

In [14]:
question_gen

"The Biggest Winners From Tesla's Sales Slump - Business Insider\nDate: None\nTesla is losing ground at home and globallyas its sales continue a downward slide, ceding market\nshare to automakers that have flooded the zone with new models in recent years.\nGeneral Motors, Ford, and Volkswagen were some of the biggest winners in the US in the first\nquarter of 2025, during which the EV market grew by 11% to nearly 300,000 cars sold, according to\nfirst-quarter data from Cox Automotive.\nIn Europe, homegrown automakers such as BMW and VW have seen their electric vehicle\nbusinesses skyrocket, while Chinese brands like BYD and Geely have also made a major push into\nthe continent.\nTesla sold 128,100 vehicles in the United States during the first quarter of the year, down 8.6% from\nthe same time last year and a whopping 21% decline compared with 2023, Cox said.\nDespite seeing its share of the US EV market fall from 51% to 44% over the past year, per the Cox\ndata, Tesla remains the bigg

In [16]:
from langchain.text_splitter import TokenTextSplitter

In [18]:
# Split the combined article text into large chunks for question generation.
# Sizes are counted in tokens of the named model's tokenizer, not characters
# (TokenTextSplitter semantics); consecutive chunks share `chunk_overlap`
# tokens.
splitter_question_gen = TokenTextSplitter(
    model_name= "gpt-3.5-turbo",
    chunk_size= 10000,
    chunk_overlap = 200
)

# Returns a list of plain strings; in the recorded run the whole text fits
# into a single chunk.
chunk_ques_gen = splitter_question_gen.split_text(question_gen)

In [19]:
chunk_ques_gen

["The Biggest Winners From Tesla's Sales Slump - Business Insider\nDate: None\nTesla is losing ground at home and globallyas its sales continue a downward slide, ceding market\nshare to automakers that have flooded the zone with new models in recent years.\nGeneral Motors, Ford, and Volkswagen were some of the biggest winners in the US in the first\nquarter of 2025, during which the EV market grew by 11% to nearly 300,000 cars sold, according to\nfirst-quarter data from Cox Automotive.\nIn Europe, homegrown automakers such as BMW and VW have seen their electric vehicle\nbusinesses skyrocket, while Chinese brands like BYD and Geely have also made a major push into\nthe continent.\nTesla sold 128,100 vehicles in the United States during the first quarter of the year, down 8.6% from\nthe same time last year and a whopping 21% decline compared with 2023, Cox said.\nDespite seeing its share of the US EV market fall from 51% to 44% over the past year, per the Cox\ndata, Tesla remains the big

In [20]:
len(chunk_ques_gen)

1

In [21]:
type(chunk_ques_gen)

list

In [22]:
# Sanity check: each chunk produced by split_text is a plain string.
# NOTE(review): text_data is not read by any later cell visible here.
text_data = chunk_ques_gen[0]  # first chunk
print(type(text_data))

<class 'str'>


In [23]:
from langchain.docstore.document import Document

In [24]:
# Wrap each text chunk in a Document so it can be passed to
# split_documents below. Only page_content is supplied, so metadata
# stays empty (see the recorded output).
document_ques_gen = [Document(page_content=t) for t in chunk_ques_gen]

In [25]:
document_ques_gen

[Document(metadata={}, page_content="The Biggest Winners From Tesla's Sales Slump - Business Insider\nDate: None\nTesla is losing ground at home and globallyas its sales continue a downward slide, ceding market\nshare to automakers that have flooded the zone with new models in recent years.\nGeneral Motors, Ford, and Volkswagen were some of the biggest winners in the US in the first\nquarter of 2025, during which the EV market grew by 11% to nearly 300,000 cars sold, according to\nfirst-quarter data from Cox Automotive.\nIn Europe, homegrown automakers such as BMW and VW have seen their electric vehicle\nbusinesses skyrocket, while Chinese brands like BYD and Geely have also made a major push into\nthe continent.\nTesla sold 128,100 vehicles in the United States during the first quarter of the year, down 8.6% from\nthe same time last year and a whopping 21% decline compared with 2023, Cox said.\nDespite seeing its share of the US EV market fall from 51% to 44% over the past year, per t

In [26]:
type(document_ques_gen[0])

langchain_core.documents.base.Document

In [27]:
# Split the documents for answer generation.
# Uses much smaller chunks (1000 tokens, 100-token overlap) than the
# question-generation splitter above.
splitter_ans_gen = TokenTextSplitter(
    model_name="gpt-3.5-turbo",
    chunk_size=1000,
    chunk_overlap=100
)

# Produces a list of Document objects (3 in the recorded run).
document_answer_gen = splitter_ans_gen.split_documents(document_ques_gen)

In [28]:
document_answer_gen

[Document(metadata={}, page_content="The Biggest Winners From Tesla's Sales Slump - Business Insider\nDate: None\nTesla is losing ground at home and globallyas its sales continue a downward slide, ceding market\nshare to automakers that have flooded the zone with new models in recent years.\nGeneral Motors, Ford, and Volkswagen were some of the biggest winners in the US in the first\nquarter of 2025, during which the EV market grew by 11% to nearly 300,000 cars sold, according to\nfirst-quarter data from Cox Automotive.\nIn Europe, homegrown automakers such as BMW and VW have seen their electric vehicle\nbusinesses skyrocket, while Chinese brands like BYD and Geely have also made a major push into\nthe continent.\nTesla sold 128,100 vehicles in the United States during the first quarter of the year, down 8.6% from\nthe same time last year and a whopping 21% decline compared with 2023, Cox said.\nDespite seeing its share of the US EV market fall from 51% to 44% over the past year, per t

In [29]:
len(document_answer_gen)

3

In [30]:

from langchain.chat_models import ChatOpenAI

In [31]:
# Set up the chat model used for question generation.
# NOTE(review): the recorded output under this cell is the tail of a warning
# that points at this ChatOpenAI(...) call — presumably the deprecation
# notice for importing ChatOpenAI from langchain.chat_models; confirm and
# plan a migration.
llm_ques_gen_pipeline = ChatOpenAI(
    model='gpt-3.5-turbo',
    temperature=0.3
)

  llm_ques_gen_pipeline = ChatOpenAI(


In [32]:
# NOTE(review): this re-creates splitter_ans_gen with the same arguments as
# the earlier cell In [27]; the duplicate cell can likely be removed.
splitter_ans_gen = TokenTextSplitter(
    model_name = 'gpt-3.5-turbo',
    chunk_size = 1000,
    chunk_overlap = 100
)

In [33]:
# NOTE(review): repeats the split already performed in cell In [27] on the
# same input, overwriting document_answer_gen with an equivalent result
# (3 documents in both recorded runs).
document_answer_gen = splitter_ans_gen.split_documents(
    document_ques_gen
)

In [34]:
document_answer_gen

[Document(metadata={}, page_content="The Biggest Winners From Tesla's Sales Slump - Business Insider\nDate: None\nTesla is losing ground at home and globallyas its sales continue a downward slide, ceding market\nshare to automakers that have flooded the zone with new models in recent years.\nGeneral Motors, Ford, and Volkswagen were some of the biggest winners in the US in the first\nquarter of 2025, during which the EV market grew by 11% to nearly 300,000 cars sold, according to\nfirst-quarter data from Cox Automotive.\nIn Europe, homegrown automakers such as BMW and VW have seen their electric vehicle\nbusinesses skyrocket, while Chinese brands like BYD and Geely have also made a major push into\nthe continent.\nTesla sold 128,100 vehicles in the United States during the first quarter of the year, down 8.6% from\nthe same time last year and a whopping 21% decline compared with 2023, Cox said.\nDespite seeing its share of the US EV market fall from 51% to 44% over the past year, per t

In [35]:
len(document_answer_gen)

3

In [36]:
type(document_answer_gen[0])

langchain_core.documents.base.Document

In [None]:
# ev_report_generator.py

import requests
from bs4 import BeautifulSoup
from datetime import datetime
from transformers import pipeline

# Optional: Use OpenAI or HuggingFace LLMs for generation

class EVReportGenerator:
    """Assemble a plain-text market-intelligence report on the EV industry.

    The generator downloads a fixed list of articles, summarizes their
    combined text for the "Executive Summary" section, fills the remaining
    sections with static placeholder text, and renders everything as a
    markdown-style string.

    Args:
        query: The user's request. It is stored and forwarded to
            ``fetch_web_data``, which currently ignores it.
        summarizer: Optional callable used instead of the default
            ``pipeline("summarization")``. It is called as
            ``summarizer(text, max_length=..., min_length=..., do_sample=False)``
            and must return a list whose first item has a ``'summary_text'``
            key. Supplying one avoids loading a model.
    """

    # Only the first paragraphs of each page are kept.
    MAX_PARAGRAPHS_PER_PAGE = 10
    # Seconds to wait for each HTTP request before giving up.
    REQUEST_TIMEOUT_SECONDS = 10
    # The combined source text is truncated to this many characters before
    # being handed to the summarizer.
    MAX_SUMMARY_INPUT_CHARS = 4000

    def __init__(self, query, summarizer=None):
        # Must be spelled __init__ (double underscores): a method named
        # `_init_` is never invoked by `EVReportGenerator(query)`.
        self.query = query
        self.sources = []
        self.sections = {
            "Executive Summary": "",
            "Market Overview": "",
            "Key Trends": "",
            "Competitor Analysis": "",
            "Strategic Recommendations": "",
            "Appendices": ""
        }
        # Build the default pipeline only when the caller did not inject one.
        self.summarizer = summarizer if summarizer is not None else pipeline("summarization")

    def fetch_web_data(self, search_terms):
        """Download the example articles and append their text to ``self.sources``.

        Args:
            search_terms: Currently unused; the URL list is static until real
                web scraping or an API search replaces it.

        Failures are reported with ``print`` and skipped (best-effort), so one
        unreachable page does not abort the whole report.
        """
        # Dummy static data for now
        # Replace with real web scraping or API search later
        example_urls = [
            "https://www.businessinsider.com/tesla-sales-slump-automakers-winning-gm-vw-byd-2025-4",
            "https://chargeasy.org/posts/the-ev-revolution-in-2025-a-comprehensive-look-at-market-trends-innovations-and-challenges"
        ]
        for url in example_urls:
            try:
                response = requests.get(url, timeout=self.REQUEST_TIMEOUT_SECONDS)
                # Treat 4xx/5xx responses as failures instead of summarizing
                # the text of an error page.
                response.raise_for_status()
                soup = BeautifulSoup(response.text, "html.parser")
                paragraphs = [
                    p.get_text()
                    for p in soup.find_all("p")[:self.MAX_PARAGRAPHS_PER_PAGE]
                ]
                page_text = "\n".join(paragraphs)
                # Pages without any paragraph text add nothing to the summary.
                if page_text.strip():
                    self.sources.append(page_text)
            except Exception as e:
                print(f"Error fetching {url}: {e}")

    def analyze_and_summarize(self):
        """Summarize the collected source text.

        Returns:
            The ``'summary_text'`` of the summarizer's first result, or a
            fallback sentence when no source text is available (the
            summarizer is not called in that case).
        """
        combined_text = "\n\n".join(self.sources)[:self.MAX_SUMMARY_INPUT_CHARS]
        if not combined_text.strip():
            return "No source material could be retrieved, so no summary is available."
        summary = self.summarizer(combined_text, max_length=500, min_length=100, do_sample=False)[0]['summary_text']
        return summary

    def fill_sections(self):
        """Populate ``self.sections``.

        Only the executive summary comes from the summarizer; every other
        section is static placeholder text.
        """
        summary = self.analyze_and_summarize()
        self.sections["Executive Summary"] = summary
        self.sections["Market Overview"] = "Global sales, regional insights, and forecasted trends."
        self.sections["Key Trends"] = "Solid-state batteries, charging infrastructure, government incentives."
        self.sections["Competitor Analysis"] = "Tesla, GM, BYD, VW, market share dynamics."
        self.sections["Strategic Recommendations"] = "Expand to new markets, invest in battery tech, secure supply chain."
        self.sections["Appendices"] = "Graph snapshots, financial tables, references."

    def generate_report(self):
        """Fetch sources, fill every section, and render the report.

        Returns:
            A string with a title, the generation date (YYYY-MM-DD), and one
            ``## <section>`` block per entry of ``self.sections`` in
            insertion order.
        """
        self.fetch_web_data(self.query)
        self.fill_sections()

        report = f"Electric Vehicle Market Intelligence Report\nGenerated on {datetime.now().strftime('%Y-%m-%d')}\n\n"
        for section, content in self.sections.items():
            report += f"## {section}\n{content}\n\n"
        return report

# Script entry point. The dunder spelling is required: `_name_` is an
# undefined name and raises NameError. Inside a notebook kernel __name__ is
# "__main__", so this cell also runs the generator when executed.
if __name__ == "__main__":
    query = "Generate a strategy intelligence report for the electric vehicle market and its key players."
    generator = EVReportGenerator(query)
    final_report = generator.generate_report()
    print(final_report)