In [None]:
from fpdf import FPDF

def generate_financial_report(pdf_path):
    """Write a sample financial report for "XYZ Ltd." to a PDF file.

    The report consists of a bold, centred title followed by a block of
    hard-coded summary text (headline figures, key changes and future growth
    prospects).

    Args:
        pdf_path: Destination path handed to ``FPDF.output``.

    Side effects:
        Creates/overwrites the file at ``pdf_path`` and prints a confirmation
        line to stdout.
    """
    # The body is kept as flush-left lines so that the indentation of this
    # source file does not leak into the rendered document (a triple-quoted
    # literal inside the function would carry four leading spaces per line).
    report_lines = [
        "Company: XYZ Ltd.",
        "Fiscal Year: 2024",
        "Revenue: $1.2 Billion",
        "Net Profit: $150 Million",
        "Growth Rate: 12%",
        "",
        "Key Changes:",
        "- Expansion into new markets",
        "- Investment in AI-driven automation",
        "- Strategic partnerships with fintech firms",
        "",
        "Future Growth Prospects:",
        "- Expected revenue growth of 15% next year",
        "- New product line launching in Q3 2025",
        "- Increased R&D spending for innovation",
    ]

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()

    pdf.set_font("Arial", style='B', size=16)
    # Width 0 stretches the cell from the left margin to the right margin, so
    # align='C' centres the title within the printable area. A fixed width of
    # 200 ran past the right margin of the default page and shifted the title.
    pdf.cell(0, 10, "Company Financial Report", ln=True, align='C')

    pdf.ln(10)
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, "\n".join(report_lines))

    pdf.output(pdf_path)
    print(f"Financial report generated: {pdf_path}")

# Entry point for this cell: writes the sample report to the relative path
# "financial_report.pdf".
if __name__ == "__main__":
    generate_financial_report("financial_report.pdf")


In [None]:
# ^ The cell above writes a sample financial report PDF (financial_report.pdf).

In [None]:
import nltk
# Fetch the NLTK data packages 'punkt' and 'averaged_perceptron_tagger'.
# NOTE(review): presumably these back the TextBlob sentence splitting used
# further down — confirm; newer NLTK releases may look for 'punkt_tab'
# instead of 'punkt', so verify against the installed version.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')


In [None]:
import fitz  # PyMuPDF for PDF text extraction
import re
import spacy
import nltk
from textblob import TextBlob

In [None]:


# Load English NLP model
# NOTE(review): `nlp` is not referenced by any function visible in this
# notebook — confirm whether the spaCy pipeline is still needed.
nlp = spacy.load("en_core_web_sm")

# Keywords for investment-related information
# extract_key_sections() searches for each phrase as a substring of the
# lower-cased line text; the matching phrase is also used as the key of the
# section in the returned dict.
KEY_SECTIONS = [
    "growth prospects", "future outlook", "market trends", 
    "business risks", "financial outlook", "key triggers",
    "revenue forecast", "earnings report"
]

def extract_text_from_pdf(pdf_path):
    """Return the plain text of every page of the PDF at ``pdf_path``.

    Pages are read in document order using PyMuPDF's ``"text"`` extraction
    mode, and the text of each page is followed by a newline.

    Args:
        pdf_path: Path of the PDF file to open with ``fitz.open``.

    Returns:
        A single string with the concatenated page texts (empty if the
        document has no pages).
    """
    # Open the document in a ``with`` block so it is closed even when
    # extraction raises part-way through; previously it was never closed.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text("text") + "\n" for page in doc)

def extract_key_sections(text, sections=None):
    """Group the lines of ``text`` under the key-section phrase they follow.

    The text is scanned line by line. A line that contains one of the section
    phrases (case-insensitive substring match) starts or resumes that
    section; that line and every following line are collected, stripped of
    surrounding whitespace, until another phrase is seen. Lines before the
    first phrase are ignored.

    Args:
        text: Full document text; lines are separated by ``"\\n"``.
        sections: Optional iterable of phrases to look for. Defaults to the
            module-level ``KEY_SECTIONS``.

    Returns:
        Dict mapping each phrase that occurred to the list of stripped lines
        collected for it, in document order.
    """
    if sections is None:
        sections = KEY_SECTIONS

    # Pair each phrase with its lower-cased form once, so matching stays
    # case-insensitive without re-lowering inside the loop. Empty phrases are
    # skipped because "" is a substring of every line.
    needles = [(section, section.lower()) for section in sections if section]

    extracted_data = {}
    current_section = None
    for line in text.split("\n"):
        line_lower = line.lower()

        matched = None
        for section, needle in needles:
            if needle in line_lower:
                # When several phrases occur on one line the last one in
                # `sections` wins, as before.
                matched = section

        if matched is not None:
            current_section = matched
            # setdefault (not assignment) so that a phrase recurring later in
            # the document does not throw away the lines already collected.
            extracted_data.setdefault(current_section, [])

        if current_section is not None:
            extracted_data[current_section].append(line.strip())

    return extracted_data

def extract_financial_figures(text):
    """Extract dollar amounts and percentages from ``text``.

    Two kinds of figures are recognised:

    * Dollar amounts such as ``$150``, ``$1,234.50`` or ``$1.2 Billion``. An
      optional magnitude word (thousand/million/billion/trillion, any case)
      separated by at most one space is kept with the amount, so the scale of
      the figure is not lost.
    * Percentages such as ``12%`` or ``12.5%``. The decimal part is included;
      matching only ``\\d+%`` would report ``12.5%`` as ``5%``.

    Args:
        text: Text to scan.

    Returns:
        List of matched substrings in order of appearance (empty if none).
    """
    figure_pattern = (
        r"\$\d+(?:,\d{3})*(?:\.\d+)?"
        r"(?:[ ]?(?:thousand|million|billion|trillion)\b)?"
        r"|\d+(?:\.\d+)?%"
    )
    # Only non-capturing groups are used, so findall yields whole matches.
    financial_data = re.findall(figure_pattern, text, flags=re.IGNORECASE)
    return financial_data

def summarize_text(text, max_sentences=5):
    """Return the leading sentences of ``text`` as a crude summary.

    The text is split into sentences with TextBlob and the first
    ``max_sentences`` of them are returned unchanged; no ranking or
    filtering is applied.

    Args:
        text: Text to summarise.
        max_sentences: Maximum number of sentences to return (default 5).

    Returns:
        List of at most ``max_sentences`` TextBlob sentence objects.

    Raises:
        ValueError: If ``max_sentences`` is negative (a negative slice bound
            would silently drop sentences from the end instead).
    """
    if max_sentences < 0:
        raise ValueError("max_sentences must be non-negative")
    blob = TextBlob(text)
    return blob.sentences[:max_sentences]

def analyze_pdf(pdf_path):
    """Run the full extraction pipeline on one PDF.

    Reads the document text, groups it into key sections, pulls out the
    financial figures, and summarises each section from its lines joined
    with single spaces.

    Returns:
        Dict with two keys: ``"financial_data"`` (the extracted figures) and
        ``"insights"`` (section name -> summary of that section).
    """
    raw_text = extract_text_from_pdf(pdf_path)
    sections_found = extract_key_sections(raw_text)
    figures = extract_financial_figures(raw_text)

    section_summaries = {
        name: summarize_text(" ".join(lines))
        for name, lines in sections_found.items()
    }

    return {"financial_data": figures, "insights": section_summaries}

# Entry point for this cell: analyse one PDF and print the extracted figures
# followed by the per-section summaries.
# NOTE(review): the generator cell earlier in this notebook writes
# "financial_report.pdf", whereas this cell reads "company_report.pdf" —
# confirm which file is intended.
if __name__ == "__main__":
    pdf_path = "company_report.pdf"  # Change to your PDF file
    report = analyze_pdf(pdf_path)

    print("=== Financial Figures Extracted ===")
    print(report["financial_data"])

    print("\n=== Key Investment Insights ===")
    # One heading per section (upper-cased name), then one bullet per sentence.
    for section, summary in report["insights"].items():
        print(f"\n[{section.upper()}]")
        for sentence in summary:
            print(f"- {sentence}")
