In [None]:
 #Install all required packages
!pip install yfinance
!pip install google-generativeai
!pip install sentence-transformers
!pip install faiss-cpu
!pip install pdfplumber
!pip install python-pptx
!pip install requests
!pip install ipywidgets
!pip install pandas
!pip install numpy

print("All packages installed successfully!")


In [None]:
#Import all required libraries
import os
import requests
import json
import yfinance as yf
import google.generativeai as genai
import pandas as pd
import numpy as np
import tempfile
import zipfile
from datetime import datetime, timedelta
import re

# Document processing
import pdfplumber
from pptx import Presentation

# RAG components
from sentence_transformers import SentenceTransformer
import faiss

# Colab specific
from google.colab import files
import ipywidgets as widgets
from IPython.display import display, HTML, Markdown

print("All libraries imported successfully!")


All libraries imported successfully!


In [None]:
# Configure API keys
from google.colab import userdata

# Gemini is required by the memo generator, so a missing or inaccessible secret is
# allowed to raise here with Colab's own error message.
GEMINI_API_KEY = userdata.get('GEMINI_API_KEY')

# The News API key is optional: later cells check `if NEWS_API_KEY` and skip news
# when it is falsy. userdata.get raises when a secret is absent, so convert that
# into None here instead of aborting the notebook.
try:
    NEWS_API_KEY = userdata.get('NEWS_API_KEY')
except (userdata.SecretNotFoundError, userdata.NotebookAccessError) as exc:
    NEWS_API_KEY = None
    print(f"News API key unavailable ({exc}); news features will be disabled.")

# Configure Gemini
genai.configure(api_key=GEMINI_API_KEY)

print("APIs configured successfully!")


APIs configured successfully!


In [None]:
# Document Processing Class
class DocumentProcessor:
    """
    Extract, clean and chunk text from PDF and PowerPoint documents.

    The extraction helpers are best-effort: any failure is printed and an empty
    string is returned, so the caller always receives a string.
    """

    def __init__(self):
        print("Document Processor initialized")

    def extract_pdf_text(self, pdf_path):
        """
        Extract text from a PDF file using pdfplumber.

        Args:
            pdf_path: Value passed straight to ``pdfplumber.open``.

        Returns:
            The cleaned text of all pages that yielded text, each preceded by a
            ``--- Page N ---`` marker, or ``""`` if extraction failed.
        """
        try:
            parts = []
            with pdfplumber.open(pdf_path) as pdf:
                for page_num, page in enumerate(pdf.pages, start=1):
                    page_text = page.extract_text()
                    if page_text:
                        parts.append(f"\n--- Page {page_num} ---\n")
                        parts.append(page_text)

            return self.clean_text("".join(parts))
        except Exception as e:
            print(f"Error processing PDF: {e}")
            return ""

    def extract_pptx_text(self, pptx_path):
        """
        Extract text from a PowerPoint presentation.

        Args:
            pptx_path: Value passed straight to ``Presentation``.

        Returns:
            The cleaned text of every shape exposing a non-blank ``text``
            attribute, grouped under ``--- Slide N ---`` markers, or ``""`` if
            extraction failed.
        """
        try:
            parts = []
            presentation = Presentation(pptx_path)

            for slide_num, slide in enumerate(presentation.slides, start=1):
                parts.append(f"\n--- Slide {slide_num} ---\n")

                for shape in slide.shapes:
                    if hasattr(shape, "text") and shape.text.strip():
                        parts.append(shape.text + "\n")

            return self.clean_text("".join(parts))
        except Exception as e:
            print(f"Error processing PPTX: {e}")
            return ""

    def clean_text(self, text):
        """
        Clean and normalize extracted text.

        Collapses every whitespace run to a single space and drops characters
        outside a small allow-list. Currency, percent, slash, ampersand and plus
        signs are kept because removing them changes the meaning of financial
        figures (e.g. "$5.2B", "15%", "Q1/Q2", "R&D").

        Args:
            text: Raw extracted text.

        Returns:
            The normalized text with leading/trailing whitespace removed.
        """
        # Remove extra whitespace and normalize
        text = re.sub(r'\s+', ' ', text)
        # Remove special characters that might cause issues
        text = re.sub(r'[^\w\s\-\.\,\:\;\!\?\(\)\[\]\$\%\/\&\+]', '', text)
        return text.strip()

    def chunk_text(self, text, chunk_size=1000, overlap=100):
        """
        Split text into overlapping chunks for better context preservation.

        Each chunk is at most ``chunk_size`` characters. When a period is found
        in the later part of the window the chunk ends just after it, so chunks
        tend to stop at sentence boundaries. Consecutive chunks share
        ``overlap`` characters.

        Args:
            text: Text to split.
            chunk_size: Maximum chunk length in characters.
            overlap: Number of characters repeated between neighbouring chunks.

        Returns:
            A list of chunk strings. Text no longer than ``chunk_size`` is
            returned unchanged as a single-element list.

        Raises:
            ValueError: If the text needs splitting and ``chunk_size`` is not
                positive or ``overlap`` is not in ``[0, chunk_size)``; such
                values cannot make forward progress.
        """
        if len(text) <= chunk_size:
            return [text]

        if chunk_size <= 0:
            raise ValueError("chunk_size must be a positive integer")
        if overlap < 0 or overlap >= chunk_size:
            raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

        text_length = len(text)
        # A sentence boundary is only accepted beyond this offset from `start`.
        # Requiring it to lie past `overlap` guarantees the next start moves
        # forward even when overlap exceeds half the chunk size.
        min_break_offset = max(chunk_size // 2, overlap)

        chunks = []
        start = 0

        while start < text_length:
            end = min(start + chunk_size, text_length)

            # Try to break at sentence boundary
            if end < text_length:
                sentence_end = text.rfind('.', start, end)
                if sentence_end > start + min_break_offset:
                    end = sentence_end + 1

            chunk = text[start:end].strip()
            if chunk:
                chunks.append(chunk)

            # The chunk just emitted already reaches the end of the text; any
            # further chunk would only repeat its tail.
            if end >= text_length:
                break

            start = end - overlap

        return chunks

# Smoke check: instantiating the class confirms the cell above defined it without errors.
# Note that InvestmentMemoApp.__init__ constructs its own DocumentProcessor.
doc_processor = DocumentProcessor()
print("Document Processor class created successfully!")


Document Processor initialized
Document Processor class created successfully!


In [None]:
# Financial Data Class
class FinancialDataFetcher:
    """
    Reads company facts, valuation metrics and price history through yfinance.

    Every public method is best-effort: on any exception the error is printed
    and an empty dict is returned.
    """

    def __init__(self):
        print("Financial Data Fetcher initialized")

    def get_company_info(self, ticker):
        """
        Return descriptive company fields for ``ticker``.

        The result maps our own key names to values taken from the ticker's
        ``info`` dict, falling back to ``'N/A'`` (or ``0`` for market cap) when
        a field is absent. Returns ``{}`` on error.
        """
        # (output key, yfinance info key, default when missing)
        field_map = (
            ('company_name', 'longName', 'N/A'),
            ('sector', 'sector', 'N/A'),
            ('industry', 'industry', 'N/A'),
            ('market_cap', 'marketCap', 0),
            ('employee_count', 'fullTimeEmployees', 'N/A'),
            ('website', 'website', 'N/A'),
            ('business_summary', 'longBusinessSummary', 'N/A'),
        )
        try:
            raw = yf.Ticker(ticker).info
            return {out_key: raw.get(src_key, fallback) for out_key, src_key, fallback in field_map}
        except Exception as e:
            print(f"Error fetching company info: {e}")
            return {}

    def get_financial_metrics(self, ticker):
        """
        Return key financial metrics and ratios for ``ticker``.

        Price and market cap default to ``0``; every ratio defaults to
        ``'N/A'`` when the ticker's ``info`` dict lacks it. Returns ``{}`` on
        error.
        """
        # Metrics whose default is 0 rather than 'N/A'.
        numeric_defaults = (
            ('current_price', 'currentPrice'),
            ('market_cap', 'marketCap'),
        )
        ratio_fields = (
            ('pe_ratio', 'trailingPE'),
            ('forward_pe', 'forwardPE'),
            ('peg_ratio', 'pegRatio'),
            ('price_to_book', 'priceToBook'),
            ('ev_ebitda', 'enterpriseToEbitda'),
            ('profit_margin', 'profitMargins'),
            ('revenue_growth', 'revenueGrowth'),
            ('debt_to_equity', 'debtToEquity'),
            ('return_on_equity', 'returnOnEquity'),
            ('dividend_yield', 'dividendYield'),
            ('beta', 'beta'),
        )
        try:
            raw = yf.Ticker(ticker).info

            metrics = {}
            for out_key, src_key in numeric_defaults:
                metrics[out_key] = raw.get(src_key, 0)
            for out_key, src_key in ratio_fields:
                metrics[out_key] = raw.get(src_key, 'N/A')
            return metrics
        except Exception as e:
            print(f"Error fetching financial metrics: {e}")
            return {}

    def get_recent_performance(self, ticker, period="1y"):
        """
        Summarize price behaviour of ``ticker`` over ``period``.

        Computes the percentage return between the first and last close, the
        highest and lowest close, the mean volume, and the annualized
        volatility (daily close-to-close standard deviation scaled by
        sqrt(252), in percent). Returns ``{}`` when no history is available or
        on error.
        """
        try:
            history = yf.Ticker(ticker).history(period=period)
            if history.empty:
                return {}

            closes = history['Close']
            last_close = closes.iloc[-1]
            first_close = closes.iloc[0]

            summary = {}
            summary['period_return'] = ((last_close - first_close) / first_close) * 100
            summary['52_week_high'] = closes.max()
            summary['52_week_low'] = closes.min()
            summary['avg_volume'] = history['Volume'].mean()
            # Annualized
            summary['volatility'] = closes.pct_change().std() * np.sqrt(252) * 100
            return summary
        except Exception as e:
            print(f"Error fetching performance data: {e}")
            return {}

# Smoke check: instantiating the class confirms the cell above defined it without errors.
# No network request is made here; data is only fetched when a get_* method is called.
financial_fetcher = FinancialDataFetcher()
print("Financial Data Fetcher class created")


Financial Data Fetcher initialized
Financial Data Fetcher class created


In [None]:
# News Data Class
class NewsDataFetcher:
    """
    Fetches recent news about a company from the News API ``everything`` endpoint
    and prepares it as plain text for the RAG context.
    """

    def __init__(self, api_key):
        self.api_key = api_key
        self.base_url = "https://newsapi.org/v2/everything"
        print("News Data Fetcher initialized")

    def get_company_news(self, company_name, ticker, days_back=30, timeout=10):
        """
        Get recent news about the company.

        Runs several searches (quoted name, quoted ticker, "<name> stock",
        "<name> earnings"), merges the results, then filters and de-duplicates
        them. Searches built from an empty name or ticker are skipped.

        Args:
            company_name: Company name to search for; may be empty.
            ticker: Ticker symbol to search for; may be empty.
            days_back: How many days before now the search window starts.
            timeout: Seconds to wait for each HTTP request before giving up.

        Returns:
            Up to 20 article dicts as produced by ``filter_and_deduplicate``;
            an empty list when nothing was found or an unexpected error occurred.
        """
        try:
            company_name = (company_name or '').strip()
            ticker = (ticker or '').strip()

            # Calculate date range
            end_date = datetime.now()
            start_date = end_date - timedelta(days=days_back)

            # Search queries (try both company name and ticker)
            queries = []
            if company_name:
                queries.append(f'"{company_name}"')
            if ticker:
                queries.append(f'"{ticker}"')
            if company_name:
                queries.extend([f'{company_name} stock', f'{company_name} earnings'])

            all_articles = []

            for query in queries:
                params = {
                    'q': query,
                    'from': start_date.strftime('%Y-%m-%d'),
                    'to': end_date.strftime('%Y-%m-%d'),
                    'sortBy': 'relevancy',
                    'pageSize': 10,  # Limit to avoid hitting API limits
                    'language': 'en',
                    'apiKey': self.api_key
                }

                # A failed or timed-out request only loses this query's results.
                try:
                    response = requests.get(self.base_url, params=params, timeout=timeout)
                except requests.RequestException as e:
                    print(f"Error fetching news for query '{query}': {e}")
                    continue

                if response.status_code == 200:
                    all_articles.extend(response.json().get('articles') or [])
                else:
                    print(f"Error fetching news for query '{query}': {response.status_code}")

            # Remove duplicates and filter relevant articles
            unique_articles = self.filter_and_deduplicate(all_articles, company_name, ticker)

            return unique_articles[:20]  # Return top 20 most relevant

        except Exception as e:
            print(f"Error fetching news: {e}")
            return []

    def filter_and_deduplicate(self, articles, company_name, ticker):
        """
        Remove duplicate articles and filter for relevance.

        An article is relevant when its title or description contains the
        company name (case-insensitive substring) or the ticker as a standalone
        token (case-insensitive, not embedded in a longer word). Empty names or
        tickers never match. Articles are de-duplicated by title, keeping the
        first occurrence.

        Args:
            articles: Raw article dicts; ``title``, ``description``, ``source``,
                ``publishedAt`` and ``url`` may be missing or ``None``.
            company_name: Company name to look for; may be empty.
            ticker: Ticker symbol to look for; may be empty.

        Returns:
            A list of dicts with string values under ``title``, ``description``,
            ``published_at``, ``source`` and ``url``.
        """
        name_term = (company_name or '').strip().lower()
        ticker_term = (ticker or '').strip()
        # Lookarounds instead of \b so tickers that start or end with a
        # non-word character still match as a whole token.
        ticker_pattern = (
            re.compile(rf'(?<!\w){re.escape(ticker_term)}(?!\w)', re.IGNORECASE)
            if ticker_term else None
        )

        seen_titles = set()
        filtered_articles = []

        for article in articles:
            # The API may send explicit nulls, which .get(key, '') would pass through.
            title = article.get('title') or ''
            description = article.get('description') or ''

            # Skip if we've seen this title before
            if title in seen_titles:
                continue

            text_to_check = f"{title} {description}"
            name_hit = bool(name_term) and name_term in text_to_check.lower()
            ticker_hit = ticker_pattern is not None and ticker_pattern.search(text_to_check) is not None
            if not (name_hit or ticker_hit):
                continue

            seen_titles.add(title)
            filtered_articles.append({
                'title': title,
                'description': description,
                'published_at': article.get('publishedAt') or '',
                'source': (article.get('source') or {}).get('name') or '',
                'url': article.get('url') or ''
            })

        return filtered_articles

    def format_news_for_context(self, articles):
        """
        Format news articles into text for RAG system.

        Args:
            articles: Article dicts as returned by ``filter_and_deduplicate``.

        Returns:
            A numbered plain-text listing (title, optional description, source
            and the first 10 characters of the publication timestamp), or
            ``"No recent news available."`` when ``articles`` is empty.
        """
        if not articles:
            return "No recent news available."

        lines = ["Recent News:\n\n"]

        for i, article in enumerate(articles, 1):
            lines.append(f"{i}. {article['title']}\n")
            if article['description']:
                lines.append(f"   {article['description']}\n")
            published = (article['published_at'] or '')[:10]
            lines.append(f"   Source: {article['source']} | Date: {published}\n\n")

        return "".join(lines)

# Instantiate the news fetcher only when a News API key was configured.
# Without a key this cell leaves `news_fetcher` undefined and just reports it.
if NEWS_API_KEY:
    news_fetcher = NewsDataFetcher(NEWS_API_KEY)
    print("News Data Fetcher class created")
else:
    print("News API key not found")


News Data Fetcher initialized
News Data Fetcher class created


In [None]:
# # Cell 7: RAG System - Retrieval Augmented Generation
# class RAGSystem:


#     def __init__(self, model_name='all-MiniLM-L6-v2'):

#         print("Initializing RAG System...")
#         print("Loading embedding model (this may take a moment)...")

#         # Load the sentence transformer model
#         # This model converts text into numerical vectors (embeddings)
#         self.encoder = SentenceTransformer(model_name)

#         # Initialize variables for our vector database
#         self.index = None  # This will store our FAISS index
#         self.chunks = []   # This will store our original text chunks
#         self.chunk_metadata = []  # This will store metadata about each chunk

#         print("RAG System initialized successfully!")

#     def create_embeddings(self, text_chunks, metadata_list=None):

#         print(f"Creating embeddings for {len(text_chunks)} text chunks...")

#         # Store the original text chunks
#         self.chunks = text_chunks
#         self.chunk_metadata = metadata_list or [f"chunk_{i}" for i in range(len(text_chunks))]

#         # Convert text chunks to embeddings
#         # Each chunk becomes a vector of numbers representing its meaning
#         embeddings = self.encoder.encode(text_chunks, show_progress_bar=True)

#         # Create FAISS index for fast similarity search
#         # FAISS is a library by Facebook for efficient similarity search
#         dimension = embeddings.shape[1]  # Size of each embedding vector

#         # IndexFlatIP uses Inner Product for similarity (good for normalized embeddings)
#         self.index = faiss.IndexFlatIP(dimension)

#         # Normalize embeddings for better similarity search
#         faiss.normalize_L2(embeddings)

#         # Add embeddings to the index
#         self.index.add(embeddings.astype('float32'))

#         print(f"✅ Created embeddings database with {self.index.ntotal} chunks")

#     def retrieve_relevant_chunks(self, query, top_k=5):

#         if self.index is None or len(self.chunks) == 0:
#             print("❌ No embeddings database found. Please create embeddings first.")
#             return []

#         print(f"🔍 Searching for relevant information about: '{query[:50]}...'")

#         # Convert query to embedding
#         query_embedding = self.encoder.encode([query])
#         faiss.normalize_L2(query_embedding)

#         # Search for similar chunks
#         scores, indices = self.index.search(query_embedding.astype('float32'), top_k)

#         # Retrieve the actual text chunks
#         relevant_chunks = []
#         for i, (score, idx) in enumerate(zip(scores[0], indices[0])):
#             if idx < len(self.chunks):  # Make sure index is valid
#                 relevant_chunks.append({
#                     'text': self.chunks[idx],
#                     'metadata': self.chunk_metadata[idx],
#                     'similarity_score': float(score),
#                     'rank': i + 1
#                 })

#         print(f"✅ Found {len(relevant_chunks)} relevant chunks")
#         return relevant_chunks

#     def get_context_for_query(self, query, max_context_length=3000):
#         relevant_chunks = self.retrieve_relevant_chunks(query, top_k=8)

#         if not relevant_chunks:
#             return "No relevant information found in documents."

#         # Combine chunks into context, respecting length limits
#         context = ""
#         for chunk in relevant_chunks:
#             chunk_text = f"\n--- Relevant Information (Score: {chunk['similarity_score']:.3f}) ---\n"
#             chunk_text += chunk['text'] + "\n"

#             if len(context + chunk_text) > max_context_length:
#                 break

#             context += chunk_text

#         return context

# # Initialize RAG system
# print("Creating RAG System...")
# rag_system = RAGSystem()
# print("RAG System created successfully!")


In [None]:
# Cell 7: RAG System - Retrieval Augmented Generation (with debugging)
class RAGSystem:
    """
    Minimal retrieval layer: embeds text chunks with a sentence-transformer
    model, stores them in a FAISS inner-product index over L2-normalized
    vectors, and returns the chunks most similar to a query.
    """

    def __init__(self, model_name='all-MiniLM-L6-v2'):
        """Load the embedding model ``model_name`` and start with an empty index."""
        print("🚀 Initializing RAG System...")
        print(f"📥 Loading embedding model: {model_name} (this may take a moment)...")

        # Load sentence transformer model
        self.encoder = SentenceTransformer(model_name)

        # Initialize variables for our vector database
        self.index = None  # store FAISS index
        self.chunks = []   # store original text chunks
        self.chunk_metadata = []  # store metadata about each chunk

        print("✅ RAG System initialized successfully!")

    def create_embeddings(self, text_chunks, metadata_list=None):
        """
        Embed ``text_chunks`` and (re)build the search index from them.

        Any previously built index is replaced. With no chunks the index is
        reset to empty instead of failing.

        Args:
            text_chunks: Sequence of strings to index.
            metadata_list: Optional per-chunk metadata, parallel to
                ``text_chunks``; defaults to ``"chunk_<i>"`` labels.
        """
        print(f"\n🧩 Creating embeddings for {len(text_chunks)} text chunks...")

        if len(text_chunks) == 0:
            # Nothing to embed: the encoder output would have no dimension to read.
            self.index = None
            self.chunks = []
            self.chunk_metadata = []
            print("⚠️ No text chunks supplied; embedding database left empty.")
            return

        # Store original text chunks
        self.chunks = text_chunks
        self.chunk_metadata = metadata_list or [f"chunk_{i}" for i in range(len(text_chunks))]

        # Convert text chunks to embeddings. FAISS works on contiguous float32
        # arrays, so convert before normalizing in place.
        embeddings = np.ascontiguousarray(
            self.encoder.encode(text_chunks, show_progress_bar=True), dtype='float32'
        )

        dimension = embeddings.shape[1]
        print(f"📐 Each embedding vector has {dimension} dimensions")

        # Create FAISS index
        self.index = faiss.IndexFlatIP(dimension)

        # Normalize embeddings so the inner product equals cosine similarity
        faiss.normalize_L2(embeddings)

        # Add to index
        self.index.add(embeddings)

        print(f"✅ Embedding database created with {self.index.ntotal} chunks")

    def retrieve_relevant_chunks(self, query, top_k=5):
        """
        Return up to ``top_k`` indexed chunks most similar to ``query``.

        Args:
            query: Search text.
            top_k: Maximum number of results; capped at the index size.

        Returns:
            A list of dicts with ``text``, ``metadata``, ``similarity_score``
            and 1-based ``rank``, best match first. Empty when no index exists.
        """
        if self.index is None or len(self.chunks) == 0:
            print("❌ No embeddings found! Please run create_embeddings() first.")
            return []

        # Asking FAISS for more neighbours than it holds yields padding entries.
        top_k = min(top_k, self.index.ntotal)
        if top_k <= 0:
            return []

        print(f"\n🔍 Query received: \"{query}\"")
        query_embedding = np.ascontiguousarray(self.encoder.encode([query]), dtype='float32')
        print(f"📐 Query embedding shape: {query_embedding.shape}")

        faiss.normalize_L2(query_embedding)

        scores, indices = self.index.search(query_embedding, top_k)

        print(f"📊 Top {top_k} results retrieved from FAISS index:")
        relevant_chunks = []
        for score, idx in zip(scores[0], indices[0]):
            idx = int(idx)
            # FAISS marks missing neighbours with -1, which would otherwise
            # index the last chunk from the end of the list.
            if not 0 <= idx < len(self.chunks):
                continue

            rank = len(relevant_chunks) + 1
            chunk_text = self.chunks[idx][:100].replace("\n", " ") + "..."  # preview first 100 chars
            print(f"   {rank}. Score={score:.4f}, ChunkID={idx}, Metadata={self.chunk_metadata[idx]}")
            print(f"      Preview: {chunk_text}")
            relevant_chunks.append({
                'text': self.chunks[idx],
                'metadata': self.chunk_metadata[idx],
                'similarity_score': float(score),
                'rank': rank
            })

        return relevant_chunks

    def get_context_for_query(self, query, max_context_length=3000):
        """
        Build a context string for ``query`` from the best-matching chunks.

        Chunks are appended in rank order, each under a header carrying its
        score, until the next one would exceed ``max_context_length``
        characters. If even the best chunk is too long on its own, it is
        truncated to the limit so the context is not empty.

        Returns:
            The assembled context, or a fixed "no relevant information" message
            when nothing was retrieved.
        """
        print("\n📌 Assembling context for query...")
        relevant_chunks = self.retrieve_relevant_chunks(query, top_k=8)

        if not relevant_chunks:
            print("⚠️ No relevant chunks found.")
            return "No relevant information found in documents."

        context = ""
        for chunk in relevant_chunks:
            chunk_text = (
                f"\n--- Relevant Information (Score: {chunk['similarity_score']:.3f}) ---\n"
                + chunk['text'] + "\n"
            )
            if len(context) + len(chunk_text) > max_context_length:
                print("⚠️ Context length limit reached, stopping assembly. [SAFTETY CHECK]")
                if not context:
                    context = chunk_text[:max_context_length]
                break
            context += chunk_text

        print(f"✅ Final context assembled (length: {len(context)} characters)")
        return context


# Initialise RAG system (loads the default embedding model).
# Note that InvestmentMemoApp.__init__ constructs its own RAGSystem, which loads the model again.
print("🔧 Creating RAG System instance...")
rag_system = RAGSystem()
print("🎉 RAG System ready to use!")


🔧 Creating RAG System instance...
🚀 Initializing RAG System...
📥 Loading embedding model: all-MiniLM-L6-v2 (this may take a moment)...
✅ RAG System initialized successfully!
🎉 RAG System ready to use!


In [None]:
# Cell 8: AI Memo Generator
class MemoGenerator:
    """
    Builds the sections of an investment memo by prompting a Gemini model.

    Each ``generate_*`` method formats a prompt from the supplied dicts and
    context text, sends it to the model, and returns the stripped response
    text. If the model call fails, an ``"Error generating ...: <reason>"``
    string is returned instead of raising.
    """

    def __init__(self, model_name='gemini-2.0-flash-exp'):
        """Create the generative model handle for ``model_name``."""
        self.model = genai.GenerativeModel(model_name)
        print("Memo Generator initialized")

    @staticmethod
    def _format_number(value, spec, prefix='', suffix=''):
        """Format ``value`` with format ``spec``; return 'N/A' if it is not numeric."""
        try:
            return f"{prefix}{format(value, spec)}{suffix}"
        except (TypeError, ValueError):
            # None, 'N/A' and other non-numeric placeholders end up here.
            return 'N/A'

    def _generate(self, prompt, section_label):
        """Send ``prompt`` to the model; return its text or an error string."""
        try:
            response = self.model.generate_content(prompt)
            return response.text.strip()
        except Exception as e:
            return f"Error generating {section_label}: {e}"

    def generate_executive_summary(self, company_info, financial_data, context):
        """
        Generate the executive summary section.

        Args:
            company_info: Dict with ``company_name``, ``sector``, ``industry``.
            financial_data: Dict with ``market_cap``, ``current_price``,
                ``pe_ratio``, ``revenue_growth``, ``profit_margin``.
            context: Text retrieved from the uploaded documents.

        Returns:
            The model's summary, or an error string if generation failed.
        """
        market_cap = self._format_number(financial_data.get('market_cap', 0), ',', prefix='$')
        current_price = self._format_number(financial_data.get('current_price', 0), '.2f', prefix='$')

        prompt = f"""
        You are a professional investment analyst. Create a concise executive summary for an investment memo.

        Company Information:
        - Name: {company_info.get('company_name', 'N/A')}
        - Sector: {company_info.get('sector', 'N/A')}
        - Industry: {company_info.get('industry', 'N/A')}
        - Market Cap: {market_cap}

        Key Financial Metrics:
        - Current Price: {current_price}
        - P/E Ratio: {financial_data.get('pe_ratio', 'N/A')}
        - Revenue Growth: {financial_data.get('revenue_growth', 'N/A')}
        - Profit Margin: {financial_data.get('profit_margin', 'N/A')}

        Additional Context from Documents:
        {context}

        Write a professional executive summary (3-4 sentences) covering:
        1. Business overview and market position
        2. Recent financial performance
        3. Key value proposition

        Use a professional, analytical tone suitable for institutional investors.
        """

        return self._generate(prompt, "executive summary")

    def generate_investment_risks(self, company_info, financial_data, context, news_context):
        """
        Generate the investment risks section.

        Args:
            company_info: Dict with ``company_name`` and ``sector``.
            financial_data: Dict with ``debt_to_equity``, ``beta``,
                ``profit_margin``.
            context: Text retrieved from the uploaded documents.
            news_context: Formatted recent-news text.

        Returns:
            The model's risk analysis, or an error string if generation failed.
        """
        prompt = f"""
        As an investment analyst, identify and analyze key investment risks for this company.

        Company: {company_info.get('company_name', 'N/A')}
        Sector: {company_info.get('sector', 'N/A')}

        Financial Health Indicators:
        - Debt to Equity: {financial_data.get('debt_to_equity', 'N/A')}
        - Beta: {financial_data.get('beta', 'N/A')}
        - Profit Margin: {financial_data.get('profit_margin', 'N/A')}

        Document Context:
        {context}

        Recent News Context:
        {news_context}

        Identify 4-5 key investment risks in these categories:
        1. Market/Industry risks
        2. Financial risks
        3. Operational risks
        4. Regulatory/External risks

        Format as bullet points with brief explanations. Be specific and actionable.
        """

        return self._generate(prompt, "risks analysis")

    def generate_growth_drivers(self, company_info, financial_data, context, news_context):
        """
        Generate the growth drivers section.

        Args:
            company_info: Dict with ``company_name`` and ``sector``.
            financial_data: Dict with ``revenue_growth``, ``return_on_equity``,
                ``market_cap``.
            context: Text retrieved from the uploaded documents.
            news_context: Formatted recent-news text.

        Returns:
            The model's growth analysis, or an error string if generation failed.
        """
        market_cap = self._format_number(financial_data.get('market_cap', 0), ',', prefix='$')

        prompt = f"""
        As an investment analyst, identify key growth drivers and opportunities for this company.

        Company: {company_info.get('company_name', 'N/A')}
        Sector: {company_info.get('sector', 'N/A')}

        Growth Indicators:
        - Revenue Growth: {financial_data.get('revenue_growth', 'N/A')}
        - Return on Equity: {financial_data.get('return_on_equity', 'N/A')}
        - Market Cap: {market_cap}

        Document Context:
        {context}

        Recent News Context:
        {news_context}

        Identify 4-5 key growth drivers focusing on:
        1. Market expansion opportunities
        2. Product/service innovation
        3. Operational efficiency improvements
        4. Strategic initiatives
        5. Market trends benefiting the company

        Format as bullet points with supporting rationale. Be specific about growth potential.
        """

        return self._generate(prompt, "growth drivers")

    def generate_valuation_analysis(self, company_info, financial_data, performance_data, context):
        """
        Generate the valuation analysis section.

        Args:
            company_info: Dict with ``company_name``.
            financial_data: Dict with ``current_price``, ``market_cap``,
                ``pe_ratio``, ``forward_pe``, ``peg_ratio``, ``price_to_book``
                and ``ev_ebitda`` (the legacy key ``enterpriseToEbitda`` is
                accepted as a fallback).
            performance_data: Dict with ``period_return``, ``52_week_high``,
                ``52_week_low``; missing or non-numeric values render as N/A.
            context: Text retrieved from the uploaded documents.

        Returns:
            The model's valuation analysis, or an error string if generation
            failed.
        """
        period_return_formatted = self._format_number(
            performance_data.get('period_return', 'N/A'), '.1f', suffix='%')
        week_52_high_formatted = self._format_number(
            performance_data.get('52_week_high', 'N/A'), '.2f', prefix='$')
        week_52_low_formatted = self._format_number(
            performance_data.get('52_week_low', 'N/A'), '.2f', prefix='$')

        current_price = self._format_number(financial_data.get('current_price', 0), '.2f', prefix='$')
        market_cap = self._format_number(financial_data.get('market_cap', 0), ',', prefix='$')

        # The metrics dict stores this ratio under 'ev_ebitda'; keep accepting the
        # raw provider key for callers that pass it through unchanged.
        ev_ebitda = financial_data.get('ev_ebitda', financial_data.get('enterpriseToEbitda', 'N/A'))

        prompt = f"""
        Provide a valuation analysis for this investment opportunity.

        Company: {company_info.get('company_name', 'N/A')}
        Current Price: {current_price}
        Market Cap: {market_cap}

        Valuation Metrics:
        - P/E Ratio: {financial_data.get('pe_ratio', 'N/A')}
        - Forward P/E: {financial_data.get('forward_pe', 'N/A')}
        - PEG Ratio: {financial_data.get('peg_ratio', 'N/A')}
        - Price to Book: {financial_data.get('price_to_book', 'N/A')}
        - EV/EBITDA: {ev_ebitda}

        Performance Data:
        - 1-Year Return: {period_return_formatted}
        - 52-Week High: {week_52_high_formatted}
        - 52-Week Low: {week_52_low_formatted}

        Additional Context:
        {context}

        Provide a valuation assessment covering:
        1. Current valuation relative to historical ranges
        2. Comparison to sector averages (if known)
        3. Key valuation drivers
        4. Overall valuation conclusion (attractive/fair/expensive)

        Be analytical and data-driven in your assessment.
        """

        return self._generate(prompt, "valuation analysis")

    def format_complete_memo(self, ticker, sections):
        """
        Format all sections into a complete memo.

        Args:
            ticker: Ticker used in the title; when empty or ``None`` the title
                reads "PRIVATE COMPANY".
            sections: Dict with optional keys ``executive_summary``, ``risks``,
                ``growth_drivers``, ``valuation``; missing ones render as
                "Not available".

        Returns:
            The memo as a Markdown string.
        """
        current_date = datetime.now().strftime("%B %d, %Y")
        title = (ticker or '').strip().upper() or 'PRIVATE COMPANY'

        memo = f"""
# INVESTMENT MEMO: {title}
**Date:** {current_date}
**Analyst:** GenAI Investment Analyst

---

## EXECUTIVE SUMMARY
{sections.get('executive_summary', 'Not available')}

---

## INVESTMENT RISKS
{sections.get('risks', 'Not available')}

---

## GROWTH DRIVERS & OPPORTUNITIES
{sections.get('growth_drivers', 'Not available')}

---

## VALUATION ANALYSIS
{sections.get('valuation', 'Not available')}

---

*This memo was generated using AI analysis of company documents, financial data, and recent news.
Please verify all information and consult additional sources before making investment decisions.*
"""
        return memo

# Initialize memo generator (creates the Gemini model handle configured above).
# Note that InvestmentMemoApp.__init__ constructs its own MemoGenerator.
memo_generator = MemoGenerator()
print("Memo Generator created successfully!")

Memo Generator initialized
Memo Generator created successfully!


In [None]:
class InvestmentMemoApp:
    """
    Cell 9: Enhanced main application interface (handles all 3 scenarios).

    Main application class that handles:
    1. Ticker only (listed companies)
    2. Documents only (private companies)
    3. Ticker + Documents (in-depth analysis)

    The class wires together the document processor, the financial and news
    fetchers, the RAG system and the memo generator, and exposes them through
    a small ipywidgets interface.
    """

    # Whole-word markers that suggest a line contains a company name.
    # Word boundaries keep "income", "increase" or "including" from matching
    # "inc", and "incorporate" from matching "corp".
    _COMPANY_NAME_PATTERN = re.compile(
        r"\b(?:company|corporation|corp|incorporated|inc|ltd|limited)\b",
        re.IGNORECASE,
    )

    def __init__(self):
        """Build the pipeline components and initialise per-session state."""
        self.doc_processor = DocumentProcessor()
        self.financial_fetcher = FinancialDataFetcher()
        # News is optional: without an API key the news step is skipped.
        self.news_fetcher = NewsDataFetcher(NEWS_API_KEY) if NEWS_API_KEY else None
        self.rag_system = RAGSystem()
        self.memo_generator = MemoGenerator()

        # State read by other methods. It is defined here so generate_memo()
        # does not raise AttributeError when create_interface() was not called.
        self.uploaded_docs = []
        self.last_memo = None
        self.last_ticker = None

        print("🚀 Investment Memo App initialized successfully!")

    def create_interface(self):
        """Create the enhanced user interface widgets and display them."""
        print("Creating enhanced user interface...")

        # Enhanced ticker input with help text
        self.ticker_input = widgets.Text(
            value='',
            placeholder='Enter stock ticker (e.g., AAPL, RELIANCE.NS) - Optional for private companies',
            description='Stock Ticker:',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='500px')
        )

        # File upload button
        self.upload_button = widgets.Button(
            description="📁 Upload Documents",
            button_style='info',
            layout=widgets.Layout(width='200px')
        )

        # Generate memo button
        self.generate_button = widgets.Button(
            description="🎯 Generate Memo",
            button_style='success',
            layout=widgets.Layout(width='200px')
        )

        # Output area: every callback prints inside this widget
        self.output_area = widgets.Output()

        # Set up button callbacks
        self.upload_button.on_click(self.upload_documents)
        self.generate_button.on_click(self.generate_memo)

        # Store uploaded documents (reset whenever the interface is rebuilt)
        self.uploaded_docs = []

        # Display enhanced interface with instructions
        display(widgets.VBox([
            widgets.HTML("<h2>🏢 GenAI Investment Memo Generator</h2>"),
            widgets.HTML("""
                <div style='background-color: #f0f8ff; padding: 15px; border-radius: 5px; margin-bottom: 15px;'>
                    <h3>📋 Three Ways to Use This Tool:</h3>
                    <ul>
                        <li><strong>📈 Listed Companies:</strong> Enter ticker only (e.g., AAPL, RELIANCE.NS)</li>
                        <li><strong>🏢 Private Companies:</strong> Leave ticker blank, upload documents only</li>
                        <li><strong>🔍 In-depth Analysis:</strong> Enter ticker AND upload documents</li>
                    </ul>
                </div>
            """),
            self.ticker_input,
            widgets.HBox([self.upload_button, self.generate_button]),
            self.output_area
        ]))

    def upload_documents(self, button):
        """
        Handle document upload.

        Args:
            button: The widget that triggered the callback (unused).

        Replaces self.uploaded_docs with the names of the files returned by
        files.upload(); each file's bytes are written to a file of that name.
        """
        with self.output_area:
            print("\n \n 📤 Starting document upload...")
            uploaded = files.upload()

            # A new upload replaces the previous batch instead of extending it.
            self.uploaded_docs = []

            for filename, content in uploaded.items():
                # Save uploaded file temporarily
                with open(filename, 'wb') as f:
                    f.write(content)

                print(f"✅ Uploaded: {filename}")
                self.uploaded_docs.append(filename)

            print(f"📋 Total documents uploaded: {len(self.uploaded_docs)}")

    def validate_ticker(self, ticker):
        """
        Validate if ticker exists and get basic info.

        Args:
            ticker: Ticker symbol to look up.

        Returns:
            (True, info) when the info mapping from yfinance carries a
            'symbol', 'shortName' or 'longName'; otherwise (False, None).
            Any exception raised during the lookup is also reported as
            (False, None).
        """
        if not ticker:
            return False, None

        try:
            # Quick validation by trying to get basic info
            info = yf.Ticker(ticker).info

            # Parenthesised on purpose: `info and a or b or c` would parse as
            # `(info and a) or b or c` and call .get() on a missing info.
            if info and (info.get('symbol') or info.get('shortName') or info.get('longName')):
                return True, info
            return False, None

        except Exception:
            # Best-effort check: a failed lookup means "not a valid ticker".
            return False, None

    @staticmethod
    def _format_market_cap(market_cap):
        """Return the market cap as '$1,234,567', or 'N/A' when it is not numeric."""
        try:
            return f"${market_cap:,}"
        except (TypeError, ValueError):
            # None or a placeholder string such as 'N/A' cannot take ',' formatting.
            return "N/A"

    def _extract_uploaded_text(self):
        """Extract text from every uploaded document and return it concatenated."""
        combined_text = ""
        for doc_path in self.uploaded_docs:
            try:
                # Compare in lower case so 'REPORT.PDF' is accepted too.
                lowered = doc_path.lower()
                if lowered.endswith('.pdf'):
                    text = self.doc_processor.extract_pdf_text(doc_path)
                elif lowered.endswith('.pptx'):
                    text = self.doc_processor.extract_pptx_text(doc_path)
                else:
                    print(f"⚠️ Unsupported file format: {doc_path}")
                    continue

                if text:
                    combined_text += f"\n--- From {doc_path} ---\n{text}\n"
                    print(f"✅ Processed: {doc_path} ({len(text)} characters)")
                else:
                    print(f"⚠️ No text extracted from: {doc_path}")

            except Exception as e:
                # One bad file must not stop the remaining documents.
                print(f"❌ Error processing {doc_path}: {e}")

        return combined_text

    def _fetch_news_context(self, company_name, ticker):
        """Return (formatted news text, article count); ('', 0) when nothing was found."""
        print("📰 Fetching recent news...")
        try:
            news_articles = self.news_fetcher.get_company_news(company_name, ticker)
            if news_articles:
                news_text = self.news_fetcher.format_news_for_context(news_articles)
                print(f"✅ Found {len(news_articles)} relevant news articles")
                return news_text, len(news_articles)
            print("ℹ️ No recent news articles found")
        except Exception as e:
            print(f"⚠️ Error fetching news: {e}")
        return "", 0

    def generate_memo(self, button):
        """
        Enhanced memo generation handling all three scenarios.

        Args:
            button: The widget that triggered the callback (unused).

        Reads the ticker from self.ticker_input and the files listed in
        self.uploaded_docs, prints progress into self.output_area, displays
        the finished memo and stores it in self.last_memo / self.last_ticker.
        Errors are printed (with a traceback) rather than raised.
        """
        ticker = self.ticker_input.value.strip().upper()
        has_documents = len(self.uploaded_docs) > 0

        with self.output_area:
            print("🎯 Starting memo generation...")
            print("=" * 60)

            # Scenario validation
            if not ticker and not has_documents:
                print("❌ ERROR: Please provide either:")
                print("   • A stock ticker (for listed companies)")
                print("   • Upload documents (for private companies)")
                print("   • Both (for in-depth analysis)")
                return

            try:
                # Initialize variables
                company_info = {}
                financial_data = {}
                performance_data = {}
                company_name = "Unknown Company"
                is_listed_company = False

                # SCENARIO 1 & 3: Handle ticker (if provided)
                if ticker:
                    print(f"📊 Validating and fetching data for ticker: {ticker}")

                    is_valid, _ = self.validate_ticker(ticker)

                    if is_valid:
                        print(f"✅ Valid ticker found: {ticker}")
                        is_listed_company = True

                        # Get comprehensive financial data
                        company_info = self.financial_fetcher.get_company_info(ticker)
                        financial_data = self.financial_fetcher.get_financial_metrics(ticker)
                        performance_data = self.financial_fetcher.get_recent_performance(ticker)

                        company_name = company_info.get('company_name', ticker)
                        print(f"✅ Company: {company_name}")
                        print(f"✅ Sector: {company_info.get('sector', 'N/A')}")
                        # Formatting must not abort the memo when the market
                        # cap is missing or non-numeric.
                        market_cap_text = self._format_market_cap(financial_data.get('market_cap', 0))
                        print(f"✅ Market Cap: {market_cap_text}")

                    else:
                        print(f"⚠️ Ticker '{ticker}' not found or invalid")
                        if not has_documents:
                            print("❌ Cannot proceed without valid ticker or documents")
                            return
                        else:
                            print("ℹ️ Proceeding with document-only analysis...")
                            ticker = ""  # Clear invalid ticker

                # SCENARIO 2 & 3: Handle documents (if uploaded)
                all_document_text = ""
                if has_documents:
                    print("📄 Processing uploaded documents...")

                    all_document_text = self._extract_uploaded_text()

                    print(f"✅ Total document text: {len(all_document_text)} characters")

                    # For private companies, try to extract company name from documents
                    if not is_listed_company and all_document_text:
                        company_name = self.extract_company_name_from_docs(all_document_text)
                        print(f"📋 Extracted company name from documents: {company_name}")

                else:
                    # Use business summary from financial data if available
                    if is_listed_company:
                        all_document_text = company_info.get('business_summary', '')
                        if all_document_text:
                            print("ℹ️ Using business summary from financial data")

                # Handle news (only for listed companies with known names)
                news_text = ""
                news_count = 0
                if self.news_fetcher and is_listed_company and company_name != "Unknown Company":
                    news_text, news_count = self._fetch_news_context(company_name, ticker)

                # Create RAG embeddings (if we have any text)
                print("🧠 Creating knowledge base...")
                all_text = all_document_text + "\n\n" + news_text

                if all_text.strip():
                    text_chunks = self.doc_processor.chunk_text(all_text)
                    self.rag_system.create_embeddings(text_chunks)
                    print(f"✅ Knowledge base created with {len(text_chunks)} chunks")
                else:
                    print("⚠️ Limited text content available for analysis")

                # Generate memo sections
                print("✍️ Generating memo sections...")

                # Adapt context queries based on available information
                base_query = f"{company_name}" if company_name != "Unknown Company" else "company business"

                # Get relevant context for each section
                contexts = {}
                if all_text.strip():
                    contexts = {
                        'executive': self.rag_system.get_context_for_query(f"{base_query} business model strategy financial performance"),
                        'risks': self.rag_system.get_context_for_query(f"{base_query} risks challenges competition debt financial health"),
                        'growth': self.rag_system.get_context_for_query(f"{base_query} growth opportunities expansion innovation market trends"),
                        'valuation': self.rag_system.get_context_for_query(f"{base_query} valuation financial metrics earnings revenue")
                    }
                else:
                    # Use empty contexts if no text available
                    contexts = {key: "" for key in ['executive', 'risks', 'growth', 'valuation']}

                # Generate sections with enhanced prompts for different scenarios
                sections = {}

                print("  📝 Executive Summary...")
                sections['executive_summary'] = self.generate_adaptive_executive_summary(
                    company_name, company_info, financial_data, contexts['executive'], is_listed_company
                )

                print("  ⚠️ Investment Risks...")
                sections['risks'] = self.generate_adaptive_investment_risks(
                    company_name, company_info, financial_data, contexts['risks'], news_text, is_listed_company
                )

                print("  📈 Growth Drivers...")
                sections['growth_drivers'] = self.generate_adaptive_growth_drivers(
                    company_name, company_info, financial_data, contexts['growth'], news_text, is_listed_company
                )

                print("  💰 Valuation Analysis...")
                sections['valuation'] = self.generate_adaptive_valuation_analysis(
                    company_name, company_info, financial_data, performance_data, contexts['valuation'], is_listed_company
                )

                # Format and display complete memo
                print("📋 Formatting final memo...")
                memo_title = ticker if ticker else company_name
                complete_memo = self.format_adaptive_memo(memo_title, sections, is_listed_company)

                print("=" * 60)
                print("✅ MEMO GENERATION COMPLETED!")
                print(f"📊 Analysis Type: {'Listed Company' if is_listed_company else 'Private Company'}")
                print(f"📄 Documents Used: {len(self.uploaded_docs)}")
                # Report the number of articles actually fetched instead of
                # re-deriving it from the formatted news text.
                print(f"📰 News Articles: {news_count}")
                print("=" * 60)

                # Display the memo
                display(Markdown(complete_memo))

                # Store memo for potential download
                self.last_memo = complete_memo
                self.last_ticker = ticker if ticker else company_name.replace(' ', '_')

                print("\n💾 Memo ready for download!")

            except Exception as e:
                print(f"❌ Error generating memo: {e}")
                import traceback
                traceback.print_exc()

    def extract_company_name_from_docs(self, text):
        """
        Try to extract company name from document text.

        Args:
            text: Combined document text.

        Returns:
            The first line among the first 20 that is between 6 and 99
            characters long and contains a company marker as a whole word
            (company, corporation, corp, incorporated, inc, ltd, limited).
            Lines starting with '---' are ignored because they are the
            "--- From <file> ---" style separators, not document content.
            Falls back to "Private Company".
        """
        try:
            for raw_line in text.split('\n')[:20]:  # Check first 20 lines
                line = raw_line.strip()
                if line.startswith('---'):
                    continue
                if 5 < len(line) < 100 and self._COMPANY_NAME_PATTERN.search(line):
                    return line

            # If no clear company name found, return generic name
            return "Private Company"

        except Exception:
            # e.g. text is not a string; keep the best-effort fallback
            return "Private Company"

    def _generate_section(self, prompt, label):
        """
        Send a prompt to the memo generator's model and return the stripped text.

        Args:
            prompt: Full prompt text.
            label: Section label used in the error message.

        Returns:
            The response text, or "Error generating <label>: <exception>"
            when the call fails.
        """
        try:
            response = self.memo_generator.model.generate_content(prompt)
            return response.text.strip()
        except Exception as e:
            return f"Error generating {label}: {e}"

    def generate_adaptive_executive_summary(self, company_name, company_info, financial_data, context, is_listed):
        """
        Generate executive summary adapted for listed vs private companies.

        Listed companies are delegated to MemoGenerator.generate_executive_summary;
        otherwise a document-only prompt is built from company_name and context.
        Returns the generated text or an error string.
        """
        if is_listed:
            return self.memo_generator.generate_executive_summary(company_info, financial_data, context)
        else:
            # Custom prompt for private companies
            prompt = f"""
            Create a professional executive summary for a private company investment memo.

            Company Name: {company_name}

            Available Information from Documents:
            {context if context else "Limited information available from uploaded documents."}

            Write a concise executive summary (3-4 sentences) covering:
            1. Business overview and operations
            2. Key value propositions
            3. Market position or competitive advantages
            4. Investment highlights

            Use a professional, analytical tone suitable for private equity investors.
            Focus on the information available in the documents.
            """

            return self._generate_section(prompt, "executive summary")

    def generate_adaptive_investment_risks(self, company_name, company_info, financial_data, context, news_context, is_listed):
        """
        Generate investment risks adapted for listed vs private companies.

        Listed companies are delegated to MemoGenerator.generate_investment_risks;
        otherwise a document-only prompt is built from company_name and context.
        Returns the generated text or an error string.
        """
        if is_listed:
            return self.memo_generator.generate_investment_risks(company_info, financial_data, context, news_context)
        else:
            # Custom prompt for private companies
            prompt = f"""
            Identify key investment risks for this private company.

            Company: {company_name}

            Available Information:
            {context if context else "Limited information available from documents."}

            Identify 4-5 key investment risks focusing on:
            1. Business/operational risks
            2. Market and competitive risks
            3. Financial and liquidity risks
            4. Management and execution risks
            5. Regulatory or external risks

            Format as bullet points. Be specific based on available information.
            If information is limited, focus on general private company investment risks.
            """

            return self._generate_section(prompt, "risks analysis")

    def generate_adaptive_growth_drivers(self, company_name, company_info, financial_data, context, news_context, is_listed):
        """
        Generate growth drivers adapted for listed vs private companies.

        Listed companies are delegated to MemoGenerator.generate_growth_drivers;
        otherwise a document-only prompt is built from company_name and context.
        Returns the generated text or an error string.
        """
        if is_listed:
            return self.memo_generator.generate_growth_drivers(company_info, financial_data, context, news_context)
        else:
            # Custom prompt for private companies
            prompt = f"""
            Identify key growth drivers and opportunities for this private company.

            Company: {company_name}

            Available Information:
            {context if context else "Limited information available from documents."}

            Identify 4-5 key growth drivers focusing on:
            1. Market expansion opportunities
            2. Product or service innovation
            3. Operational scaling potential
            4. Strategic partnerships or acquisitions
            5. Technology or process improvements

            Format as bullet points with supporting rationale.
            Base analysis on information available in the documents.
            """

            return self._generate_section(prompt, "growth drivers")

    def generate_adaptive_valuation_analysis(self, company_name, company_info, financial_data, performance_data, context, is_listed):
        """
        Generate valuation analysis adapted for listed vs private companies.

        A listed company with non-empty financial_data is delegated to
        MemoGenerator.generate_valuation_analysis; every other case gets a
        framework-style prompt. Returns the generated text or an error string.
        """
        if is_listed and financial_data:
            return self.memo_generator.generate_valuation_analysis(company_info, financial_data, performance_data, context)
        else:
            # Custom prompt for private companies or limited financial data
            prompt = f"""
            Provide a valuation framework analysis for this investment opportunity.

            Company: {company_name}
            Type: {'Listed Company (Limited Data)' if is_listed else 'Private Company'}

            Available Information:
            {context if context else "Limited financial information available."}

            Provide valuation analysis covering:
            1. Relevant valuation methodologies for this type of company
            2. Key value drivers based on available information
            3. Comparable company considerations (if applicable)
            4. Risk factors affecting valuation
            5. Overall investment attractiveness assessment

            Be analytical and acknowledge limitations in available data.
            Focus on valuation framework rather than specific multiples if financial data is limited.
            """

            return self._generate_section(prompt, "valuation analysis")

    def format_adaptive_memo(self, memo_title, sections, is_listed_company):
        """
        Format memo with adaptive title and disclaimer.

        Args:
            memo_title: Text placed after "INVESTMENT MEMO:".
            sections: Mapping with optional keys 'executive_summary', 'risks',
                'growth_drivers' and 'valuation'; missing keys render as
                'Not available'.
            is_listed_company: Selects the analysis-type line and the
                wording of the disclaimer.

        Returns:
            The memo as a Markdown string.
        """
        current_date = datetime.now().strftime("%B %d, %Y")
        company_type = "Listed Company Analysis" if is_listed_company else "Private Company Analysis"

        memo = f"""
# INVESTMENT MEMO: {memo_title}
**Date:** {current_date}
**Analysis Type:** {company_type}
**Analyst:** GenAI Investment Analyst

---

## EXECUTIVE SUMMARY
{sections.get('executive_summary', 'Not available')}

---

## INVESTMENT RISKS
{sections.get('risks', 'Not available')}

---

## GROWTH DRIVERS & OPPORTUNITIES
{sections.get('growth_drivers', 'Not available')}

---

## VALUATION ANALYSIS
{sections.get('valuation', 'Not available')}

---

### DISCLAIMER
*This memo was generated using AI analysis of {'company documents, financial data, and recent news' if is_listed_company else 'uploaded company documents'}.
{'Public market data was sourced from Yahoo Finance and news from News API.' if is_listed_company else 'Analysis is based primarily on provided documents.'}
Please verify all information and consult additional sources before making investment decisions.*
"""
        return memo

# Create and display the enhanced app
# `app` is kept as a module-level name because download_memo() reads
# app.last_memo and app.last_ticker from it.
print("🚀 Creating Enhanced Investment Memo App...")
app = InvestmentMemoApp()
# Builds the widgets and renders them in this cell's output.
app.create_interface()


🚀 Creating Enhanced Investment Memo App...
Document Processor initialized
Financial Data Fetcher initialized
News Data Fetcher initialized
🚀 Initializing RAG System...
📥 Loading embedding model: all-MiniLM-L6-v2 (this may take a moment)...
✅ RAG System initialized successfully!
Memo Generator initialized
🚀 Investment Memo App initialized successfully!
Creating enhanced user interface...


VBox(children=(HTML(value='<h2>🏢 GenAI Investment Memo Generator</h2>'), HTML(value="\n                <div st…

In [None]:
#  Download the generated memo
def download_memo():
    """
    Download the last generated memo as a text file.

    Reads `app.last_memo` and `app.last_ticker` from the module-level `app`,
    writes the memo to "<label>_investment_memo_<YYYYMMDD>.txt" in the current
    directory and hands that file to `files.download`. Prints a message and
    does nothing else when no memo has been generated yet.
    """
    if hasattr(app, 'last_memo') and app.last_memo:
        # last_ticker can be a company name taken from document text, so it may
        # contain path separators or other characters that are unsafe in a
        # file name; replace everything except word characters, '.' and '-'.
        safe_label = re.sub(r'[^\w.\-]', '_', str(app.last_ticker))
        filename = f"{safe_label}_investment_memo_{datetime.now().strftime('%Y%m%d')}.txt"

        with open(filename, 'w', encoding='utf-8') as f:
            f.write(app.last_memo)

        print(f"✅ Memo saved as: {filename}")

        # Download the file
        files.download(filename)
    else:
        print("❌ No memo to download. Please generate a memo first.")

# Create download button
download_button = widgets.Button(
    description="💾 Download Last Memo",
    button_style='warning'
)

# on_click passes the clicked button to the callback; download_memo() takes no
# arguments, so the lambda discards it.
download_button.on_click(lambda x: download_memo())
display(download_button)




✅ Memo saved as: Private_Company_investment_memo_20250826.txt


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>