In [3]:
pip install edgartools textblob beautifulsoup4 requests yfinance scikit-learn

Collecting edgartools
  Downloading edgartools-4.20.0-py3-none-any.whl.metadata (8.5 kB)
Collecting httpxthrottlecache>=0.1.6 (from edgartools)
  Downloading httpxthrottlecache-0.2.1-py3-none-any.whl.metadata (6.8 kB)
Collecting rank-bm25>=0.2.1 (from edgartools)
  Downloading rank_bm25-0.2.2-py3-none-any.whl.metadata (3.2 kB)
Collecting rapidfuzz>=3.5.0 (from edgartools)
  Downloading rapidfuzz-3.14.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (12 kB)
Collecting stamina>=24.2.0 (from edgartools)
  Downloading stamina-25.1.0-py3-none-any.whl.metadata (5.9 kB)
Collecting textdistance>=4.5.0 (from edgartools)
  Downloading textdistance-4.6.3-py3-none-any.whl.metadata (18 kB)
Collecting unidecode>=1.2.0 (from edgartools)
  Downloading Unidecode-1.4.0-py3-none-any.whl.metadata (13 kB)
Collecting hishel>=0.1.3 (from httpxthrottlecache>=0.1.6->edgartools)
  Downloading hishel-0.1.4-py3-none-any.whl.metadata (14 kB)
Collecting pyrate-limiter>=3.9.0 (from httpxthrottl

In [16]:
import json
import requests
from datetime import datetime, timedelta
from bs4 import BeautifulSoup
import re
from textblob import TextBlob
import yfinance as yf
import numpy as np
from sklearn.linear_model import LinearRegression
from edgar import Company, set_identity

class IPOWatchAgent:
    """Research agent that produces an "IPO attractiveness" brief for a company.

    The agent combines three signals into a weighted score:

    * revenue growth taken from the company's latest S-1 filing (via ``edgar``),
    * average news sentiment (NewsAPI articles scored with TextBlob),
    * a risk penalty derived from the number of "risk" mentions in the filing.

    Past predictions and realised returns are persisted to ``self.memory_file``
    so that the weights can be re-fitted across runs (see ``optimize_weights``).
    """

    # Default scoring weights, in order: growth, sentiment, risk penalty.
    DEFAULT_WEIGHTS = [0.33, 0.33, 0.34]
    # Seconds to wait for the Finnhub / NewsAPI HTTP endpoints before giving up.
    REQUEST_TIMEOUT = 10

    def __init__(self, news_api_key, finnhub_api_key):
        """Store the API keys, load persisted memory and register the EDGAR identity.

        Args:
            news_api_key: API key sent to newsapi.org.
            finnhub_api_key: API token sent to finnhub.io.
        """
        self.news_api_key = news_api_key
        self.finnhub_api_key = finnhub_api_key
        self.memory_file = 'ipo_memory.json'
        self.load_memory()
        # Set User-Agent for SEC EDGAR requests
        set_identity("Sanjay YourEmail@example.com")  # Replace with your name and email

    def load_memory(self):
        """Load past IPO records and learned weights from ``self.memory_file``.

        A missing file is the normal first-run case and silently yields empty
        memory with default weights. An unreadable or malformed file is reported
        and treated the same way instead of crashing the agent.
        """
        self.memory = {}
        self.weights = list(self.DEFAULT_WEIGHTS)
        try:
            with open(self.memory_file, 'r') as f:
                data = json.load(f)
        except FileNotFoundError:
            return
        except (ValueError, OSError) as exc:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
            print(f"Could not read {self.memory_file}: {exc}. Starting with empty memory.")
            return

        if not isinstance(data, dict):
            return
        past_ipos = data.get('past_ipos', {})
        if isinstance(past_ipos, dict):
            self.memory = past_ipos
        weights = data.get('weights')
        # Only accept weights that line up with the three features used for scoring.
        if isinstance(weights, list) and len(weights) == len(self.DEFAULT_WEIGHTS):
            self.weights = weights

    def save_memory(self):
        """Persist past IPO records and the current weights to ``self.memory_file`` as JSON."""
        data = {'past_ipos': self.memory, 'weights': self.weights}
        with open(self.memory_file, 'w') as f:
            json.dump(data, f)

    def plan_research(self, company_name):
        """Return the ordered list of human-readable research steps for ``company_name``."""
        return [
            f"Retrieve S-1 filing for {company_name} from SEC EDGAR (or fallback to alternative sources if unavailable).",
            "Extract financials (e.g., revenue growth) and risk disclosures.",
            "Ingest news via NewsAPI.",
            "Preprocess news (clean text).",
            "Classify news (sentiment analysis).",
            "Extract key information from news.",
            "Summarize news.",
            "Overlay sentiment and compute IPO attractiveness score.",
            "Self-reflect on output quality.",
            "Route based on potential (high/low).",
            "Generate investor brief.",
            "If post-IPO, evaluate with actual returns and optimize weights."
        ]

    def find_symbol(self, company_name):
        """Look up a ticker symbol for ``company_name`` via the Finnhub search API.

        Returns:
            The symbol of the first search result whose description contains the
            company name (case-insensitive), or ``None`` when nothing matches or
            the request fails.
        """
        try:
            # Pass the query through ``params`` so spaces, '&', etc. are URL-encoded.
            response = requests.get(
                "https://finnhub.io/api/v1/search",
                params={'q': company_name, 'token': self.finnhub_api_key},
                timeout=self.REQUEST_TIMEOUT,
            )
        except requests.RequestException as exc:
            print(f"Symbol lookup failed: {exc}")
            return None
        if response.status_code != 200:
            return None
        try:
            payload = response.json()
        except ValueError:
            return None

        needle = company_name.lower()
        for result in payload.get('result') or []:
            description = result.get('description') or ''
            symbol = result.get('symbol')
            if symbol and needle in description.lower():
                return symbol
        return None

    def fetch_news(self, company_name):
        """Fetch recent articles about ``company_name`` from NewsAPI.

        Returns:
            A list with one string per article (its description, else its
            content, else ``''``). Empty when the request fails or returns a
            non-200 status.
        """
        try:
            response = requests.get(
                "https://newsapi.org/v2/everything",
                params={
                    'q': company_name,
                    'apiKey': self.news_api_key,
                    'sortBy': 'publishedAt',
                },
                timeout=self.REQUEST_TIMEOUT,
            )
        except requests.RequestException as exc:
            print(f"News request failed: {exc}")
            return []
        if response.status_code != 200:
            return []
        try:
            articles = response.json().get('articles') or []
        except ValueError:
            return []
        return [article.get('description') or article.get('content') or '' for article in articles]

    def preprocess_news(self, news_texts):
        """Lower-case each non-empty text and strip everything but word characters and whitespace."""
        return [re.sub(r'[^\w\s]', '', text.lower()) for text in news_texts if text]

    def classify_sentiment(self, texts):
        """Return the TextBlob polarity of each text, in input order."""
        return [TextBlob(text).sentiment.polarity for text in texts]

    def extract_key(self, texts):
        """Return a space-separated string of key terms drawn from ``texts``.

        Key terms are the distinct words among the first ten words longer than
        five characters. They are emitted in first-seen order so that the result
        is reproducible from run to run (joining a ``set`` is not).
        """
        keywords = []
        for text in texts:
            keywords.extend(word for word in text.split() if len(word) > 5)
        # dict.fromkeys de-duplicates while preserving insertion order.
        return ' '.join(dict.fromkeys(keywords[:10]))

    def summarize_news(self, extracted, sentiments):
        """Build the one-line news summary from the key terms and per-article sentiments."""
        avg_sentiment = np.mean(sentiments) if sentiments else 0
        return f"Summary of {len(sentiments)} articles: Key terms - {extracted}. Average sentiment: {avg_sentiment:.2f}."

    def get_ipo_return(self, symbol, ipo_date_str):
        """Compute the return from the first open on/after the IPO date to the latest close.

        Args:
            symbol: Ticker symbol understood by yfinance.
            ipo_date_str: Start date as a ``'YYYY-MM-DD'`` string. ``date`` /
                ``datetime`` objects are accepted too (their string form starts
                with the same ``YYYY-MM-DD`` prefix).

        Returns:
            The fractional return as a float, or ``None`` when the date is
            invalid, no price history is available, or the lookup fails.
        """
        try:
            ipo_date = datetime.strptime(str(ipo_date_str)[:10], '%Y-%m-%d')
        except ValueError:
            print(f"Invalid IPO date {ipo_date_str!r}; expected YYYY-MM-DD.")
            return None

        try:
            history = yf.Ticker(symbol).history(start=ipo_date, end=datetime.today())
            if history is None or history.empty:
                return None
            open_price = float(history['Open'].iloc[0])
            close_price = float(history['Close'].iloc[-1])
        except Exception as exc:
            # Best effort: price data is optional, but say why it is missing.
            print(f"Could not fetch price history for {symbol}: {exc}")
            return None

        if open_price == 0:
            return None
        ipo_return = (close_price - open_price) / open_price
        return ipo_return if np.isfinite(ipo_return) else None

    def self_reflect(self, growth, avg_sentiment, risk_count):
        """Score data completeness: each input that equals 0 counts as missing.

        Returns:
            ``1 - missing / 3``, i.e. 1.0 when all three inputs are non-zero.
        """
        missing = 0
        if growth == 0: missing += 1
        if avg_sentiment == 0: missing += 1
        if risk_count == 0: missing += 1
        quality_score = 1 - (missing / 3)
        print(f"Self-reflection: Quality score {quality_score:.2f} (based on data completeness).")
        return quality_score

    def optimize_weights(self):
        """Re-fit the scoring weights with a linear regression over remembered IPOs.

        Only records that hold both an ``'actual'`` return and a full feature
        vector are used; at least three such records are required. The fitted
        coefficients replace ``self.weights`` and are persisted.
        """
        if len(self.memory) < 3:
            return
        X = []
        y = []
        expected_len = len(self.DEFAULT_WEIGHTS)
        for data in self.memory.values():
            if 'actual' in data and 'features' in data and len(data['features']) == expected_len:
                X.append(data['features'])
                y.append(data['actual'])
        if len(X) >= 3:
            reg = LinearRegression().fit(X, y)
            self.weights = reg.coef_.tolist()
            print(f"Optimized weights: {self.weights}")
            self.save_memory()

    def _revenue_growth(self, filing):
        """Best-effort growth between the last two revenue values in the filing's XBRL; 0 if unavailable."""
        try:
            financials = filing.xbrl()  # fetch once and reuse
            if not financials:
                return 0
            income = financials.get_income_statement().to_dataframe()
            revenue_cols = [col for col in income.columns if 'revenue' in col.lower() or 'sales' in col.lower()]
            if not revenue_cols:
                return 0
            revenues = income[revenue_cols[0]].dropna().values
            # Need two values and a non-zero base to compute a growth rate.
            if len(revenues) < 2 or revenues[-2] == 0:
                return 0
            return float((revenues[-1] - revenues[-2]) / revenues[-2])
        except Exception as exc:
            print(f"Could not extract revenue growth: {exc}")
            return 0

    def _count_risk_mentions(self, html):
        """Count whole-word "risk" mentions between the "risk factors" header and the next h1-h3."""
        if not html:
            return 0
        soup = BeautifulSoup(html, 'html.parser')
        risk_header = soup.find(lambda tag: tag.name in ['h1', 'h2', 'h3', 'p'] and 'risk factors' in tag.text.lower())
        if not risk_header:
            return 0
        pieces = []
        current = risk_header.next_element
        while current is not None and getattr(current, 'name', None) not in ('h1', 'h2', 'h3'):
            # Collect text nodes only (bs4 strings subclass ``str``). Adding a tag's
            # ``.text`` as well would count the same words once per nesting level.
            if isinstance(current, str):
                pieces.append(current)
            current = current.next_element
        return len(re.findall(r'\brisk\b', ''.join(pieces), re.I))

    def _load_s1_data(self, company_name):
        """Return ``(filing_date, growth, risk_count)`` from the latest S-1, or ``(None, 0, 0)``."""
        filing_date = None
        growth = 0
        risk_count = 0
        try:
            # NOTE(review): presumably Company() resolves a ticker/CIK rather than a
            # free-form company name - confirm against the edgartools documentation.
            company = Company(company_name)
            s1_filings = company.get_filings(form="S-1")
            if not s1_filings or s1_filings.empty:
                print(f"No S-1 filing found for {company_name}. Using fallback data.")
            else:
                filing = s1_filings.latest()
                filing_date = filing.filing_date
                growth = self._revenue_growth(filing)
                risk_count = self._count_risk_mentions(filing.html())
        except Exception as e:
            print(f"Error retrieving filing: {str(e)}. Using fallback data.")
        return filing_date, growth, risk_count

    def research(self, company_name):
        """Run the full research workflow for ``company_name`` and return the investor brief.

        Steps: print the plan, pull S-1 data (with a hard-coded fallback for
        Groupe Dynamite), run the news pipeline, compute the weighted score,
        self-reflect on data completeness, route, and build the brief. When a
        filing date and a ticker symbol are both available, the realised return
        is recorded in memory and the weights are re-optimised.

        Returns:
            The multi-line brief as a string.
        """
        # Agent Function 1: Plan
        plan = self.plan_research(company_name)
        print("Research Plan:")
        for step in plan:
            print(f"- {step}")

        # Retrieve S-1 (dynamic tool: EDGAR)
        filing_date, growth, risk_count = self._load_s1_data(company_name)

        # Fallback for Groupe Dynamite (Canadian company, SEDAR+ filing)
        fallback_used = False
        if company_name.strip().lower() == "groupe dynamite inc.":
            filing_date = "2024-11-20"  # PREP prospectus filed with SEDAR+
            growth = 0.197  # 19.7% revenue growth (C$958.5M in 2024 vs C$800.8M in 2023)
            risk_count = 8  # From public reports: fashion trends, supply chain, competition, etc.
            fallback_used = True

        # Workflow Pattern 1: Prompt Chaining for news
        news = self.fetch_news(company_name)  # Ingest
        preprocessed = self.preprocess_news(news)  # Preprocess
        sentiments = self.classify_sentiment(preprocessed)  # Classify
        extracted = self.extract_key(preprocessed)  # Extract
        summary = self.summarize_news(extracted, sentiments)  # Summarize
        avg_sentiment = float(np.mean(sentiments)) if sentiments else 0

        # Compute score (using learned weights)
        normalized_risk = risk_count / 100.0  # Simple normalization
        # Plain floats keep the feature vector JSON-serialisable in save_memory().
        features = [float(growth), float(avg_sentiment), -float(normalized_risk)]
        score = sum(w * f for w, f in zip(self.weights, features))

        # Agent Function 3: Self-reflect
        quality = self.self_reflect(growth, avg_sentiment, risk_count)
        if quality < 0.5:
            print("Low quality detected; consider manual review or additional data sources.")

        # Workflow Pattern 2: Routing
        if score > 0:
            route = "High potential: Focus on growth opportunities and positive sentiment."
        else:
            route = "Low potential: Highlight risks and cautious outlook."

        # The provenance note must reflect where the numbers actually came from.
        if fallback_used:
            note = (
                f"Note: For {company_name}, financials and risks sourced from public reports "
                f"(SEDAR+ summaries, news) due to no SEC S-1."
            )
        elif filing_date is None:
            note = (
                f"Note: No SEC S-1 data could be retrieved for {company_name}; "
                f"growth and risk figures are placeholders (0)."
            )
        else:
            note = f"Note: For {company_name}, financials and risks extracted from the latest SEC S-1 filing."

        # Generate brief
        brief = (
            f"IPO Attractiveness Brief for {company_name}:\n"
            f"Filing Date: {filing_date or 'Unknown (no SEC S-1; check SEDAR+ for non-U.S. filings)'}\n"
            f"Revenue Growth: {growth:.2%}\n"
            f"Avg News Sentiment: {avg_sentiment:.2f}\n"
            f"Risk Mentions: {risk_count}\n"
            f"Attractiveness Score: {score:.2f}\n"
            f"News Summary: {summary}\n"
            f"Routing: {route}\n"
            f"{note}"
        )

        # Workflow Pattern 3: Evaluator-Optimizer (if post-IPO)
        if filing_date is None:
            # Without a known date any "actual return" would be measured from an
            # arbitrary day and would poison the learned weights.
            print("Skipping post-IPO evaluation: no filing date available.")
            return brief

        symbol = self.find_symbol(company_name)
        if symbol:
            # The filing date is used as a stand-in for the IPO date.
            actual_return = self.get_ipo_return(symbol, filing_date)
            if actual_return is not None:
                print(f"Evaluating: Predicted score {score:.2f} vs Actual return {actual_return:.2%}")
                self.memory[company_name] = {
                    'predicted': float(score),
                    'actual': float(actual_return),
                    'features': features
                }
                self.save_memory()
                self.optimize_weights()  # Agent Function 4: Learn across runs

        return brief

# Example usage
if __name__ == "__main__":
    import os

    # Read credentials from the environment so that secrets never live in the notebook.
    news_api_key = os.environ.get("NEWS_API_KEY", "")
    finnhub_api_key = os.environ.get("FINNHUB_API_KEY", "")
    if not (news_api_key and finnhub_api_key):
        print("Warning: NEWS_API_KEY and/or FINNHUB_API_KEY is not set; API lookups are likely to fail.")

    agent = IPOWatchAgent(
        news_api_key=news_api_key,
        finnhub_api_key=finnhub_api_key,
    )
    result = agent.research("BETA Technologies")
    print(result)

Research Plan:
- Retrieve S-1 filing for BETA Technologies from SEC EDGAR (or fallback to alternative sources if unavailable).
- Extract financials (e.g., revenue growth) and risk disclosures.
- Ingest news via NewsAPI.
- Preprocess news (clean text).
- Classify news (sentiment analysis).
- Extract key information from news.
- Summarize news.
- Overlay sentiment and compute IPO attractiveness score.
- Self-reflect on output quality.
- Route based on potential (high/low).
- Generate investor brief.
- If post-IPO, evaluate with actual returns and optimize weights.
No S-1 filing found for BETA Technologies. Using fallback data.
Self-reflection: Quality score 0.33 (based on data completeness).
Low quality detected; consider manual review or additional data sources.
IPO Attractiveness Brief for BETA Technologies:
Filing Date: Unknown (no SEC S-1; check SEDAR+ for non-U.S. filings)
Revenue Growth: 0.00%
Avg News Sentiment: 0.11
Risk Mentions: 0
Attractiveness Score: 0.04
News Summary: Summar

In [None]:
#------------------------------- end of python--------------------------------------------