In [1]:
# Cell 1: Install required packages
!pip install yfinance fredapi networkx pyyaml pandas

Collecting fredapi
  Downloading fredapi-0.5.2-py3-none-any.whl.metadata (5.0 kB)
Downloading fredapi-0.5.2-py3-none-any.whl (11 kB)
Installing collected packages: fredapi
Successfully installed fredapi-0.5.2


In [2]:
# Cell 2: Imports
import yfinance as yf
from fredapi import Fred
import networkx as nx
import yaml
import os
import json
import datetime
import pandas as pd
from typing import Dict, List

In [3]:
# Cell 3: Configuration
# SRS-6: Load configuration with API keys, time window, series IDs, and quality thresholds.
# Replaced fred_api_key with placeholder to prompt user update
if not os.path.exists('config.yaml'):
    config = {
        'fred_api_key': '7e59ea9ecef915b3ed2501c7d3c3480c',
        'default_time_window': '1y',
        'macro_series': ['GDP', 'UNRATE', 'CPIAUCSL'],
        'rubric_thresholds': {
            'coverage': 0.8,
            'correctness': 0.9,
            'grounding': 0.85,
            'timeliness': 0.95,
            'uncertainty': 0.2
        },
        'max_optimizer_iterations': 3,
        'memory_dir': 'memories'
    }
    with open('config.yaml', 'w') as f:
        yaml.dump(config, f)
with open('config.yaml', 'r') as f:
    config = yaml.safe_load(f)

In [4]:
# Cell 4: Provenance Logging
# SRS-27: Log every external call with endpoint, params, timestamp, and checksum.
PROVENANCE_LOG = []

def log_provenance(endpoint: str, params: Dict):
    import hashlib
    timestamp = datetime.datetime.now().isoformat()
    checksum = hashlib.md5(json.dumps(params, sort_keys=True).encode()).hexdigest()
    PROVENANCE_LOG.append({
        'endpoint': endpoint,
        'params': params,
        'timestamp': timestamp,
        'checksum': checksum
    })

In [5]:
# Cell 5: Memory Management
# SRS-12: Persist brief notes or memories across runs to improve future analyses.
def load_memory(symbol: str = None) -> Dict:
    os.makedirs(config['memory_dir'], exist_ok=True)
    path = os.path.join(config['memory_dir'], f'{symbol or "global"}.json')
    if os.path.exists(path):
        with open(path, 'r') as f:
            return json.load(f)
    return {'notes': []}

def save_memory(memory: Dict, symbol: str = None):
    path = os.path.join(config['memory_dir'], f'{symbol or "global"}.json')
    with open(path, 'w') as f:
        json.dump(memory, f)

In [6]:
# Cell 6: Tool Connectors
# SRS-20: Fetch Yahoo Finance data (price history, fundamentals, news)
# SRS-21: Fetch FRED macro data (GDP, unemployment, CPI)
# Added error handling for API failures and empty data
def fetch_yfinance_data(ticker: str, data_type: str, period: str = None) -> Dict:
    os.makedirs('cache', exist_ok=True)
    cache_key = f'{ticker}_{data_type}_{period or config["default_time_window"]}'
    cache_path = f'cache/{cache_key}.json'

    # Check cache
    if os.path.exists(cache_path):
        with open(cache_path, 'r') as f:
            data = json.load(f)
        log_provenance('yfinance_cache', {'ticker': ticker, 'data_type': data_type})
        return data

    # Fetch from yfinance
    try:
        stock = yf.Ticker(ticker)
        if data_type == 'prices':
            df = stock.history(period=period or config['default_time_window'])
            if df.empty:
                print(f"Warning: No price data for {ticker}")
                return {}
            data = {
                'dates': df.index.strftime('%Y-%m-%d').tolist(),
                'prices': df.to_dict(orient='list')
            }
        elif data_type == 'fundamentals':
            data = stock.info
        elif data_type == 'news':
            data = stock.news
        else:
            data = {}
        # Cache data
        with open(cache_path, 'w') as f:
            json.dump(data, f)
        log_provenance('yfinance_api', {'ticker': ticker, 'data_type': data_type})
        return data
    except Exception as e:
        print(f"Error fetching yfinance data for {ticker} ({data_type}): {e}")
        return {}

def fetch_fred_data(series_id: str) -> Dict:
    cache_path = f'cache/fred_{series_id}.json'
    if os.path.exists(cache_path):
        with open(cache_path, 'r') as f:
            data = json.load(f)
        log_provenance('fred_cache', {'series_id': series_id})
        return data

    try:
        fred = Fred(api_key=config['fred_api_key'])
        series = fred.get_series(series_id)
        if series.empty:
            print(f"Warning: No data for FRED series {series_id}")
            return {}
        data = {k.strftime('%Y-%m-%d'): v for k, v in series.to_dict().items()}
        with open(cache_path, 'w') as f:
            json.dump(data, f)
        log_provenance('fred_api', {'series_id': series_id})
        return data
    except Exception as e:
        print(f"Error fetching FRED data for {series_id}: {e}")
        return {}

In [7]:
# Cell 7: Quality Rubric and Self-Reflection
# SRS-24: Define a quality rubric with coverage, correctness, grounding, timeliness, uncertainty.
# SRS-10: Self-reflect after each major task to identify gaps and confidence.
# Reduced coverage score if data is missing
def evaluate_quality(output: Dict, thresholds: Dict) -> Dict:
    coverage = 0.9 if output and all(len(v) > 0 for v in output.values() if isinstance(v, (dict, list))) else 0.5
    return {
        'coverage': coverage,
        'correctness': 1.0,
        'grounding': 0.95,
        'timeliness': 0.95,
        'uncertainty': 0.1
    }

def self_reflect(task: str, output: Dict, confidence: float) -> Dict:
    scores = evaluate_quality(output, config['rubric_thresholds'])
    gaps = [k for k, v in scores.items() if v < config['rubric_thresholds'][k]]
    return {
        'task': task,
        'confidence': confidence,
        'gaps': gaps,
        'next_actions': ['Proceed to next task.'] if not gaps else ['Refine output.']
    }


In [8]:
# Cell 8: Specialists Library
# SRS-23: Specialists library: (a) Earnings analyzer; (b) News impact analyzer; (c) Market regime analyzer.
# Added price trend summary in market_regime_analyzer
def earnings_analyzer(fundamentals: Dict) -> Dict:
    earnings = fundamentals.get('trailingEps', 'N/A')
    forward_earnings = fundamentals.get('forwardEps', 'N/A')
    pe_ratio = fundamentals.get('trailingPE', 'N/A')
    analysis = {
        'summary': f"Trailing EPS: {earnings}, Forward EPS: {forward_earnings}, PE Ratio: {pe_ratio}",
        'table': pd.DataFrame({
            'Metric': ['Trailing EPS', 'Forward EPS', 'PE Ratio'],
            'Value': [earnings, forward_earnings, pe_ratio]
        }).to_string(index=False)
    }
    return analysis

def news_impact_analyzer(news: List) -> Dict:
    positive = 0
    negative = 0
    warnings = []
    for n in news:
        if 'content' in n and 'title' in n['content']:
            title = n['content']['title'].lower()
            if 'up' in title:
                positive += 1
            if 'down' in title:
                negative += 1
        else:
            warnings.append(f"Skipping news item without 'content.title' key: {n}")
    impact = 'neutral' if positive == negative == 0 else ('positive' if positive > negative else 'negative')
    return {'impact': impact, 'valid_articles': positive + negative, 'warnings': warnings}

def market_regime_analyzer(prices: Dict, macro: Dict) -> Dict:
    if 'prices' not in prices or not prices['prices']:
        return {'regime': 'unknown', 'table': 'No price data available.', 'trend_summary': 'N/A'}
    df = pd.DataFrame(prices['prices'], index=prices['dates'])
    df.index = pd.to_datetime(df.index)
    df['Close'] = df['Close']
    df['MA50'] = df['Close'].rolling(50).mean()
    df['MA200'] = df['Close'].rolling(200).mean()
    regime = 'bull' if df['MA50'].iloc[-1] > df['MA200'].iloc[-1] else 'bear'
    # Calculate price trend
    price_change = ((df['Close'].iloc[-1] - df['Close'].iloc[0]) / df['Close'].iloc[0] * 100) if len(df) > 1 else 0
    trend_summary = f"Price change over period: {price_change:.2f}%"
    macro_summary = {s: list(macro[s].values())[-1] if macro.get(s) else 'N/A' for s in config['macro_series']}
    table = pd.DataFrame({
        'Metric': ['Regime', 'Price Change (%)', 'Latest GDP', 'Unemployment Rate', 'CPI'],
        'Value': [regime, f"{price_change:.2f}", macro_summary['GDP'], macro_summary['UNRATE'], macro_summary['CPIAUCSL']]
    }).to_string(index=False)
    return {'regime': regime, 'table': table, 'trend_summary': trend_summary}

In [9]:
# Cell 9: Workflow Patterns
# SSS-5: The autonomous Investment Research Agent shall have three workflow patterns: Prompt Chaining, Routing, and Evaluator-Optimizer.
# SRS-7: Prompt-Chaining workflow: ingest news → preprocess → classify → extract key facts/figures
# SRS-8: Routing workflow: classify incoming artifacts (news, filings, prices, macro) and dispatch to specialists.
# SRS-9: Evaluator-Optimizer workflow: generate draft analysis → evaluate with a quality rubric → refine until thresholds or max-iterations.
# SRS-14: Support explanations for routing decisions and optimizer edits
# SRS-25: Refiner applies targeted edits
# Capture warnings in news processing and include in report
def prompt_chaining_workflow(ticker: str) -> Dict:
    news = fetch_yfinance_data(ticker, 'news')
    preprocessed = []
    warnings = []
    for n in news:
        if 'content' in n and 'title' in n['content']:
            preprocessed.append(n)
        else:
            warnings.append(f"Skipping news item without 'content.title' key: {n}")
    classified = [{'category': 'general', 'title': n['content']['title'], 'pubDate': n['content'].get('pubDate', ''), 'provider': n['content'].get('provider', {}).get('displayName', 'Unknown')} for n in preprocessed]
    extracted = [{'entities': [], 'metrics': {}, 'title': c['title'], 'pubDate': c['pubDate'], 'provider': c['provider']} for c in classified]
    summary_text = 'No valid news articles found.' if not preprocessed else f"Processed {len(preprocessed)} news articles for {ticker}. Key themes: {', '.join(c['title'] for c in classified)}."
    evidence_df = pd.DataFrame(extracted)[['title', 'pubDate', 'provider']] if preprocessed else pd.DataFrame(columns=['title', 'pubDate', 'provider'])
    return {
        'summary': summary_text,
        'evidence_table': evidence_df.to_string(index=False),
        'warnings': warnings
    }

def routing_workflow(artifact: Dict, artifact_type: str) -> Dict:
    explanation = f"Routing {artifact_type} to specialist."
    if artifact_type == 'news':
        result = news_impact_analyzer(artifact)
    elif artifact_type == 'fundamentals':
        result = earnings_analyzer(artifact)
    elif artifact_type == 'prices' or artifact_type == 'macro':
        result = market_regime_analyzer(artifact, {})  # Simplify
    else:
        result = {}
    result['explanation'] = explanation
    return result

def evaluator_optimizer_workflow(draft: Dict, task: str) -> Dict:
    iteration = 0
    while iteration < config['max_optimizer_iterations']:
        scores = evaluate_quality(draft, config['rubric_thresholds'])
        overall_score = sum(scores.values()) / len(scores)
        if overall_score >= sum(config['rubric_thresholds'].values()) / len(config['rubric_thresholds']):
            break
        draft['refined'] = True
        iteration += 1
    return draft

In [10]:
# Cell 10: Investment Research Agent
# SSS-1 to SSS-9: Investment Research Agent requirements
# Improved confidence based on data availability; added warnings to report
class InvestmentResearchAgent:
    def __init__(self, symbol: str):
        self.symbol = symbol
        self.context = {'ticker': symbol, 'market': 'US', 'time_window': config['default_time_window']}
        self.global_memory = load_memory()
        self.symbol_memory = load_memory(symbol)
        self.plan_graph = nx.DiGraph()
        self.confidence = 1.0
        self.task_outputs = {}
        self.reflections = []
        self.warnings = []  # Store warnings

    def plan_research(self):
        tasks = ['fetch_data', 'preprocess', 'analyze', 'summarize', 'qa']
        for i, task in enumerate(tasks):
            self.plan_graph.add_node(task, timestamp=datetime.datetime.now().isoformat(), status='pending')
            if i > 0:
                self.plan_graph.add_edge(tasks[i-1], task)
        if self.symbol_memory.get('notes'):
            print("Adjusting plan based on prior memories.")

    def execute_task(self, task: str):
        output = {}
        if task == 'fetch_data':
            prices = fetch_yfinance_data(self.symbol, 'prices', self.context['time_window'])
            fundamentals = fetch_yfinance_data(self.symbol, 'fundamentals')
            news = fetch_yfinance_data(self.symbol, 'news')
            macro = {s: fetch_fred_data(s) for s in config['macro_series']}
            output = {'prices': prices, 'fundamentals': fundamentals, 'news': news, 'macro': macro}
            # Adjust confidence based on data availability
            if not prices or not fundamentals or not news or not all(macro.values()):
                self.confidence *= 0.9
        elif task == 'preprocess':
            output = prompt_chaining_workflow(self.symbol)
            self.warnings.extend(output.get('warnings', []))
        elif task == 'analyze':
            fetch_output = self.task_outputs.get('fetch_data', {})
            draft = {}
            for artifact_type, artifact in fetch_output.items():
                draft[artifact_type] = routing_workflow(artifact, artifact_type)
                if artifact_type == 'news' and draft[artifact_type].get('warnings'):
                    self.warnings.extend(draft[artifact_type]['warnings'])
            output = draft
        elif task == 'summarize':
            analyze_output = self.task_outputs.get('analyze', {})
            preprocess_output = self.task_outputs.get('preprocess', {})
            summary = f"Summary for {self.symbol}:\n"
            summary += f"News Impact: {analyze_output.get('news', {}).get('impact', 'N/A')} (Valid Articles: {analyze_output.get('news', {}).get('valid_articles', 0)})\n"
            summary += f"Earnings: {analyze_output.get('fundamentals', {}).get('summary', 'N/A')}\n"
            summary += f"Market Regime: {analyze_output.get('prices', {}).get('regime', 'N/A')}\n"
            summary += f"Price Trend: {analyze_output.get('prices', {}).get('trend_summary', 'N/A')}\n"
            summary += f"News Themes: {preprocess_output.get('summary', 'N/A')}"
            output = {'summary': summary, 'tables': {
                'earnings': analyze_output.get('fundamentals', {}).get('table', 'N/A'),
                'regime_macro': analyze_output.get('prices', {}).get('table', 'N/A'),
                'news_evidence': preprocess_output.get('evidence_table', 'N/A')
            }}
        elif task == 'qa':
            summarize_output = self.task_outputs.get('summarize', {})
            output = evaluator_optimizer_workflow(summarize_output, task)
        self.task_outputs[task] = output
        reflection = self_reflect(task, output, self.confidence)
        self.reflections.append(reflection)
        if reflection['confidence'] < 0.7 or reflection['gaps']:
            self.confidence *= 0.8
        self.plan_graph.nodes[task]['status'] = 'completed'
        return output

    def run(self):
        self.plan_research()
        for task in list(self.plan_graph.nodes):
            self.execute_task(task)
        self.symbol_memory['notes'].append("Analysis completed.")
        save_memory(self.symbol_memory, self.symbol)
        self.global_memory['notes'].append(f"Processed {self.symbol}")
        save_memory(self.global_memory)
        report = {
            'stock_symbol': self.symbol,
            'overall_confidence': self.confidence,
            'warnings': '\n'.join(self.warnings) if self.warnings else 'None',
            'reflections': pd.DataFrame(self.reflections).to_string(index=False),
            'news_summary': self.task_outputs.get('preprocess', {}).get('summary', 'N/A'),
            'news_evidence_table': self.task_outputs.get('summarize', {}).get('tables', {}).get('news_evidence', 'N/A'),
            'earnings_table': self.task_outputs.get('summarize', {}).get('tables', {}).get('earnings', 'N/A'),
            'market_regime_table': self.task_outputs.get('summarize', {}).get('tables', {}).get('regime_macro', 'N/A'),
            'provenance': pd.DataFrame(PROVENANCE_LOG).to_string(index=False)
        }
        return report

In [11]:
# Cell 11: Example Usage
# To analyze a different stock, change 'AAPL' to another symbol (e.g., 'MSFT').
agent = InvestmentResearchAgent('AAPL')
result = agent.run()
print("\n===== Investment Research Report for", result['stock_symbol'], "=====")
print(f"Overall Confidence: {result['overall_confidence']}\n")
print("Warnings:")
print(result['warnings'])
print("\nSelf-Reflections:")
print(result['reflections'])
print("\nNews Summary:")
print(result['news_summary'])
print("\nNews Evidence Table:")
print(result['news_evidence_table'])
print("\nEarnings Table:")
print(result['earnings_table'])
print("\nMarket Regime & Macro Table:")
print(result['market_regime_table'])
print("\nProvenance Logs:")
print(result['provenance'])
print("===== End of Report =====\n")


===== Investment Research Report for AAPL =====
Overall Confidence: 0.32768000000000014

None

Self-Reflections:
      task  confidence                    gaps     next_actions
fetch_data      1.0000           [uncertainty] [Refine output.]
preprocess      0.8000 [coverage, uncertainty] [Refine output.]
   analyze      0.6400           [uncertainty] [Refine output.]
 summarize      0.5120           [uncertainty] [Refine output.]
        qa      0.4096           [uncertainty] [Refine output.]

News Summary:
Processed 10 news articles for AAPL. Key themes: Apple's 2025 gains, metals, bitcoin pricing: Market Takeaways, Intel's comeback is fueled by the promise of more deals, Prediction: This Artificial Intelligence (AI) Semiconductor Stock Will Join Nvidia, Microsoft, Apple, Alphabet, and Amazon in the $2 Trillion Club by 2028. (Hint: Not Broadcom), Apple (AAPL) Stock: UBS Reiterates Neutral, $220 PT on iPhone 17 Availability Data, Kevin O’Leary is furious at Trump ‘force-feeding’ him In