In [None]:
# Earnings Call Transcripts API

This notebook implements a class to retrieve earnings call transcripts and related data from the EarningsCall.biz API.

API Documentation: https://earningscall.biz/api-guide

In [None]:
import requests
import pandas as pd
import json
from datetime import datetime
from typing import Optional, Dict, List, Union
import os

In [None]:
class EarningsTranscript:
    """
    Retrieve earnings call transcripts and related data from EarningsCall.biz API.
    
    API provides:
    - Calendar of upcoming earnings calls
    - List of earnings events by company
    - Transcripts (multiple levels: basic text, speaker diarization, timestamps, Q&A separation)
    - Audio files
    - Slide decks
    - Full company list
    
    Every public method performs one HTTP GET against ``base_url`` and raises
    ``requests.HTTPError`` (via ``raise_for_status``) on a non-success status.
    """
    
    # Seconds to wait on the server before giving up. Without an explicit
    # timeout, requests would block indefinitely on an unresponsive host.
    DEFAULT_TIMEOUT = 30
    
    # Key used when no API key is supplied or discoverable (limited functionality).
    DEMO_API_KEY = 'demo'
    
    def __init__(self, api_key: Optional[str] = None, timeout: Optional[float] = DEFAULT_TIMEOUT):
        """
        Initialize EarningsTranscript client.
        
        Parameters:
        -----------
        api_key : str, optional
            EarningsCall.biz API key. If not provided, will look for 'earningscall_api_key' 
            in secrets.json or use 'demo' key (limited functionality).
        timeout : float, optional
            Timeout in seconds passed to every HTTP request (default 30).
            Pass None to wait indefinitely.
        """
        if api_key:
            self.api_key = api_key
        else:
            self.api_key = self._load_api_key_from_secrets()
        
        self.base_url = "https://v2.api.earningscall.biz"
        self.timeout = timeout
    
    def _load_api_key_from_secrets(self) -> str:
        """Return the key stored in secrets.json, or the demo key if it cannot be read."""
        try:
            secrets_path = self._find_secrets()
            with open(secrets_path, 'r') as f:
                secrets = json.load(f)
        except (OSError, ValueError):
            # Missing/unreadable file (OSError) or malformed JSON (ValueError):
            # falling back to the demo key is the intended best-effort behavior.
            return self.DEMO_API_KEY
        
        if isinstance(secrets, dict):
            # An empty or null entry is treated the same as a missing one.
            return secrets.get('earningscall_api_key') or self.DEMO_API_KEY
        return self.DEMO_API_KEY
    
    def _find_secrets(self):
        """
        Find secrets.json file.
        
        Looks in the current working directory, its parent and grandparent,
        and (when this code lives in a file) next to the parent of that file's
        directory.
        
        Returns:
        --------
        str
            The first candidate path that exists.
            
        Raises:
        -------
        FileNotFoundError
            If none of the candidate paths exists.
        """
        possible_paths = [
            "secrets.json",
            "../secrets.json",
            "../../secrets.json",
        ]
        # __file__ is not defined inside a notebook kernel or interactive
        # session; referencing it unconditionally raised NameError and made
        # the whole lookup fail even when secrets.json was present.
        module_file = globals().get('__file__')
        if module_file:
            possible_paths.append(
                os.path.join(os.path.dirname(os.path.dirname(module_file)), "secrets.json")
            )
        for path in possible_paths:
            if os.path.exists(path):
                return path
        raise FileNotFoundError("secrets.json not found")
    
    def _get(self, endpoint: str, **params):
        """
        Issue a GET request to ``{base_url}/{endpoint}`` with the API key attached.
        
        Returns the ``requests`` response object after ``raise_for_status()``
        has succeeded.
        """
        query = {'apikey': self.api_key}
        query.update(params)
        # getattr keeps instances that bypassed __init__ usable.
        timeout = getattr(self, 'timeout', self.DEFAULT_TIMEOUT)
        
        response = requests.get(f"{self.base_url}/{endpoint}", params=query, timeout=timeout)
        response.raise_for_status()
        return response
    
    def _download(
        self,
        endpoint: str,
        label: str,
        exchange: str,
        symbol: str,
        year: int,
        quarter: int,
        output_path: Optional[str]
    ) -> Optional[bytes]:
        """
        Fetch a binary asset and either return it or write it to output_path.
        
        ``label`` is only used in the confirmation message printed after saving.
        """
        response = self._get(
            endpoint,
            exchange=exchange.upper(),
            symbol=symbol.upper(),
            year=year,
            quarter=quarter
        )
        payload = response.content
        
        if not output_path:
            return payload
        
        # Create the destination folder so paths like 'output/file.mp3' work
        # on a fresh checkout instead of failing with FileNotFoundError.
        parent_dir = os.path.dirname(output_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        
        with open(output_path, 'wb') as f:
            f.write(payload)
        print(f"{label} saved to {output_path}")
        return None
    
    def get_calendar(self, year: int, month: int, day: int) -> pd.DataFrame:
        """
        Get earnings event calendar for a specific date.
        
        Parameters:
        -----------
        year : int
            4-digit year (e.g., 2025)
        month : int
            Month number (1-12)
        day : int
            Day of month (1-31)
            
        Returns:
        --------
        pandas.DataFrame
            Earnings events with exchange, symbol, year, quarter, conference_date, 
            company_name, transcript_ready. Empty DataFrame when the API returns
            no data.
            
        Example:
        --------
        cal = earnings.get_calendar(2025, 1, 10)
        """
        data = self._get('calendar', year=year, month=month, day=day).json()
        
        if not data:
            return pd.DataFrame()
        
        df = pd.DataFrame(data)
        
        # Convert conference_date to datetime
        if 'conference_date' in df.columns:
            df['conference_date'] = pd.to_datetime(df['conference_date'])
        
        return df
    
    def get_events(self, exchange: str, symbol: str) -> Dict:
        """
        Get list of all earnings events for a specific company.
        
        Parameters:
        -----------
        exchange : str
            Exchange code (NYSE, NASDAQ, AMEX, TSX, TSXV, OTC, LSE, CBOE, STO, ASX)
        symbol : str
            Ticker symbol (e.g., 'AAPL', 'MSFT')
            
        Returns:
        --------
        dict
            Dictionary with 'company_name' and 'events' (list of earnings calls with 
            year, quarter, conference_date). Each 'conference_date' present is
            converted with pandas.to_datetime.
            
        Example:
        --------
        events = earnings.get_events('NASDAQ', 'AAPL')
        print(events['company_name'])
        df = pd.DataFrame(events['events'])
        """
        data = self._get(
            'events',
            exchange=exchange.upper(),
            symbol=symbol.upper()
        ).json()
        
        # Convert conference dates to datetime in events list
        if 'events' in data:
            for event in data['events']:
                if 'conference_date' in event:
                    event['conference_date'] = pd.to_datetime(event['conference_date'])
        
        return data
    
    def get_transcript(
        self, 
        exchange: str, 
        symbol: str, 
        year: int, 
        quarter: int,
        level: int = 1
    ) -> Dict:
        """
        Get earnings call transcript.
        
        Parameters:
        -----------
        exchange : str
            Exchange code (NYSE, NASDAQ, AMEX, TSX, TSXV, OTC, LSE, CBOE, STO, ASX)
        symbol : str
            Ticker symbol
        year : int
            Year of earnings call (e.g., 2023)
        quarter : int
            Quarter number (1, 2, 3, or 4)
        level : int, default 1
            Transcript detail level:
            - Level 1: Basic transcript text (single string)
            - Level 2: Speaker diarization with names and titles
            - Level 3: Word-level timestamps
            - Level 4: Separated prepared remarks and Q&A sections
            Note: Levels 2-4 require Enhanced Transcript Plan
            
        Returns:
        --------
        dict
            Transcript data structure varies by level:
            - Level 1: {'event': {...}, 'text': 'full transcript...'}
            - Level 2: {'event': {...}, 'speaker_name_map_v2': {...}, 'speakers': [...]}
            - Level 3: {'event': {...}, 'speakers': [{'speaker': 'spk01', 'words': [...], 'start_times': [...]}]}
            - Level 4: {'event': {...}, 'prepared_remarks': '...', 'questions_and_answers': '...'}
            
        Example:
        --------
        # Basic transcript
        transcript = earnings.get_transcript('NASDAQ', 'AAPL', 2023, 1)
        print(transcript['text'][:500])
        
        # With speaker names
        transcript = earnings.get_transcript('NASDAQ', 'AAPL', 2023, 1, level=2)
        for speaker_segment in transcript['speakers']:
            speaker_id = speaker_segment['speaker']
            name = transcript['speaker_name_map_v2'][speaker_id]['name']
            print(f"{name}: {speaker_segment['text'][:100]}...")
        
        # Prepared remarks vs Q&A
        transcript = earnings.get_transcript('NASDAQ', 'AAPL', 2023, 1, level=4)
        print(f"Prepared Remarks: {len(transcript['prepared_remarks'])} chars")
        print(f"Q&A: {len(transcript['questions_and_answers'])} chars")
        """
        response = self._get(
            'transcript',
            exchange=exchange.upper(),
            symbol=symbol.upper(),
            year=year,
            quarter=quarter,
            level=level
        )
        return response.json()
    
    def get_audio(
        self, 
        exchange: str, 
        symbol: str, 
        year: int, 
        quarter: int,
        output_path: Optional[str] = None
    ) -> Optional[bytes]:
        """
        Download earnings call audio file.
        
        Parameters:
        -----------
        exchange : str
            Exchange code
        symbol : str
            Ticker symbol
        year : int
            Year of earnings call
        quarter : int
            Quarter number (1-4)
        output_path : str, optional
            File path to save audio. If None, returns audio bytes. Missing
            parent directories are created.
            
        Returns:
        --------
        bytes or None
            Audio data (audio/mpeg format) if output_path is None; otherwise
            None after the file has been written.
            
        Example:
        --------
        # Save to file
        earnings.get_audio('NASDAQ', 'MSFT', 2022, 1, 'msft_q1_2022.mp3')
        
        # Get audio bytes
        audio_data = earnings.get_audio('NASDAQ', 'AAPL', 2023, 1)
        """
        return self._download('audio', 'Audio', exchange, symbol, year, quarter, output_path)
    
    def get_slides(
        self, 
        exchange: str, 
        symbol: str, 
        year: int, 
        quarter: int,
        output_path: Optional[str] = None
    ) -> Optional[bytes]:
        """
        Download earnings call slide deck (presentation).
        
        Parameters:
        -----------
        exchange : str
            Exchange code
        symbol : str
            Ticker symbol
        year : int
            Year of earnings call
        quarter : int
            Quarter number (1-4)
        output_path : str, optional
            File path to save slides. If None, returns slide bytes. Missing
            parent directories are created.
            
        Returns:
        --------
        bytes or None
            Slide deck data (typically PDF) if output_path is None; otherwise
            None after the file has been written.
            
        Example:
        --------
        # Save to file
        earnings.get_slides('NASDAQ', 'MSFT', 2025, 1, 'msft_q1_2025_slides.pdf')
        
        # Get slide bytes
        slides_data = earnings.get_slides('NASDAQ', 'AAPL', 2023, 1)
        """
        return self._download('slides', 'Slides', exchange, symbol, year, quarter, output_path)
    
    def get_symbols(self) -> pd.DataFrame:
        """
        Get list of all companies with earnings call data available (6000+ companies).
        Note: Demo API key returns only 2 companies.
        
        Returns:
        --------
        pandas.DataFrame
            Companies with exchange, name, symbol columns
            
        Example:
        --------
        companies = earnings.get_symbols()
        print(f"Total companies: {len(companies)}")
        tech_companies = companies[companies['name'].str.contains('Tech', case=False)]
        """
        data = self._get('symbols').json()
        
        return pd.DataFrame(data)
    
    def get_events_dataframe(self, exchange: str, symbol: str) -> pd.DataFrame:
        """
        Convenience method to get events as a DataFrame instead of dict.
        
        Parameters:
        -----------
        exchange : str
            Exchange code
        symbol : str
            Ticker symbol
            
        Returns:
        --------
        pandas.DataFrame
            Earnings events sorted by conference date descending, with a
            'company_name' column added. Empty DataFrame (no columns) when the
            response has no 'events' key.
            
        Example:
        --------
        aapl_events = earnings.get_events_dataframe('NASDAQ', 'AAPL')
        """
        events_data = self.get_events(exchange, symbol)
        
        if 'events' not in events_data:
            return pd.DataFrame()
        
        df = pd.DataFrame(events_data['events'])
        df['company_name'] = events_data.get('company_name', '')
        
        if 'conference_date' in df.columns:
            df = df.sort_values('conference_date', ascending=False).reset_index(drop=True)
        
        return df

## Example Usage

Initialize the class and explore the API functionality.

In [None]:
# Initialize the client. With no argument it uses 'earningscall_api_key' from
# secrets.json when that file can be read, otherwise the 'demo' key (limited functionality).
earnings = EarningsTranscript()

# Never echo a real key into saved notebook output; only the public demo key is shown verbatim.
api_key_for_display = earnings.api_key if earnings.api_key == 'demo' else '<hidden>'
print(f"Using API key: {api_key_for_display}")

### 1. Get Calendar - Earnings events for a specific date

In [None]:
# Earnings calendar for 2025-01-10 (the date used for the demo)
calendar = earnings.get_calendar(year=2025, month=1, day=10)
calendar_event_count = len(calendar)
print(f"Earnings events on 2025-01-10: {calendar_event_count}")
calendar.head()

### 2. Get Events - All earnings calls for a specific company

In [None]:
# Every AAPL earnings event as a DataFrame (sorted by conference date, newest first)
aapl_events = earnings.get_events_dataframe(exchange='NASDAQ', symbol='AAPL')

aapl_call_count = len(aapl_events)
print(f"Total AAPL earnings calls: {aapl_call_count}")

# company_name is repeated on every row, so the first row is enough
aapl_company_name = aapl_events['company_name'].iloc[0]
print(f"Company: {aapl_company_name}")

aapl_events.head(10)

### 3. Get Transcript - Level 1 (Basic Text)

In [None]:
# Level 1 (plain text) transcript for AAPL Q1 2023
transcript_l1 = earnings.get_transcript('NASDAQ', 'AAPL', 2023, 1, level=1)

# Event metadata
l1_event = transcript_l1['event']
print(f"Event: Q{l1_event['quarter']} {l1_event['year']}")
print(f"Conference Date: {l1_event['conference_date']}")

# Transcript body: total size plus a short preview
l1_text = transcript_l1['text']
print(f"\nTranscript length: {len(l1_text)} characters")
print(f"\nFirst 500 characters:\n{l1_text[:500]}...")

### 4. Get Transcript - Level 2 (Speaker Diarization)

In [None]:
# Level 2 transcript: speaker segments plus a map from speaker id to name/title
transcript_l2 = earnings.get_transcript('NASDAQ', 'AAPL', 2023, 1, level=2)

print("Speaker Name Map:")
l2_speaker_map = transcript_l2['speaker_name_map_v2']
for spk_id, spk_info in l2_speaker_map.items():
    print(f"  {spk_id}: {spk_info['name']} - {spk_info['title']}")

l2_segments = transcript_l2['speakers']
print(f"\nTotal speaker segments: {len(l2_segments)}")
print("\nFirst 3 speaker segments:")
# Number the previews from 1 for readability
for position, seg in enumerate(l2_segments[:3], start=1):
    spk_id = seg['speaker']
    spk_name = l2_speaker_map[spk_id]['name']
    preview = seg['text'][:200]
    print(f"\n{position}. {spk_name} ({spk_id}):\n   {preview}...")

### 5. Get Transcript - Level 4 (Prepared Remarks + Q&A Separated)

In [None]:
# Level 4 transcript: prepared remarks and Q&A delivered as separate strings
transcript_l4 = earnings.get_transcript('NASDAQ', 'AAPL', 2023, 1, level=4)

# Section sizes
l4_prepared = transcript_l4['prepared_remarks']
print(f"Prepared Remarks: {len(l4_prepared)} characters")
l4_qa = transcript_l4['questions_and_answers']
print(f"Q&A Section: {len(l4_qa)} characters")

# Short preview of each section
print(f"\nPrepared Remarks preview:\n{l4_prepared[:400]}...")
print(f"\n\nQ&A preview:\n{l4_qa[:400]}...")

### 6. Get Available Companies

In [None]:
# Companies covered by the API (the demo key returns 2 companies)
companies = earnings.get_symbols()
company_total = len(companies)
print(f"Total companies available: {company_total}")
companies

### 7. Download Audio and Slides (Optional)

In [None]:
# Optional downloads: left disabled so that running the notebook writes no files.
# os.makedirs('output', exist_ok=True)  # make sure the target folder exists first

# Uncomment to download audio file
# earnings.get_audio('NASDAQ', 'MSFT', 2022, 1, 'output/msft_q1_2022.mp3')

# Uncomment to download slide deck
# earnings.get_slides('NASDAQ', 'MSFT', 2025, 1, 'output/msft_q1_2025_slides.pdf')

download_notice = "Audio and slide downloads are commented out. Uncomment to use."
print(download_notice)