# Competitive Landscape Analysis using SEC Filings

This notebook analyzes competitive relationships between companies by extracting competitor mentions from SEC filings, particularly 10-K reports. The analysis creates a network of self-reported competitive relationships that reveals industry structures and company positioning.

## Methodology
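- Extracts competitor mentions from the Item 1 (Business) and Item 7 (MD&A) sections of 10-K filings, falling back to the full filing text when those sections cannot be extracted or yield no competitors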
- Uses AI-powered text analysis (GPT-4o) to identify explicit competitor relationships
- Applies company name deduplication to handle different naming conventions
- Builds a directed graph in which an edge A → B means that company A names company B as a competitor in its own filing

## Key Features
- **Data Collection**: Automated extraction of competitive intelligence from SEC filings
- **Network Analysis**: Visualizes company relationships as directed graphs
- **Interactive Dashboards**: Multiple visualization options including heatmaps and network diagrams
- **Metrics**: Identifies most-referenced companies and competitive intensity

## Business Applications
- Identify direct and indirect competitors for investment analysis
- Understand competitive positioning and market structure
- Discover potential acquisition targets or partnership opportunities
- Track changes in competitive landscape over time

In [None]:
import sys
import pandas as pd
from collections import defaultdict
import importlib
sys.path.append("../")
from wallstreet_quant.edgar_extractor import fetch_10K_and_10Q_filings, extract_items_from_filing
from wallstreet_quant import edgar_ai
importlib.reload(edgar_ai)
from wallstreet_quant.edgar_ai import competitors_analysis
from wallstreet_quant import utils
importlib.reload(utils)
from wallstreet_quant.utils import CompanyDeduper

## Fetch ticker data from SEC

In [None]:
import requests

def fetch_sec_ticker_map():
    """
    Download the SEC ticker list and map each ticker symbol to its company title.

    Returns:
        Dictionary of ticker symbol -> company title, taken from the
        'ticker' and 'title' fields of company_tickers.json. An empty dict
        is returned (after printing a diagnostic) when the request fails
        or the server answers with a non-200 status.
    """
    # Define headers for SEC API requests
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        'Accept': 'application/json'
    }

    ticker_url = "https://www.sec.gov/files/company_tickers.json"
    try:
        # Bound the wait so a stalled connection cannot hang the notebook cell.
        response = requests.get(ticker_url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        # Same best-effort contract as the non-200 branch below.
        print(f"Failed to fetch data: {exc}")
        return {}

    if response.status_code != 200:
        print(f"Failed to fetch data: {response.status_code}")
        print(f"Response content: {response.text[:100]}...")  # Print beginning of response
        return {}

    # Titles are company names, not CIK numbers, so they must not be
    # zero-padded: zfill(10) would prefix any name shorter than ten
    # characters with '0' characters.
    return {entry['ticker']: str(entry['title'])
            for entry in response.json().values()}

In [None]:
# Ticker -> result of fetch_10K_and_10Q_filings; tickers whose fetch raised
# are simply absent (the defaultdict yields [] for them on later lookup).
filings = defaultdict(list)
symbols = [
    'TSLA', 'NVDA', 'AAPL', 'AMZN', 'GOOGL', 'MSFT', 'META', 'NFLX', 'AMD', 'INTC',
    'PLTR', 'CRM', 'PYPL', 'UBER', 'LYFT', 'SQ', 'SNAP', 'SHOP', 'ZM', 'ADBE',
    'ORCL', 'IBM', 'CSCO', 'QCOM', 'TXN', 'AVGO', 'MU', 'NOW', 'DOCU', 'FSLY',
    'OKTA', 'ZS', 'CRWD', 'NET', 'DDOG', 'PANW', 'FTNT', 'SPLK', 'TEAM', 'ASML',
    'LRCX', 'KLAC', 'ADI', 'MCHP', 'SWKS', 'XLNX', 'WDC', 'STX', 'HPE', 'DELL',
    'HPQ', 'NTAP', 'VMW', 'CSX', 'UAL', 'DAL', 'LUV', 'AAL', 'BA', 'GE',
    'CAT', 'MMM', 'HON', 'UPS', 'FDX', 'GM', 'F', 'TMUS', 'VZ', 'T',
]
for ticker in symbols:
    try:
        # Only 10-K forms are requested for the given date window.
        filings[ticker] = fetch_10K_and_10Q_filings(
            ticker, "2023-01-01", "2025-6-6", form=["10-K"]
        )
    except Exception as err:
        # Best effort: report the failure and move on to the next ticker.
        print(err)

## Analyze the SEC filings with GPT

In [None]:
# Ticker -> list of competitor names returned by competitors_analysis
competitors = defaultdict(list)
items_needed = ['1', '7']
# Flat list of every competitor name seen (duplicates kept), fed to the deduper later
raw = []

# Reload so that edits made to edgar_ai after the first import are picked up
importlib.reload(edgar_ai)
from wallstreet_quant.edgar_ai import competitors_analysis

for ticker in symbols:
    ticker_filings = filings[ticker]
    if len(ticker_filings) == 0:
        continue
    print(f"Processing {ticker}...")

    # Only the first filing in the list is analysed.
    filing = ticker_filings[0]
    curr = extract_items_from_filing(filing, items_needed)
    try:
        sections_text = curr['1'] + "\n\n" + curr['7']
        competitors[ticker] = competitors_analysis(ticker, sections_text, model="gpt-4o")
        if not competitors[ticker]:
            # Nothing found in Items 1 and 7: retry on the complete filing text.
            print(f"No competitors found with items 1&7, trying with whole filing for {ticker}...")
            competitors[ticker] = competitors_analysis(ticker, filing.text(), model="gpt-4o")
    except KeyError as e:
        # A requested item is missing from the extraction result (or a
        # KeyError surfaced from the analysis call): use the whole filing.
        print(f"Extraction error for {ticker}: {e}")
        competitors[ticker] = competitors_analysis(ticker, filing.text(), model="gpt-4o")

    print(f"Competitors for {ticker}: {competitors[ticker]}")
    raw.extend(competitors[ticker])

In [None]:
# Group spelling variants of the same company into clusters
ticker_map = fetch_sec_ticker_map()
deduper = CompanyDeduper(ticker_map, cosine_th=0.80)
clusters, name2cid, reps = deduper.dedupe(raw)

# Show, for every raw name, the cluster id it landed in and that cluster's representative
print("Name → cid → rep")
for name in raw:
    cluster_id = name2cid[name]
    print(f"{name:25} → {cluster_id} → {reps[cluster_id]}")

print("\nClusters:")
for index, members in enumerate(clusters):
    print(index, members)

## Plot the analytics

In [None]:
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import colors
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import pandas as pd

def replace_with_representatives(competitors_dict, name2cid, reps):
    """
    Rewrite a competitors mapping so every name is its cluster representative.

    Args:
        competitors_dict: Mapping of company symbol -> list of competitor names
        name2cid: Mapping of name -> cluster ID
        reps: Sequence of representative names, indexed by cluster ID

    Returns:
        New dict keyed by the representative of each symbol, whose values are
        the representatives of its competitors in first-seen order, without
        duplicates and without the company itself. Names absent from name2cid
        are kept unchanged. Companies left with no competitors are omitted.
    """
    def _canonical(name):
        # Names the deduper never saw pass through untouched.
        return reps[name2cid[name]] if name in name2cid else name

    result = {}
    for symbol, competitor_list in competitors_dict.items():
        owner = _canonical(symbol)

        unique_rivals = []
        for candidate in map(_canonical, competitor_list):
            # Skip self-references and names already collected.
            if candidate == owner or candidate in unique_rivals:
                continue
            unique_rivals.append(candidate)

        if unique_rivals:
            result[owner] = unique_rivals

    return result

def create_competitor_graph(deduplicated_competitors):
    """
    Build a directed graph of competitive references.

    Args:
        deduplicated_competitors: Mapping of company name -> list of competitor names

    Returns:
        NetworkX DiGraph containing every company that appears as a key or as
        a competitor, with one edge company -> competitor per listed pair.
    """
    graph = nx.DiGraph()

    # Register every company up front, whether it appears as a key or a value.
    companies = set(deduplicated_competitors)
    companies.update(*deduplicated_competitors.values())
    graph.add_nodes_from(companies)

    # One edge per (company, competitor) pair, pointing at the competitor.
    graph.add_edges_from(
        (company, rival)
        for company, rivals in deduplicated_competitors.items()
        for rival in rivals
    )

    return graph

def plot_interactive_network(G):
    """
    Render the competitor graph as an interactive Plotly figure.

    Nodes are positioned with a spring layout. Marker size and colour both
    encode in-degree; hovering a node shows its name and its in/out degree.

    Args:
        G: NetworkX DiGraph whose nodes are strings (labels are built by
           slicing the node value).

    Returns:
        None. The figure is displayed with fig.show().
    """
    # Calculate layout
    pos = nx.spring_layout(G, k=1, iterations=50)

    # Calculate in-degrees. Fall back to 1 for an empty graph *and* for a
    # graph without edges, so the size scaling below never divides by zero.
    in_degrees = dict(G.in_degree())
    max_in_degree = max(in_degrees.values(), default=0) or 1

    # Prepare edge traces: all edges share one line trace, with None
    # separating consecutive segments so they are not joined together.
    edge_x = []
    edge_y = []
    for source, target in G.edges():
        x0, y0 = pos[source]
        x1, y1 = pos[target]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])

    edge_trace = go.Scatter(x=edge_x, y=edge_y,
                           line=dict(width=1, color='#888'),
                           hoverinfo='none',
                           mode='lines')

    # Prepare node traces
    node_x = []
    node_y = []
    node_text = []
    node_sizes = []
    node_colors = []

    for node in G.nodes():
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)

        # Node info
        in_deg = in_degrees.get(node, 0)
        out_deg = G.out_degree(node)

        node_text.append(f"{node}<br>Incoming: {in_deg}<br>Outgoing: {out_deg}")
        # Marker diameter ranges from 10 (no incoming edges) to 40 (most referenced).
        node_sizes.append(10 + (in_deg / max_in_degree) * 30)
        node_colors.append(in_deg)

    node_trace = go.Scatter(x=node_x, y=node_y,
                           mode='markers+text',
                           hoverinfo='text',
                           # Visible labels are truncated; the full name is in the hover text.
                           text=[node[:15] + "..." if len(node) > 15 else node for node in G.nodes()],
                           textposition="middle center",
                           textfont=dict(size=8),
                           hovertext=node_text,
                           marker=dict(size=node_sizes,
                                     color=node_colors,
                                     colorscale='Viridis',
                                     colorbar=dict(title="Incoming References"),
                                     line=dict(width=2, color='white')))

    fig = go.Figure(data=[edge_trace, node_trace],
                   layout=go.Layout(
                        # The title font is set through title.font; the legacy
                        # `titlefont` layout attribute is rejected by current
                        # Plotly releases.
                        title=dict(text='Interactive Company Competitive Network',
                                   font=dict(size=16)),
                        showlegend=False,
                        hovermode='closest',
                        margin=dict(b=20,l=5,r=5,t=40),
                        annotations=[ dict(
                            text="Hover over nodes for details. Zoom and pan to explore.",
                            showarrow=False,
                            xref="paper", yref="paper",
                            x=0.005, y=-0.002,
                            xanchor="left", yanchor="bottom",
                            font=dict(color="#888", size=12)
                        )],
                        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                        width=1000,
                        height=800))

    fig.show()

def plot_competitive_heatmap(deduplicated_competitors):
    """
    Show the competitive relationships as an interactive adjacency heatmap.

    Rows (y axis) are the reporting companies and columns (x axis) the
    companies they list; a cell is 1 when the row company lists the column
    company as a competitor and 0 otherwise.

    Args:
        deduplicated_competitors: Mapping of company name -> list of competitor names

    Returns:
        None. The figure is displayed with fig.show().
    """
    # Get all unique companies (keys and values alike)
    all_companies = set(deduplicated_competitors)
    for rivals in deduplicated_competitors.values():
        all_companies.update(rivals)

    companies_list = sorted(all_companies)
    n = len(companies_list)

    # Create adjacency matrix. Every key and competitor is in company_to_idx
    # by construction, so no membership checks are needed.
    matrix = np.zeros((n, n))
    company_to_idx = {comp: i for i, comp in enumerate(companies_list)}

    for company, rivals in deduplicated_competitors.items():
        i = company_to_idx[company]
        for competitor in rivals:
            matrix[i][company_to_idx[competitor]] = 1

    # Per-cell hover text: a single static template would claim a competitive
    # relationship even on cells whose value is 0.
    hover_text = [
        [
            f"{source} considers {target} a competitor"
            if matrix[i][j]
            else f"{source} does not list {target} as a competitor"
            for j, target in enumerate(companies_list)
        ]
        for i, source in enumerate(companies_list)
    ]

    # Create interactive heatmap
    fig = go.Figure(data=go.Heatmap(
                    z=matrix,
                    x=companies_list,
                    y=companies_list,
                    text=hover_text,
                    colorscale='Blues',
                    hovertemplate='%{text}<extra></extra>'))

    fig.update_layout(
        title='Company Competitive Relationships Matrix',
        xaxis_title='Competitors',
        yaxis_title='Companies',
        width=800,
        height=800
    )

    fig.show()

def plot_top_competitors_analysis(G, top_n=20):
    """
    Draw a static matplotlib view restricted to the most connected companies.

    The plotted subgraph holds the top_n nodes by in-degree plus the
    top_n // 2 nodes by out-degree. Node size and colour reflect in-degree
    measured on the full graph G, not on the subgraph.

    Args:
        G: NetworkX DiGraph whose nodes are strings (labels are built by
           slicing the node value).
        top_n: Number of nodes selected by in-degree; half as many are
               added by out-degree.

    Returns:
        None. The figure is shown with plt.show(); nothing is drawn when
        the selection is empty.
    """
    in_degrees = dict(G.in_degree())
    out_degrees = dict(G.out_degree())

    # Get top companies by in-degree (most mentioned as competitors)
    top_by_indegree = sorted(in_degrees.items(), key=lambda x: x[1], reverse=True)[:top_n]
    top_companies = {comp for comp, _ in top_by_indegree}

    # Add companies with high out-degree too
    top_by_outdegree = sorted(out_degrees.items(), key=lambda x: x[1], reverse=True)[:top_n//2]
    top_companies.update(comp for comp, _ in top_by_outdegree)

    # Create subgraph with only top companies
    subgraph = G.subgraph(top_companies).copy()

    # Without nodes, the colour-scale limits below (min/max) are undefined.
    if subgraph.number_of_nodes() == 0:
        print("No companies to display")
        return

    # Plot the focused subgraph
    plt.figure(figsize=(14, 10))
    pos = nx.spring_layout(subgraph, k=2, iterations=50)

    # Node sizes and colors based on in-degree
    node_sizes = [500 + in_degrees.get(node, 0) * 100 for node in subgraph.nodes()]
    node_colors = [in_degrees.get(node, 0) for node in subgraph.nodes()]

    # Draw the network
    nx.draw_networkx_edges(subgraph, pos, edge_color='#cccccc', arrows=True,
                          arrowsize=20, alpha=0.7, width=2)

    nx.draw_networkx_nodes(subgraph, pos, node_size=node_sizes,
                          node_color=node_colors, cmap=plt.cm.viridis,
                          alpha=0.8, linewidths=2, edgecolors='white')

    # Add labels (truncated to keep the plot readable)
    labels = {node: node[:12] + "..." if len(node) > 12 else node for node in subgraph.nodes()}
    nx.draw_networkx_labels(subgraph, pos, labels, font_size=9, font_weight='bold')

    plt.title(f'Top {len(subgraph.nodes())} Most Competitive Companies',
              fontsize=16, fontweight='bold', pad=20)
    plt.axis('off')

    # Add colorbar. The ScalarMappable is not attached to any Axes, so the
    # Axes to take space from must be passed explicitly.
    sm = plt.cm.ScalarMappable(cmap=plt.cm.viridis,
                              norm=plt.Normalize(vmin=min(node_colors), vmax=max(node_colors)))
    sm.set_array([])
    cbar = plt.colorbar(sm, ax=plt.gca(), shrink=0.8)
    cbar.set_label('Incoming Competitive References', fontsize=12)

    plt.tight_layout()
    plt.show()

def create_competitive_dashboard(deduplicated_competitors, G):
    """
    Display a 2x2 Plotly dashboard summarising the competitor graph.

    Panels: top 15 nodes by in-degree, top 15 nodes by out-degree, an
    in-degree vs out-degree scatter of all nodes, and a histogram of
    in-degrees.

    Args:
        deduplicated_competitors: Accepted for the caller's convenience; the
            body does not read it.
        G: NetworkX DiGraph of competitive references.

    Returns:
        None. The figure is displayed with fig.show().
    """
    def _top_fifteen(degree_map):
        # Split the 15 highest-degree entries into parallel name/count lists.
        ranked = sorted(degree_map.items(), key=lambda item: item[1], reverse=True)[:15]
        return [name for name, _ in ranked], [count for _, count in ranked]

    in_degrees = dict(G.in_degree())
    out_degrees = dict(G.out_degree())

    panel_titles = ('Most Referenced Companies', 'Most Competitive Companies',
                    'Company Network Metrics', 'Competitive Density by In-Degree')
    panel_specs = [[{"type": "bar"}, {"type": "bar"}],
                   [{"type": "scatter"}, {"type": "histogram"}]]
    fig = make_subplots(rows=2, cols=2, subplot_titles=panel_titles, specs=panel_specs)

    # Top-left: companies most often named by others
    referenced_names, referenced_counts = _top_fifteen(in_degrees)
    incoming_bars = go.Bar(x=referenced_counts, y=referenced_names, orientation='h',
                           name='Incoming References', marker_color='lightblue')
    fig.add_trace(incoming_bars, row=1, col=1)

    # Top-right: companies naming the most competitors
    competitive_names, competitive_counts = _top_fifteen(out_degrees)
    outgoing_bars = go.Bar(x=competitive_counts, y=competitive_names, orientation='h',
                           name='Outgoing References', marker_color='lightcoral')
    fig.add_trace(outgoing_bars, row=1, col=2)

    # Bottom-left: in-degree against out-degree for every node
    companies = list(G.nodes())
    in_deg_vals = [in_degrees.get(company, 0) for company in companies]
    out_deg_vals = [out_degrees.get(company, 0) for company in companies]
    degree_scatter = go.Scatter(x=in_deg_vals, y=out_deg_vals, mode='markers',
                                text=companies, name='Companies',
                                marker=dict(size=8, opacity=0.7))
    fig.add_trace(degree_scatter, row=2, col=1)

    # Bottom-right: distribution of in-degrees
    in_degree_hist = go.Histogram(x=in_deg_vals, nbinsx=20, name='In-Degree Distribution',
                                  marker_color='green', opacity=0.7)
    fig.add_trace(in_degree_hist, row=2, col=2)

    fig.update_layout(height=800, showlegend=False,
                     title_text="Competitive Landscape Analysis Dashboard")
    fig.show()

# Apply the functions with different visualization approaches
# Build the deduplicated mapping and its graph, then render the dashboard
print("Creating deduplicated competitors...")
deduplicated_competitors = replace_with_representatives(competitors, name2cid, reps)
competitor_graph = create_competitor_graph(deduplicated_competitors)

# Menu of the visualizations defined above
banner = "=" * 60
menu_lines = (
    "\n" + banner,
    "VISUALIZATION OPTIONS:",
    banner,
    "1. Interactive Network (best for exploration)",
    "2. Competitive Heatmap (shows all relationships)",
    "3. Top Competitors Focus (cleaner, focused view)",
    "4. Comprehensive Dashboard (multiple metrics)",
    banner,
)
for menu_line in menu_lines:
    print(menu_line)

# Show the dashboard first (most informative)
create_competitive_dashboard(deduplicated_competitors, competitor_graph)

## Plot the competition graph

In [None]:
def plot_static_network(competitor_graph, title="Company Competitive Relationships", 
                         max_nodes=50, min_connections=1, 
                         figsize=(16, 12), node_size_multiplier=20, 
                         font_size=8, edge_alpha=0.6, show_isolated=False,
                         save_path=None, dpi=300):
    """
    Create a static matplotlib network plot with filtering for readability.

    The input graph is copied, so the caller's graph is never modified.
    Node size and colour both encode in-degree within the filtered graph.

    Parameters:
    - competitor_graph: NetworkX DiGraph of competitors (nodes must be strings,
      since labels are built by slicing the node value)
    - title: Plot title
    - max_nodes: Maximum number of nodes to display; when exceeded, the nodes
      with the highest total degree are kept
    - min_connections: Minimum degree (in + out) for a node to be included
    - figsize: Figure size tuple
    - node_size_multiplier: Multiplier for node sizes
    - font_size: Font size for labels
    - edge_alpha: Transparency of edges
    - show_isolated: Whether to show nodes with no connections; when True the
      min_connections filter is skipped entirely
    - save_path: Path to save the plot as PNG (e.g., 'network_plot.png')
    - dpi: Resolution for saved image

    Returns None. Prints a message and returns early when filtering leaves
    no nodes.
    """
    import matplotlib.pyplot as plt
    import networkx as nx

    # Create a copy to avoid modifying original
    G = competitor_graph.copy()

    # Filter nodes based on connection count
    if not show_isolated:
        weakly_connected = [
            node for node in G.nodes()
            if G.in_degree(node) + G.out_degree(node) < min_connections
        ]
        G.remove_nodes_from(weakly_connected)

    # If still too many nodes, keep only the most connected ones
    if len(G.nodes()) > max_nodes:
        # Calculate total degree for each node
        node_degrees = [(node, G.in_degree(node) + G.out_degree(node))
                       for node in G.nodes()]
        # Sort by degree and keep top nodes
        node_degrees.sort(key=lambda x: x[1], reverse=True)
        nodes_to_keep = [node for node, _ in node_degrees[:max_nodes]]
        G = G.subgraph(nodes_to_keep).copy()

    if len(G.nodes()) == 0:
        print(f"No nodes to display with min_connections={min_connections}")
        return

    print(f"Displaying {len(G.nodes())} nodes and {len(G.edges())} edges")

    # Create the plot
    plt.figure(figsize=figsize)

    # Use spring layout with a fixed seed so the picture is reproducible
    try:
        pos = nx.spring_layout(G, k=3, iterations=50, seed=42)
    except Exception:
        # Fallback to circular layout if spring fails. Catching Exception
        # rather than everything lets KeyboardInterrupt/SystemExit through.
        pos = nx.circular_layout(G)

    # Calculate node sizes based on in-degree (popularity). The +1 keeps
    # zero-in-degree nodes visible; the last factor shrinks nodes once more
    # than 50 are drawn.
    in_degrees = [G.in_degree(node) for node in G.nodes()]
    node_sizes = [(degree + 1) * node_size_multiplier * (1000 / max(len(G.nodes()), 50))
                  for degree in in_degrees]

    # Color nodes by in-degree
    node_colors = in_degrees

    # Draw the network
    nx.draw_networkx_nodes(G, pos,
                          node_size=node_sizes,
                          node_color=node_colors,
                          cmap=plt.cm.Reds,
                          alpha=0.8,
                          linewidths=1,
                          edgecolors='black')

    # Draw edges with reduced alpha for clarity
    nx.draw_networkx_edges(G, pos,
                          alpha=edge_alpha,
                          edge_color='gray',
                          arrows=True,
                          arrowsize=10,
                          arrowstyle='->',
                          width=0.5)

    # Add labels, truncating long names
    labels = {node: node[:20] + "..." if len(node) > 20 else node
              for node in G.nodes()}

    nx.draw_networkx_labels(G, pos, labels,
                           font_size=font_size,
                           font_weight='bold',
                           bbox=dict(boxstyle="round,pad=0.2",
                                   facecolor='white',
                                   alpha=0.8,
                                   edgecolor='none'))

    plt.title(title, fontsize=16, fontweight='bold', pad=20)

    # Add colorbar for in-degree (skipped when every node has in-degree 0)
    if max(in_degrees) > 0:
        sm = plt.cm.ScalarMappable(cmap=plt.cm.Reds,
                                  norm=plt.Normalize(vmin=min(in_degrees),
                                                   vmax=max(in_degrees)))
        sm.set_array([])
        cbar = plt.colorbar(sm, ax=plt.gca())
        cbar.set_label('Number of Companies Mentioning as Competitor', rotation=270, labelpad=20)

    # Add a footer summarising what is shown
    plt.figtext(0.02, 0.02,
               f"Nodes: {len(G.nodes())} | Edges: {len(G.edges())} | "
               f"Min connections: {min_connections}",
               fontsize=10, style='italic')

    plt.axis('off')
    plt.tight_layout()

    # Save the plot if save_path is provided (before show, which displays the figure)
    if save_path:
        plt.savefig(save_path, dpi=dpi, bbox_inches='tight',
                   facecolor='white', edgecolor='none')
        print(f"Plot saved as: {save_path}")

    plt.show()

# Test the static network plot and save as PNG
# Render the static view and save it as a PNG
print("Creating static network visualization...")
static_view_options = {
    "title": "Company Competitive Relationships (Static View)",
    "max_nodes": 30,  # Limit for readability
    "min_connections": 2,  # Only show well-connected companies
    "figsize": (16, 12),
    "font_size": 9,
    "save_path": "competitive_network.png",
}
plot_static_network(competitor_graph, **static_view_options)