# Product Hunt Competitor Finder

This notebook helps you find competitors on Product Hunt for your business idea.

## Setup

Before running, set your Product Hunt API credentials as environment variables:

```python
import os
os.environ['PRODUCTHUNT_CLIENT_ID'] = 'your_client_id'
os.environ['PRODUCTHUNT_CLIENT_SECRET'] = 'your_client_secret'
# OR if you already have an access token:
os.environ['PRODUCTHUNT_ACCESS_TOKEN'] = 'your_access_token'
```

## Usage

Simply call `find_competitors()` with your idea:

```python
competitors = find_competitors("your business idea here", max_results=10)
display_competitors(competitors)
```


In [22]:
import os
import requests
import json
from typing import List, Dict, Optional
from datetime import datetime
import re

# Configuration
PRODUCTHUNT_API_URL = "https://api.producthunt.com/v2/api/graphql"
PRODUCTHUNT_CLIENT_ID = os.getenv("PRODUCTHUNT_CLIENT_ID", "")
PRODUCTHUNT_CLIENT_SECRET = os.getenv("PRODUCTHUNT_CLIENT_SECRET", "")
PRODUCTHUNT_ACCESS_TOKEN = os.getenv("PRODUCTHUNT_ACCESS_TOKEN", "")  # Optional: if you already have a token


In [23]:
def get_access_token(client_id: str, client_secret: str) -> Optional[str]:
    """
    Get an access token using client credentials flow.
    This is for public API access without user authentication.
    """
    token_url = "https://api.producthunt.com/v2/oauth/token"
    
    payload = {
        "client_id": client_id,
        "client_secret": client_secret,
        "grant_type": "client_credentials"
    }
    
    try:
        response = requests.post(token_url, data=payload)
        response.raise_for_status()
        data = response.json()
        return data.get("access_token")
    except Exception as e:
        print(f"Error getting access token: {e}")
        return None


In [24]:
def extract_keywords(idea: str) -> List[str]:
    """
    Extract relevant keywords from an idea to use for Product Hunt filtering.
    """
    # Extended stop words list
    stop_words = {'a', 'an', 'the', 'is', 'are', 'was', 'were', 'be', 'been', 
                  'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 
                  'would', 'should', 'could', 'may', 'might', 'must', 'can', 
                  'this', 'that', 'these', 'those', 'i', 'you', 'he', 'she', 
                  'it', 'we', 'they', 'what', 'which', 'who', 'when', 'where', 
                  'why', 'how', 'for', 'to', 'of', 'in', 'on', 'at', 'by', 
                  'with', 'from', 'about', 'into', 'through', 'during', 'before', 
                  'after', 'above', 'below', 'up', 'down', 'out', 'off', 'over', 
                  'under', 'again', 'further', 'then', 'once', 'and', 'or', 'but', 
                  'if', 'because', 'as', 'until', 'while', 'so', 'app', 'platform', 
                  'tool', 'service', 'product', 'solution', 'powered', 'based',
                  'using', 'via', 'made', 'create', 'creating', 'help', 'helps'}
    
    # Clean and split
    words = re.findall(r'\b\w+\b', idea.lower())
    keywords = [w for w in words if w not in stop_words and len(w) > 2]
    
    # Prioritize longer, more specific keywords
    keywords.sort(key=lambda x: len(x), reverse=True)
    
    # Return top keywords (most relevant)
    return keywords[:6]  # Top 6 keywords


In [25]:
def fetch_producthunt_posts(access_token: str, limit: int = 50, order: str = "VOTES") -> List[Dict]:
    """
    Fetch posts from Product Hunt. Since search isn't supported, we fetch posts and filter client-side.
    Reduced complexity by limiting nested fields.
    """
    graphql_query = """
    query GetPosts($first: Int!, $order: PostsOrder!) {
      posts(first: $first, order: $order) {
        edges {
          node {
            id
            name
            tagline
            description
            url
            website
            votesCount
            commentsCount
            createdAt
            topics(first: 5) {
              edges {
                node {
                  name
                }
              }
            }
          }
        }
      }
    }
    """
    
    variables = {
        "first": limit,
        "order": order
    }
    
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json"
    }
    
    payload = {
        "query": graphql_query,
        "variables": variables
    }
    
    try:
        response = requests.post(PRODUCTHUNT_API_URL, json=payload, headers=headers)
        response.raise_for_status()
        data = response.json()
        
        if "errors" in data:
            print(f"GraphQL errors: {data['errors']}")
            return []
        
        posts = data.get("data", {}).get("posts", {}).get("edges", [])
        return [edge["node"] for edge in posts]
    except Exception as e:
        print(f"Error fetching Product Hunt posts: {e}")
        if hasattr(e, 'response') and e.response is not None:
            print(f"Response: {e.response.text}")
        return []


def filter_posts_by_relevance(posts: List[Dict], keywords: List[str], idea: str) -> List[Dict]:
    """
    Filter posts by relevance to the idea using keyword matching.
    """
    idea_lower = idea.lower()
    keyword_set = set(k.lower() for k in keywords)
    
    scored_posts = []
    
    for post in posts:
        score = 0
        
        # Check name
        name = post.get("name", "").lower()
        for keyword in keyword_set:
            if keyword in name:
                score += 3
        
        # Check tagline
        tagline = post.get("tagline", "").lower()
        for keyword in keyword_set:
            if keyword in tagline:
                score += 2
        
        # Check description
        description = post.get("description", "").lower()
        for keyword in keyword_set:
            if keyword in description:
                score += 1
        
        # Check topics
        topics = post.get("topics", {}).get("edges", [])
        topic_names = " ".join([t["node"]["name"].lower() for t in topics])
        for keyword in keyword_set:
            if keyword in topic_names:
                score += 2
        
        # Bonus for exact phrase matches
        if any(keyword in idea_lower for keyword in keyword_set if len(keyword) > 4):
            score += 1
        
        if score > 0:
            post["_relevance_score"] = score
            scored_posts.append(post)
    
    # Sort by relevance score, then by votes
    scored_posts.sort(key=lambda x: (x.get("_relevance_score", 0), x.get("votesCount", 0)), reverse=True)
    
    return scored_posts


In [26]:
def find_competitors(idea: str, max_results: int = 10, fetch_limit: int = 50) -> List[Dict]:
    """
    Find competitors on Product Hunt for a given idea.
    
    Since Product Hunt API doesn't support search, we fetch posts and filter client-side.
    
    Args:
        idea: The product/business idea to find competitors for
        max_results: Maximum number of competitors to return
        fetch_limit: Number of posts to fetch from Product Hunt (higher = more comprehensive but slower)
    
    Returns:
        List of competitor products with relevant information
    """
    # Get access token
    access_token = PRODUCTHUNT_ACCESS_TOKEN
    if not access_token and PRODUCTHUNT_CLIENT_ID and PRODUCTHUNT_CLIENT_SECRET:
        print("Getting access token...")
        access_token = get_access_token(PRODUCTHUNT_CLIENT_ID, PRODUCTHUNT_CLIENT_SECRET)
    
    if not access_token:
        raise ValueError("No access token available. Please set PRODUCTHUNT_ACCESS_TOKEN or PRODUCTHUNT_CLIENT_ID/SECRET")
    
    # Extract keywords from the idea
    keywords = extract_keywords(idea)
    print(f"Extracted keywords: {', '.join(keywords)}")
    
    # Fetch posts from Product Hunt (sorted by votes to get popular products)
    print(f"\nFetching top {fetch_limit} posts from Product Hunt...")
    all_posts = fetch_producthunt_posts(access_token, limit=fetch_limit, order="VOTES")
    
    if not all_posts:
        print("No posts fetched. Check your API credentials and connection.")
        return []
    
    print(f"Fetched {len(all_posts)} posts. Filtering by relevance...")
    
    # Filter posts by relevance to the idea
    relevant_posts = filter_posts_by_relevance(all_posts, keywords, idea)
    
    print(f"Found {len(relevant_posts)} relevant competitors.")
    
    # Remove the relevance score before returning
    for post in relevant_posts:
        post.pop("_relevance_score", None)
    
    return relevant_posts[:max_results]


In [27]:
def display_competitors(competitors: List[Dict]):
    """
    Display competitors in a readable format.
    """
    if not competitors:
        print("No competitors found.")
        return
    
    print(f"\n{'='*80}")
    print(f"Found {len(competitors)} competitors:")
    print(f"{'='*80}\n")
    
    for i, competitor in enumerate(competitors, 1):
        print(f"{i}. {competitor.get('name', 'N/A')}")
        print(f"   Tagline: {competitor.get('tagline', 'N/A')}")
        print(f"   Votes: {competitor.get('votesCount', 0):,}")
        print(f"   Comments: {competitor.get('commentsCount', 0):,}")
        print(f"   URL: {competitor.get('url', 'N/A')}")
        print(f"   Website: {competitor.get('website', 'N/A')}")
        
        topics = competitor.get('topics', {}).get('edges', [])
        if topics:
            topic_names = [t['node']['name'] for t in topics]
            print(f"   Topics: {', '.join(topic_names)}")
        
        created_at = competitor.get('createdAt', '')
        if created_at:
            print(f"   Launched: {created_at[:10]}")
        
        print()


In [29]:
# Example usage
if __name__ == "__main__":
    # Example idea
    idea = "A streamlined decision-making tool for collectors, helping antique, art, and memorabilia enthusiasts quickly evaluate options and reduce analysis paralysis with personalized recommendations."
    
    print(f"Searching for competitors for: '{idea}'\n")
    competitors = find_competitors(idea, max_results=10)
    display_competitors(competitors)


Searching for competitors for: 'A streamlined decision-making tool for collectors, helping antique, art, and memorabilia enthusiasts quickly evaluate options and reduce analysis paralysis with personalized recommendations.'

Getting access token...
Extracted keywords: recommendations, personalized, streamlined, memorabilia, enthusiasts, collectors

Fetching top 50 posts from Product Hunt...
Fetched 20 posts. Filtering by relevance...
Found 20 relevant competitors.

Found 10 competitors:

1. Floqer
   Tagline: The AI copilot for GTM data automation
   Votes: 697
   Comments: 232
   URL: https://www.producthunt.com/products/floqer-2?utm_campaign=producthunt-api&utm_medium=api-v2&utm_source=Application%3A+CBrain+%28ID%3A+245619%29
   Website: https://www.producthunt.com/r/AAS4GEQDSZ6URU?utm_campaign=producthunt-api&utm_medium=api-v2&utm_source=Application%3A+CBrain+%28ID%3A+245619%29
   Topics: Sales, Marketing, Data
   Launched: 2025-11-04

2. Cursor 2.0
   Tagline: Our first coding mode