In [None]:
GitHub Developer Analysis - Step by Step
This notebook demonstrates how to analyze a GitHub developer's profile, repositories, and activities step by step using the Cognee pipeline. Each step in the pipeline is executed separately, allowing you to observe the knowledge graph evolving as more data is added.


In [None]:
Initial Setup

In [1]:
import asyncio
import logging
import os
import sys
from pathlib import Path

# Notebook-level logger, plus root logging configured at INFO so that
# INFO-level records emitted during the run are shown in cell output
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

In [4]:
# Import necessary modules
from cognee.api.v1.cognify.github_developer_pipeline import run_github_developer_pipeline
from cognee.modules.data.deletion import prune_data, prune_system
from cognee.api.v1.visualize.visualize import visualize_graph
from cognee.tasks.github.config import get_github_settings
from IPython.display import IFrame, HTML, display
import os

In [5]:
# GitHub account that every pipeline step below is run against
github_username = "Vasilije1990"  # Replace with the username you want to analyze

# Read the GitHub API token from cognee's GitHub settings object.
# A falsy value (missing/empty token) takes the "unauthenticated" branch below.
api_token = get_github_settings().GITHUB_API_TOKEN
if api_token:
    # Plain literal: the message has no placeholders, so no f-prefix is needed
    print("‚úÖ Using GitHub API token from environment/settings")
else:
    print("‚ö†Ô∏è No GitHub API token found. Using unauthenticated API access (rate limits may apply).")
    print("   To set a token, you can create a .env file with GITHUB_API_TOKEN=your_token")

‚úÖ Using GitHub API token from environment/settings


In [6]:
async def clean_database():
    """Clean the existing database"""
    print("üßπ Cleaning existing database...")
    await prune_data()
    await prune_system(metadata=True)
    print("‚úÖ Database cleaned successfully")

await clean_database()

INFO:cognee.infrastructure.databases.graph.networkx.adapter:Graph deleted successfully.INFO:cognee.infrastructure.databases.relational.sqlalchemy.SqlAlchemyAdapter:Database deleted successfully.

üßπ Cleaning existing database...
‚úÖ Database cleaned successfully


In [7]:
async def show_graph():
    """Visualize the current state of the knowledge graph.

    Asks ``visualize_graph`` to write the visualization to
    ``github_graph_visualization.html`` in the current working directory,
    embeds that file in the cell output through an ``IFrame``, and prints the
    absolute path of the saved file.
    """
    print("üîç Generating graph visualization...")
    # Generate the visualization HTML
    visualization_file = "github_graph_visualization.html"
    visualization_path = os.path.join(os.getcwd(), visualization_file)
    # Only the file written to disk is used here, so the return value is not kept
    await visualize_graph(visualization_path)

    # Display the graph in the notebook.
    # An IFrame's src is a URL resolved by the browser, not a filesystem path:
    # an absolute local path such as /Users/... is looked up on the notebook
    # server and generally does not load. Pass the path relative to the working
    # directory instead.
    # NOTE(review): assumes the kernel's working directory is the notebook's
    # directory - confirm for your front end.
    display(IFrame(src=visualization_file, width="100%", height=600))
    print(f"‚úÖ Graph visualization saved to {visualization_path}")

In [8]:
async def run_fetch_repositories():
    """Run the fetch_repositories task"""
    print(f"üìã Fetching repositories for {github_username}...")
    task_count = 0
    async for status in run_github_developer_pipeline(github_username, api_token, ["fetch_repositories"]):
        task_count += 1
        print(f"‚è≥ Task {task_count} completed: {status}")
    print("‚úÖ Repository fetching completed")

await run_fetch_repositories()
await show_graph()



üìã Fetching repositories for Vasilije1990...
User 37cf9b76-54cb-467e-9c6d-848ec057fbe2 has registered.
‚è≥ Task 1 completed: Starting task: fetch_repositories
‚è≥ Task 2 completed: fetch_repositories: processed Developer




‚è≥ Task 3 completed: fetch_repositories: processed Repository
‚è≥ Task 4 completed: fetch_repositories: processed Edge
‚è≥ Task 5 completed: fetch_repositories: processed Repository
‚è≥ Task 6 completed: fetch_repositories: processed Edge
‚è≥ Task 7 completed: fetch_repositories: processed Repository
‚è≥ Task 8 completed: fetch_repositories: processed Edge
‚è≥ Task 9 completed: fetch_repositories: processed Repository
‚è≥ Task 10 completed: fetch_repositories: processed Edge
‚è≥ Task 11 completed: fetch_repositories: processed Repository
‚è≥ Task 12 completed: Added 10 data points to graph
‚è≥ Task 13 completed: fetch_repositories: processed Edge


INFO:cognee.modules.visualization.cognee_network_visualization:Graph visualization saved as /Users/vasilije/cognee/notebooks/github_graph_visualization.htmlINFO:root:The HTML file has been stored at path: /Users/vasilije/cognee/notebooks/github_graph_visualization.html

‚è≥ Task 14 completed: Added final 1 data points to graph
‚è≥ Task 15 completed: fetch_repositories: Found 5 repositories and 1 developers
‚úÖ Repository fetching completed
üîç Generating graph visualization...


‚úÖ Graph visualization saved to /Users/vasilije/cognee/notebooks/github_graph_visualization.html


In [None]:
async def run_fetch_contributors():
    """Run the fetch_contributors task"""
    print("üë• Fetching contributors for repositories...")
    task_count = 0
    async for status in run_github_developer_pipeline(github_username, api_token, ["fetch_contributors"]):
        task_count += 1
        print(f"‚è≥ Task {task_count} completed: {status}")
    print("‚úÖ Contributors fetching completed")

await run_fetch_contributors()
await show_graph()

In [None]:
async def run_fetch_readmes():
    """Run the fetch_readmes task"""
    print("üìÑ Fetching README documents for repositories...")
    task_count = 0
    async for status in run_github_developer_pipeline(github_username, api_token, ["fetch_readmes"]):
        task_count += 1
        print(f"‚è≥ Task {task_count} completed: {status}")
    print("‚úÖ README fetching completed")

await run_fetch_readmes()
await show_graph()

In [None]:
async def run_summarize_readmes():
    """Run the summarize_readmes task"""
    print("ü§ñ Generating summaries for README documents using LLM...")
    task_count = 0
    async for status in run_github_developer_pipeline(github_username, api_token, ["summarize_readmes"]):
        task_count += 1
        print(f"‚è≥ Task {task_count} completed: {status}")
    print("‚úÖ README summarization completed")

await run_summarize_readmes()
await show_graph()

In [None]:
async def run_fetch_pull_requests():
    """Run the fetch_pull_requests task"""
    print("üîÑ Fetching pull requests and comments...")
    task_count = 0
    async for status in run_github_developer_pipeline(github_username, api_token, ["fetch_pull_requests"]):
        task_count += 1
        print(f"‚è≥ Task {task_count} completed: {status}")
    print("‚úÖ Pull requests fetching completed")

await run_fetch_pull_requests()
await show_graph()

In [None]:
async def run_analyze_pr_sentiment():
    """Run the analyze_pr_sentiment task"""
    print("üòÄ Analyzing pull request discussions sentiment...")
    task_count = 0
    async for status in run_github_developer_pipeline(github_username, api_token, ["analyze_pr_sentiment"]):
        task_count += 1
        print(f"‚è≥ Task {task_count} completed: {status}")
    print("‚úÖ PR sentiment analysis completed")

await run_analyze_pr_sentiment()
await show_graph()

In [None]:
async def run_build_collaboration():
    """Run the build_collaboration task"""
    print("üåê Building collaboration network...")
    task_count = 0
    async for status in run_github_developer_pipeline(github_username, api_token, ["build_collaboration"]):
        task_count += 1
        print(f"‚è≥ Task {task_count} completed: {status}")
    print("‚úÖ Collaboration network building completed")

await run_build_collaboration()
await show_graph()

In [None]:
async def run_analyze_network():
    """Run the analyze_network task"""
    print("üìä Analyzing developer network metrics...")
    task_count = 0
    async for status in run_github_developer_pipeline(github_username, api_token, ["analyze_network"]):
        task_count += 1
        print(f"‚è≥ Task {task_count} completed: {status}")
    print("‚úÖ Network analysis completed")

await run_analyze_network()
await show_graph()

In [None]:
async def run_generate_developer_summary():
    """Run the generate_developer_summary task"""
    print("üìù Generating developer profile summary with LLM...")
    task_count = 0
    async for status in run_github_developer_pipeline(github_username, api_token, ["generate_developer_summary"]):
        task_count += 1
        print(f"‚è≥ Task {task_count} completed: {status}")
    print("‚úÖ Developer summary generation completed")

await run_generate_developer_summary()
await show_graph()