<a href="https://colab.research.google.com/github/nihalnihalani/Neo4j-hackathon-project/blob/main/Untitled5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install PyGithub neo4j google-generativeai pandas plotly gradio

Collecting PyGithub
  Downloading PyGithub-2.5.0-py3-none-any.whl.metadata (3.9 kB)
Collecting neo4j
  Downloading neo4j-5.27.0-py3-none-any.whl.metadata (5.9 kB)
Collecting gradio
  Downloading gradio-5.9.1-py3-none-any.whl.metadata (16 kB)
Collecting pynacl>=1.4.0 (from PyGithub)
  Downloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (8.6 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.5.2 (from gradio)
  Downloading gradio_client-1.5.2-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydu

In [None]:
import gradio as gr
import logging
import pandas as pd
from datetime import datetime
from github import Github
from neo4j import GraphDatabase
import google.generativeai as genai
import plotly.express as px
import plotly.graph_objects as go
from collections import defaultdict

# Configure logging: every record goes both to 'github_analysis.log' (the file
# GitHubAnalyzer.get_logs reads back) and to the console/notebook stream.
#
# force=True replaces any handlers already attached to the root logger.
# Without it basicConfig() silently does nothing when the host process has
# pre-configured logging, so neither the file handler nor the format below
# would be installed and records would keep the default "LEVEL:name:message"
# layout.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('github_analysis.log'),
        logging.StreamHandler()
    ],
    force=True,
)
logger = logging.getLogger(__name__)

class GitHubAnalyzer:
    def __init__(self):
        self.g = None
        self.driver = None
        self.model = None

    def initialize_clients(self, github_token, neo4j_uri, neo4j_username, neo4j_password, google_api_key):
        """Initialize all API clients with error handling"""
        try:
            self.g = Github(github_token)
            self.driver = GraphDatabase.driver(neo4j_uri, auth=(neo4j_username, neo4j_password))
            genai.configure(api_key=google_api_key)
            self.model = genai.GenerativeModel('gemini-pro')
            logger.info("All clients initialized successfully")
            return True, "All APIs initialized successfully!"
        except Exception as e:
            logger.error(f"Error initializing clients: {str(e)}")
            return False, f"Error: {str(e)}"

    # [Previous methods remain the same until create_knowledge_graph...]

    def create_knowledge_graph(self, repo_name):
        """Enhanced knowledge graph creation with null value handling"""
        try:
            # Validate that clients are initialized
            if not all([self.g, self.driver, self.model]):
                return False, "Please initialize the clients first in the Setup tab"

            # Validate repository name format
            if "/" not in repo_name:
                return False, "Invalid repository name format. Please use 'owner/repo' format"

            # Try to get repository
            try:
                repo = self.g.get_repo(repo_name)
            except Exception as e:
                return False, f"Error accessing repository: {str(e)}"

            logger.info(f"Starting knowledge graph creation for {repo_name}")

            # Clear existing data for this repository
            with self.driver.session() as session:
                session.run(
                    "MATCH (r:Repository {name: $repo_name}) "
                    "DETACH DELETE r",
                    repo_name=repo_name
                )

            with self.driver.session() as session:
                # Create repository node with null-safe properties
                session.run("""
                    MERGE (repo:Repository {name: $repo_name})
                    SET
                        repo.description = $repo_description,
                        repo.stars = $repo_stars,
                        repo.forks = $repo_forks,
                        repo.created_at = $repo_created_at,
                        repo.updated_at = $repo_updated_at,
                        repo.language = $repo_language,
                        repo.open_issues_count = $open_issues_count,
                        repo.topics = $topics
                    """,
                    repo_name=repo.name,
                    repo_description=repo.description if repo.description is not None else "",
                    repo_stars=repo.stargazers_count,
                    repo_forks=repo.forks_count,
                    repo_language=repo.language if repo.language is not None else "Not specified",
                    open_issues_count=repo.open_issues_count,
                    topics=repo.get_topics() or [],
                    repo_created_at=repo.created_at.isoformat() if repo.created_at else None,
                    repo_updated_at=repo.updated_at.isoformat() if repo.updated_at else None
                )

                # Process issues with comments and labels
                for issue in repo.get_issues(state='all'):
                    # Create issue node with null-safe properties
                    session.run("""
                        MATCH (repo:Repository {name: $repo_name})
                        MERGE (user:User {login: $user_login})
                        MERGE (issue:Issue {number: $issue_number})
                        SET
                            issue.title = $issue_title,
                            issue.state = $issue_state,
                            issue.created_at = $issue_created_at,
                            issue.updated_at = $issue_updated_at,
                            issue.comments_count = $comments_count,
                            issue.body = $body
                        MERGE (user)-[:CREATED]->(issue)
                        MERGE (issue)-[:BELONGS_TO]->(repo)
                        """,
                        repo_name=repo.name,
                        user_login=issue.user.login if issue.user else "anonymous",
                        issue_number=issue.number,
                        issue_title=issue.title if issue.title is not None else "",
                        issue_state=issue.state if issue.state is not None else "unknown",
                        comments_count=issue.comments,
                        body=issue.body if issue.body is not None else "",
                        issue_created_at=issue.created_at.isoformat() if issue.created_at else None,
                        issue_updated_at=issue.updated_at.isoformat() if issue.updated_at else None
                    )

                    # Add labels with null-safe properties
                    for label in issue.labels:
                        session.run("""
                            MATCH (issue:Issue {number: $issue_number})
                            MERGE (label:Label {name: $label_name})
                            MERGE (issue)-[:HAS_LABEL]->(label)
                            """,
                            issue_number=issue.number,
                            label_name=label.name if label.name is not None else "unlabeled"
                        )

                    # Add comments with null-safe properties
                    for comment in issue.get_comments():
                        session.run("""
                            MATCH (issue:Issue {number: $issue_number})
                            MERGE (user:User {login: $user_login})
                            CREATE (comment:Comment)
                            SET
                                comment.body = $body,
                                comment.created_at = $created_at
                            MERGE (comment)-[:ON]->(issue)
                            MERGE (user)-[:WROTE]->(comment)
                            """,
                            issue_number=issue.number,
                            user_login=comment.user.login if comment.user else "anonymous",
                            body=comment.body if comment.body is not None else "",
                            created_at=comment.created_at.isoformat() if comment.created_at else None
                        )

            logger.info(f"Knowledge graph created successfully for {repo_name}")
            return True, "Knowledge graph created successfully!"
        except Exception as e:
            logger.error(f"Error creating knowledge graph: {str(e)}")
            return False, f"Error: {str(e)}"
    def get_graph_data(self, repo_name):
        """Retrieve graph data from Neo4j for visualization"""
        try:
            with self.driver.session() as session:
                # This query retrieves nodes and relationships for the specified repository.
                # You might need to adjust it based on your specific needs and graph structure.
                results = session.run("""
                    MATCH (repo:Repository {name: $repo_name})
                    OPTIONAL MATCH (repo)<-[:BELONGS_TO]-(issue:Issue)
                    OPTIONAL MATCH (issue)<-[:CREATED]-(user:User)
                    OPTIONAL MATCH (issue)-[:HAS_LABEL]->(label:Label)
                    RETURN repo, issue, user, label
                """, repo_name=repo_name)

                nodes = []
                edges = []
                node_ids = set()  # Keep track of added node IDs

                for record in results:
                    repo = record["repo"]
                    issue = record["issue"]
                    user = record["user"]
                    label = record["label"]

                    if repo.id not in node_ids:
                        nodes.append({"id": repo.id, "label": f"Repo: {repo['name']}", "color": "blue"})
                        node_ids.add(repo.id)

                    if issue and issue.id not in node_ids:
                        nodes.append({"id": issue.id, "label": f"Issue: {issue['title']}", "color": "green"})
                        node_ids.add(issue.id)
                        edges.append({"source": issue.id, "target": repo.id})

                    if user and user.id not in node_ids:
                        nodes.append({"id": user.id, "label": f"User: {user['login']}", "color": "orange"})
                        node_ids.add(user.id)
                        if issue:
                            edges.append({"source": user.id, "target": issue.id})

                    if label and label.id not in node_ids:
                        nodes.append({"id": label.id, "label": f"Label: {label['name']}", "color": "red"})
                        node_ids.add(label.id)
                        if issue:
                            edges.append({"source": issue.id, "target": label.id})

                return True, {"nodes": nodes, "edges": edges}
        except Exception as e:
            logger.error(f"Error retrieving graph data: {str(e)}")
            return False, f"Error: {str(e)}"


    def analyze_data(self, repo_name, analysis_type="general"):
        """Enhanced data analysis with multiple analysis types"""
        try:
            query = ""
            if analysis_type == "general":
                query = f"""
                MATCH (repo:Repository {{name: '{repo_name}'}})
                OPTIONAL MATCH (repo)<-[:BELONGS_TO]-(issue:Issue)
                OPTIONAL MATCH (issue)<-[:ON]-(comment:Comment)
                RETURN repo.description as description,
                       repo.stars as stars,
                       repo.forks as forks,
                       repo.language as language,
                       count(DISTINCT issue) as total_issues,
                       count(DISTINCT comment) as total_comments
                """
            elif analysis_type == "user_activity":
                query = f"""
                MATCH (repo:Repository {{name: '{repo_name}'}})<-[:BELONGS_TO]-(issue:Issue)<-[:CREATED]-(user:User)
                OPTIONAL MATCH (issue)<-[:ON]-(comment:Comment)<-[:WROTE]-(user)
                RETURN user.login as user_login,
                       count(DISTINCT issue) as issues_created,
                       count(DISTINCT comment) as comments_made
                ORDER BY issues_created DESC
                """
            elif analysis_type == "label_analysis":
                query = f"""
                MATCH (repo:Repository {{name: '{repo_name}'}})<-[:BELONGS_TO]-(issue:Issue)-[:HAS_LABEL]->(label:Label)
                RETURN label.name as label_name,
                       count(issue) as issue_count
                ORDER BY issue_count DESC
                """
            elif analysis_type == "timeline_analysis":
                query = f"""
                MATCH (repo:Repository {{name: '{repo_name}'}})<-[:BELONGS_TO]-(issue:Issue)
                RETURN issue.created_at as date,
                       count(issue) as count
                ORDER BY date
                """

            with self.driver.session() as session:
                results = session.run(query)
                data = [record.values() for record in results]

            # Generate visualizations based on analysis type
            if analysis_type == "general":
                repo_stats = {
                    "Description": data[0][0],
                    "Stars": data[0][1],
                    "Forks": data[0][2],
                    "Language": data[0][3],
                    "Total Issues": data[0][4],
                    "Total Comments": data[0][5]
                }

                # Create a bar chart for numeric metrics
                fig = go.Figure(data=[
                    go.Bar(
                        x=["Stars", "Forks", "Issues", "Comments"],
                        y=[repo_stats["Stars"], repo_stats["Forks"],
                           repo_stats["Total Issues"], repo_stats["Total Comments"]]
                    )
                ])
                fig.update_layout(title="Repository Statistics")

                analysis_text = f"""
                Repository Analysis:
                - Primary Language: {repo_stats['Language']}
                - Description: {repo_stats['Description']}
                - Engagement Metrics:
                  * {repo_stats['Stars']} stars
                  * {repo_stats['Forks']} forks
                  * {repo_stats['Total Issues']} issues
                  * {repo_stats['Total Comments']} comments
                """

                return True, (analysis_text, fig)

            elif analysis_type == "user_activity":
                df = pd.DataFrame(data, columns=['User', 'Issues', 'Comments'])
                fig = px.bar(df.head(10), x='User', y=['Issues', 'Comments'],
                            title='Top 10 Contributors',
                            barmode='group')

                analysis_text = "Top Contributors Analysis:\n"
                for _, row in df.head(5).iterrows():
                    analysis_text += f"- {row['User']}: {row['Issues']} issues, {row['Comments']} comments\n"

                return True, (analysis_text, fig)

            elif analysis_type == "label_analysis":
                df = pd.DataFrame(data, columns=['Label', 'Count'])
                fig = px.pie(df.head(10), values='Count', names='Label',
                            title='Issue Labels Distribution')

                analysis_text = "Label Analysis:\n"
                for _, row in df.head(5).iterrows():
                    analysis_text += f"- {row['Label']}: {row['Count']} issues\n"

                return True, (analysis_text, fig)

            elif analysis_type == "timeline_analysis":
                df = pd.DataFrame(data, columns=['Date', 'Count'])
                df['Date'] = pd.to_datetime(df['Date'])
                fig = px.line(df, x='Date', y='Count',
                             title='Issue Creation Timeline')

                analysis_text = f"""
                Timeline Analysis:
                - Total period: {df['Date'].min().date()} to {df['Date'].max().date()}
                - Peak activity: {df.loc[df['Count'].idxmax(), 'Date'].date()} ({df['Count'].max()} issues)
                - Average issues per day: {df['Count'].mean():.2f}
                """

                return True, (analysis_text, fig)

        except Exception as e:
            logger.error(f"Error analyzing data: {str(e)}")
            return False, (f"Error: {str(e)}", None)

    def get_logs(self, n_lines=50):
        """Retrieve the last n lines from the log file"""
        try:
            with open('github_analysis.log', 'r') as f:
                logs = f.readlines()
            return True, ''.join(logs[-n_lines:])
        except Exception as e:
            return False, f"Error reading logs: {str(e)}"

def create_ui():
    """Build the Gradio dashboard around a single GitHubAnalyzer instance.

    The analyzer methods return ``(ok, payload)`` tuples. Each event handler
    below unpacks that tuple so every output component receives exactly the
    value it can render, instead of the raw tuple.

    Returns:
        gr.Blocks: The assembled (not yet launched) application.
    """
    analyzer = GitHubAnalyzer()

    # --- adapters between analyzer results and Gradio output components ---

    def run_setup(token, uri, username, password, api_key):
        """Initialize the API clients and return only the status message."""
        _ok, message = analyzer.initialize_clients(token, uri, username, password, api_key)
        return message

    def run_create(repo):
        """Build the knowledge graph and return only the status message."""
        _ok, message = analyzer.create_knowledge_graph(repo)
        return message

    def run_analysis(repo, kind):
        """Return (text, figure) for the Analysis tab's two outputs."""
        result = analyzer.analyze_data(repo, kind)
        if result is None:
            return f"Error: no result for analysis type '{kind}'", None
        _ok, (text, figure) = result
        return text, figure

    def run_logs(line_count):
        """Return the log text (or the error message) for the Logs tab."""
        _ok, text = analyzer.get_logs(line_count)
        return text

    def run_graph(repo):
        """Return the graph dict; errors are wrapped so the value is always a JSON object."""
        ok, payload = analyzer.get_graph_data(repo)
        return payload if ok else {"error": payload}

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# GitHub Repository Analysis Dashboard")

        with gr.Tab("Setup"):
            github_token = gr.Textbox(label="GitHub Token", type="password")
            neo4j_uri = gr.Textbox(label="Neo4j URI")
            neo4j_username = gr.Textbox(label="Neo4j Username")
            neo4j_password = gr.Textbox(label="Neo4j Password", type="password")
            google_api_key = gr.Textbox(label="Google API Key", type="password")
            setup_button = gr.Button("Initialize")
            setup_output = gr.Textbox(label="Setup Status")

            setup_button.click(
                run_setup,
                inputs=[github_token, neo4j_uri, neo4j_username, neo4j_password, google_api_key],
                outputs=setup_output
            )

        with gr.Tab("Knowledge Graph"):
            repo_name = gr.Textbox(label="Repository Name (e.g., 'owner/repo')")
            create_button = gr.Button("Create Knowledge Graph")
            create_output = gr.Textbox(label="Creation Status")

            create_button.click(
                run_create,
                inputs=repo_name,
                outputs=create_output
            )

        with gr.Tab("Analysis"):
            analysis_type = gr.Radio(
                choices=["general", "user_activity", "label_analysis", "timeline_analysis"],
                label="Analysis Type",
                value="general"
            )
            analyze_button = gr.Button("Analyze")
            analysis_text = gr.Textbox(label="Analysis Results")
            analysis_plot = gr.Plot(label="Visualization")

            # Reads the repository name entered on the "Knowledge Graph" tab.
            analyze_button.click(
                run_analysis,
                inputs=[repo_name, analysis_type],
                outputs=[analysis_text, analysis_plot]
            )

        with gr.Tab("Logs"):
            n_lines = gr.Slider(minimum=10, maximum=100, value=50, step=10, label="Number of log lines")
            refresh_logs = gr.Button("Refresh Logs")
            logs_output = gr.Textbox(label="Application Logs")

            refresh_logs.click(
                run_logs,
                inputs=n_lines,
                outputs=logs_output
            )

        with gr.Tab("Graph Visualization"):
            repo_name_graph = gr.Textbox(label="Repository Name (e.g., 'owner/repo')")
            visualize_button = gr.Button("Visualize Graph")
            graph_output = gr.JSON(label="Graph Data")  # Use gr.JSON to display the data

            visualize_button.click(
                run_graph,
                inputs=repo_name_graph,
                outputs=graph_output
            )

    return demo

if __name__ == "__main__":
    # Build the dashboard, then serve it.
    # share=True requests a public share link; debug=True keeps the call
    # running so errors and logs show up in the output.
    demo = create_ui()
    demo.launch(
        share=True,
        debug=True,
    )

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://0b58d225168036e986.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


ERROR:__main__:Error initializing clients: 
