In [1]:
from neo4j import GraphDatabase

# Connect to the Neo4j database
class Neo4jConnection:
    def __init__(self, uri, user, password):
        self._driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        self._driver.close()

    def query(self, query, parameters=None):
        with self._driver.session() as session:
            return session.run(query, parameters)

# Initialize the connection
neo4j_conn = Neo4jConnection(uri= "<>", user= "<>", password= "<>")

In [2]:
from py2neo import Graph

# Connect to the Neo4j database
graph = Graph("bolt://localhost:7687", auth=("neo4j", "123456789"))


In [10]:
query = """
// Input Parameters
WITH [
    "Ruby" // Input technologies
] AS techInput,
["ATS", "SmartMatch"] AS prDomains, // Input PR domains
["file4.ruby", "file2.py"] AS fileChanges, // File changes in the PR
["nhi"] AS prContributors // Contributors to the PR

// Step 1: Find Eligible Developers
MATCH (dev:Developer)
WHERE NOT dev.id IN prContributors // Exclude contributors

// Step 2: Skill Match Score
OPTIONAL MATCH (dev)-[hs:DEVELOPER_HAS_SKILL]->(skill:Skill)
WHERE skill.name IN techInput
WITH dev, 
     prDomains, fileChanges, // Pass variables forward
     SUM(hs.level) AS actualSkillScore, 
     COUNT(skill) AS matchedSkills, 
     SIZE(techInput) * 5 AS maxSkillScore

WITH dev,
     prDomains, fileChanges,
     (TOFLOAT(actualSkillScore) / maxSkillScore) AS skillMatchScore

// Step 3: Work History Score
OPTIONAL MATCH (dev)-[contributed:DEVELOPER_CONTRIBUTED_TO]->(pr:PR)-[:PR_AFFECTS_FILE]->(file:File)
WHERE file.name IN fileChanges
WITH dev, prDomains, fileChanges, skillMatchScore,
     COUNT(DISTINCT file) AS contributionCount

WITH dev, prDomains, fileChanges, skillMatchScore,
     CASE contributionCount
         WHEN 0 THEN 0
         WHEN 1 THEN 0.55
         WHEN 2 THEN 0.75
         ELSE 1.0
     END AS workHistoryScore

// Step 4: Review Quality Score
OPTIONAL MATCH (dev)-[reviewed:DEVELOPER_REVIEWED]->(pr:PR)-[:PR_AFFECTS_FILE]->(file:File)
WHERE file.name IN fileChanges
WITH dev, prDomains, fileChanges, skillMatchScore, workHistoryScore,
     COALESCE(AVG(reviewed.review_score), 0) AS reviewQualityScore

// Step 5: Code Review Frequency and Availability Scores
WITH dev, prDomains, fileChanges, skillMatchScore, workHistoryScore, reviewQualityScore,
     dev.code_review_frequent * 0.2 AS reviewFrequencyScore,
     dev.availability * 0.2 AS availabilityScore

// Step 6: Domain Knowledge Score
OPTIONAL MATCH (dev)-[:IS_MEMBER_OF_TEAM|:LEADER_OF]->(team:Team)
WITH dev, prDomains, skillMatchScore, workHistoryScore, reviewQualityScore, 
     reviewFrequencyScore, availabilityScore,
     COUNT(DISTINCT CASE WHEN ANY(domain IN team.domains WHERE domain IN prDomains) THEN 1 END) AS domainMatches,
     SIZE(prDomains) AS totalDomains

WITH dev, skillMatchScore, workHistoryScore, reviewQualityScore, 
     reviewFrequencyScore, availabilityScore,
     (TOFLOAT(domainMatches) / totalDomains) AS domainKnowledgeScore

// Step 7: Organizational Priority Score
OPTIONAL MATCH (dev)-[:DEVELOPER_MENTOR]->(mentee:Developer)
WITH dev, skillMatchScore, workHistoryScore, reviewQualityScore, 
     reviewFrequencyScore, availabilityScore, domainKnowledgeScore,
     CASE WHEN mentee IS NOT NULL THEN 1.0 ELSE 0.5 END AS orgPriorityScore

// Step 8: Combine All Scores with Weights
WITH dev,
     (skillMatchScore * 0.2) +
     (workHistoryScore * 0.2) +
     (reviewQualityScore * 0.2) +
     (reviewFrequencyScore * 0.1) +
     (availabilityScore * 0.1) +
     (domainKnowledgeScore * 0.1) +
     (orgPriorityScore * 0.05) AS totalScore,
     skillMatchScore, workHistoryScore, reviewQualityScore, 
     reviewFrequencyScore, availabilityScore, domainKnowledgeScore

// Step 9: Return Top 3 Developers
RETURN dev.id AS developer, dev.name AS name, totalScore, skillMatchScore, workHistoryScore, reviewQualityScore, 
     reviewFrequencyScore, availabilityScore, domainKnowledgeScore
ORDER BY totalScore DESC
LIMIT 3;
"""
# results = neo4j_conn.query(query)  # For Neo4j Driver
# OR
results = graph.run(query).data()  # For py2neo
print(results)

[{'developer': 'duong', 'name': 'Duong', 'totalScore': 0.5250000000000001, 'skillMatchScore': 1.0, 'workHistoryScore': 0.55, 'reviewQualityScore': 0, 'reviewFrequencyScore': 0.8, 'availabilityScore': 0.6000000000000001, 'domainKnowledgeScore': 0.5}, {'developer': 'sang', 'name': 'Sang', 'totalScore': 0.25500000000000006, 'skillMatchScore': 0.0, 'workHistoryScore': 0, 'reviewQualityScore': 0, 'reviewFrequencyScore': 0.8, 'availabilityScore': 1.0, 'domainKnowledgeScore': 0.5}, {'developer': 'hani', 'name': 'Hani', 'totalScore': 0.215, 'skillMatchScore': 0.0, 'workHistoryScore': 0, 'reviewQualityScore': 0, 'reviewFrequencyScore': 0.8, 'availabilityScore': 0.6000000000000001, 'domainKnowledgeScore': 0.5}]


In [26]:
from llama_cpp import Llama

# Load the Mistral model
model = Llama(model_path="./mistral-7b-openorca.Q4_0.gguf", n_gpu_layers=-1, n_ctx=4096)

# Prepare the prompt
prompt = """
Based on the following developer information, suggest the top 3 developers to review a pull request:
Developers:
- Name: Sang, Review Score: 99%, Responsiveness: 5
- Name: Quang, Review Score: 80%, Responsiveness: 4
- Name: Hani, Review Score: 60%, Responsiveness: 3

Criteria include skill match, review quality, and responsiveness.
"""

# Generate response
response = model(prompt, max_tokens=200)
print(response["choices"][0]["text"])

llama_load_model_from_file: using device Metal (Apple M2 Pro) - 21412 MiB free
llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from ./mistral-7b-openorca.Q4_0.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = open-orca_mistral-7b-openorca
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32       


1. Sang - Skill match: 99%, Review Quality: 99%, Responsiveness: 5
2. Quang - Skill match: 80%, Review Quality: 80%, Responsiveness: 4
3. Hani - Skill match: 60%, Review Quality: 60%, Responsiveness: 3

The top 3 developers to review a pull request are:
1. Sang
2. Quang
3. Hani


In [25]:
import llama_cpp
print(llama_cpp.__version__)


0.3.2


In [33]:
def recommend_reviewers(pr_domains, tech_stack, files, pr_contributors):
    # Query Neo4j
    query = f"""
    WITH {tech_stack} AS techInput,
        {pr_domains} AS prDomains, // Input PR domains
        {files} AS fileChanges, // File changes in the PR
        {pr_contributors} AS prContributors // Contributors to the PR

        // Step 1: Find Eligible Developers
        MATCH (dev:Developer)
        WHERE NOT dev.id IN prContributors // Exclude contributors

        // Step 2: Skill Match Score
        OPTIONAL MATCH (dev)-[hs:DEVELOPER_HAS_SKILL]->(skill:Skill)
        WHERE skill.name IN techInput
        WITH dev, 
            prDomains, fileChanges, // Pass variables forward
            SUM(hs.level) AS actualSkillScore, 
            COUNT(skill) AS matchedSkills, 
            SIZE(techInput) * 5 AS maxSkillScore

        WITH dev,
            prDomains, fileChanges,
            (TOFLOAT(actualSkillScore) / maxSkillScore) AS skillMatchScore

        // Step 3: Work History Score
        OPTIONAL MATCH (dev)-[contributed:DEVELOPER_CONTRIBUTED_TO]->(pr:PR)-[:PR_AFFECTS_FILE]->(file:File)
        WHERE file.name IN fileChanges
        WITH dev, prDomains, fileChanges, skillMatchScore,
            COUNT(DISTINCT file) AS contributionCount

        WITH dev, prDomains, fileChanges, skillMatchScore,
            CASE contributionCount
                WHEN 0 THEN 0
                WHEN 1 THEN 0.55
                WHEN 2 THEN 0.75
                ELSE 1.0
            END AS workHistoryScore

        // Step 4: Review Quality Score
        OPTIONAL MATCH (dev)-[reviewed:DEVELOPER_REVIEWED]->(pr:PR)-[:PR_AFFECTS_FILE]->(file:File)
        WHERE file.name IN fileChanges
        WITH dev, prDomains, fileChanges, skillMatchScore, workHistoryScore,
            COALESCE(AVG(reviewed.review_score), 0) AS reviewQualityScore

        // Step 5: Code Review Frequency and Availability Scores
        WITH dev, prDomains, fileChanges, skillMatchScore, workHistoryScore, reviewQualityScore,
            dev.code_review_frequent * 0.2 AS reviewFrequencyScore,
            dev.availability * 0.2 AS availabilityScore

        // Step 6: Domain Knowledge Score
        OPTIONAL MATCH (dev)-[:IS_MEMBER_OF_TEAM|:LEADER_OF]->(team:Team)
        WITH dev, prDomains, skillMatchScore, workHistoryScore, reviewQualityScore, 
            reviewFrequencyScore, availabilityScore,
            COUNT(DISTINCT CASE WHEN ANY(domain IN team.domains WHERE domain IN prDomains) THEN 1 END) AS domainMatches,
            SIZE(prDomains) AS totalDomains

        WITH dev, skillMatchScore, workHistoryScore, reviewQualityScore, 
            reviewFrequencyScore, availabilityScore,
            (TOFLOAT(domainMatches) / totalDomains) AS domainKnowledgeScore

        // Step 7: Organizational Priority Score
        OPTIONAL MATCH (dev)-[:DEVELOPER_MENTOR]->(mentee:Developer)
        WITH dev, skillMatchScore, workHistoryScore, reviewQualityScore, 
            reviewFrequencyScore, availabilityScore, domainKnowledgeScore,
            CASE WHEN mentee IS NOT NULL THEN 1.0 ELSE 0.5 END AS orgPriorityScore

        // Step 8: Combine All Scores with Weights
        WITH dev,
            (skillMatchScore * 0.2) +
            (workHistoryScore * 0.2) +
            (reviewQualityScore * 0.2) +
            (reviewFrequencyScore * 0.1) +
            (availabilityScore * 0.1) +
            (domainKnowledgeScore * 0.1) +
            (orgPriorityScore * 0.05) AS totalScore,
            skillMatchScore, workHistoryScore, reviewQualityScore, 
            reviewFrequencyScore, availabilityScore, domainKnowledgeScore

        // Step 9: Return Top 3 Developers
        RETURN dev.id AS developer, dev.name AS name, totalScore, skillMatchScore, workHistoryScore, reviewQualityScore, 
            reviewFrequencyScore, availabilityScore, domainKnowledgeScore
        ORDER BY totalScore DESC
        LIMIT 3;
    """
    results = graph.run(query).data()
    print(results)

    # Prepare the prompt
    prompt = f"""
    Based on the following developer information, suggest the top 3 developers to review a pull request:
    {results}

    Criteria include skill match, review quality, and responsiveness.
    """

    # Generate response using Mistral
    response = model(prompt, max_tokens=200)
    return response["choices"][0]["text"]

# Example usage
print(recommend_reviewers(["ATS", "SmartMatch"], ["Ruby", "ReactJS"], ["file4.ruby", "file2.py"], ["nhi"]))

[{'developer': 'duong', 'name': 'Duong', 'totalScore': 0.42500000000000004, 'skillMatchScore': 0.5, 'workHistoryScore': 0.55, 'reviewQualityScore': 0, 'reviewFrequencyScore': 0.8, 'availabilityScore': 0.6000000000000001, 'domainKnowledgeScore': 0.5}, {'developer': 'sang', 'name': 'Sang', 'totalScore': 0.35500000000000004, 'skillMatchScore': 0.5, 'workHistoryScore': 0, 'reviewQualityScore': 0, 'reviewFrequencyScore': 0.8, 'availabilityScore': 1.0, 'domainKnowledgeScore': 0.5}, {'developer': 'quang', 'name': 'Quang', 'totalScore': 0.28500000000000003, 'skillMatchScore': 0.4, 'workHistoryScore': 0, 'reviewQualityScore': 0, 'reviewFrequencyScore': 0.8, 'availabilityScore': 1.0, 'domainKnowledgeScore': 0.0}]


Llama.generate: 25 prefix-match hit, remaining 336 prompt tokens to eval
llama_perf_context_print:        load time =    1313.32 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   336 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   108 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    5468.31 ms /   444 tokens



    Based on the given developer information, the top 3 developers to review a pull request would be:
    1. Duong
    2. Sang
    3. Quang

    The ranking is determined by the total score, with Duong having the highest score at 0.425, Sang at 0.355, and Quang at 0.285. Duong's higher total score indicates that he is more suitable for reviewing the pull request.


In [None]:
from llama_cpp import Llama

# Load the Mistral model
model = Llama(model_path="./mistral-7b-openorca.Q4_0.gguf", n_gpu_layers=-1, n_ctx=4096, )

# Prepare the prompt
prompt = """
Schema Description:
- Nodes:
    Developer: {id: string, name: string, code_review_frequent: int, review_score: float, responsiveness: int, availability: int}
    Skill: {id: string, name: string}
    PR: {id: string, title: string, description: string, timestamp: string, domains: array[string]}
    File: {id: string, name: string, domain: string}
    Team: {id: string, name: string, domains: array[string], leader: string}
- Relationships:
    DEVELOPER_HAS_SKILL (Developer -> Skill): {level: int}
    DEVELOPER_CONTRIBUTED_TO (Developer -> PR): {timestamp: string}
    DEVELOPER_REVIEWED (Developer -> PR): {file_reviewed: array[string], review_score: float}
    PR_AFFECTS_FILE (PR -> File)
    DEVELOPER_IS_MEMBER_OF (Developer -> Team)
    CODEOWNER_OF (Team -> File)
    DEVELOPER_MENTOR (Developer -> Developer)

Task:
Generate a Cypher query based on the following requirement:

Requirement:
"Find the top 3 developers with the most experience in ReactJS. Include their name, responsiveness, review score, and code review frequency. Sort by review score in descending order, followed by responsiveness and code review frequency in descending order."

Output Format:
Return only the Cypher query, without any explanations or additional text. The output should strictly adhere to the following format:

```cypher
<Generated Cypher Query>
"""

# Generate response
response = model(prompt, max_tokens=300)

query = response['choices'][0]['text'];
print(query)

# Get the format cypher query

results = graph.run(query).data()
print(results)

# print(response['choices'][0]['text'])
# if isinstance(response, dict) and 'choices' in response:
#     for choice in response['choices']:
#         print(choice['text'].strip())
# else:
#     print(response)

llama_load_model_from_file: using device Metal (Apple M2 Pro) - 17182 MiB free
llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from ./mistral-7b-openorca.Q4_0.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = open-orca_mistral-7b-openorca
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32       


Cypher Query:
MATCH (d:Developer) -[:DEVELOPER_HAS_SKILL]-> (s:Skill) WHERE (s.name = 'ReactJS')
RETURN d.name AS Name, d.responsiveness AS Responsiveness, d.review_score AS ReviewScore, d.code_review_frequent AS CodeReviewFrequency
ORDER BY d.review_score DESC, d.responsiveness DESC, d.code_review_frequent DESC LIMIT 3

Explanation:
1. The query first identifies developers with the skill 'ReactJS' by matching a Developer with a Skill node.
2. The WHERE clause filters for developers with the ReactJS skill.
3. The RETURN statement selects the Developer node properties: Name, Responsiveness, ReviewScore, and CodeReviewFrequency.
4. The ORDER BY clause sorts the results in descending order by ReviewScore, then Responsiveness, and finally CodeReviewFrequency.
5. The LIMIT clause sets the number of results to return to 3.


ClientError: [Statement.SyntaxError] Invalid input 'Query': expected 'FOREACH', 'ALTER', 'ORDER BY', 'CALL', 'USING PERIODIC COMMIT', 'CREATE', 'LOAD CSV', 'START DATABASE', 'STOP DATABASE', 'DEALLOCATE', 'DELETE', 'DENY', 'DETACH', 'DROP', 'DRYRUN', 'FINISH', 'GRANT', 'INSERT', 'LIMIT', 'MATCH', 'MERGE', 'NODETACH', 'OFFSET', 'OPTIONAL', 'REALLOCATE', 'REMOVE', 'RENAME', 'RETURN', 'REVOKE', 'ENABLE SERVER', 'SET', 'SHOW', 'SKIP', 'TERMINATE', 'UNWIND', 'USE' or 'WITH' (line 2, column 8 (offset: 8))
"Cypher Query:"
        ^