From c0c155038bb8aeac6036e3236e0187e7490f8275 Mon Sep 17 00:00:00 2001
From: Ross Barnowski <rossbar@berkeley.edu>
Date: Thu, 3 Feb 2022 21:21:33 -0800
Subject: [PATCH 1/5] Remove unused click import and CLI stub (for now).

---
 issues.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/issues.py b/issues.py
index 8809c3b..4deb636 100644
--- a/issues.py
+++ b/issues.py
@@ -1,7 +1,6 @@
 import os
 import requests
 import json
-import click
 
 token = os.environ["GRAPH_API_KEY"]
 endpoint = r"https://api.github.com/graphql"
@@ -278,11 +277,6 @@ def dump(self, outfile):
             json.dump(self.raw_data, outf)
 
 
-@click.command()
-def cli():
-    pass
-
-
 if __name__ == "__main__":
     grabber = GithubIssueGrabber('query_examples/issue_activity_since_date.gql')
     grabber.get()

From d8cd711787c9527f47a1ffb83c72e218a88b69d0 Mon Sep 17 00:00:00 2001
From: Ross Barnowski <rossbar@berkeley.edu>
Date: Thu, 3 Feb 2022 21:24:06 -0800
Subject: [PATCH 2/5] Remove issue-specific helper functions.

---
 issues.py | 136 ------------------------------------------------------
 1 file changed, 136 deletions(-)

diff --git a/issues.py b/issues.py
index 4deb636..a1c5a5e 100644
--- a/issues.py
+++ b/issues.py
@@ -89,142 +89,6 @@ def parse_single_issue_query(data):
         raise e
     return data, last_cursor, total_num_issues
 
-def to_ndata(data):
-    """
-    Parse the raw json returned by a GitHub GraphQL query into an issue
-    dictionary.
-
-    Parameters
-    ----------
-    data : dict
-        The result of `get_all_responses` or `parse_single_issue_query`
-
-    Returns
-    -------
-    ndata : dict
-        A dictionary of issues where the keys are the issue numbers.
-        This is the data format expected by subsequent filtering/summarizing
-        functions.
-    """
-    # Dict & list comps to decompose json
-    ndata = {
-        n['node']['number'] : {
-            'title' : n['node']['title'],
-            'url'   : n['node']['url'],
-            'numrefs' : n['node']['timelineItems']['totalCount'],
-            'labels'  : [lbl['node']['name'] for lbl in n['node']['labels']['edges']]
-        } for n in data
-    }
-    return ndata
-
-def filter_issues_by_label(ndata, labels_to_filter=("Triaged",)):
-    """
-    Remove nodes from parsed node data if the node's labels contain any of
-    the labels in labels_to_filter
-
-    Parameters
-    ----------
-    ndata : dict
-        Dictionary of node data parsed from query response.
-        Result of to_ndata(query_response)
-
-    labels_to_filter : tuple
-        Tuple of strings containing the names of labels to filter by
-
-    Returns
-    -------
-    filtered_ndata : dict
-        Dictionary of node data with specified nodes filtered out.
-    """
-    if type(labels_to_filter) is not tuple:
-        raise TypeError('labels_to_filter must be a tuple of strings')
-    lbls = set(labels_to_filter)
-
-    return { 
-        k : v for k, v in ndata.items() if len(lbls.intersection(set(v['labels']))) == 0
-    }
-
-def filter_issues_apply_blacklist(ndata, blacklist):
-    """
-    Remove nodes from parsed node data if the node's number (i.e. the issue
-    number) is on the blacklist.
-
-    Parameters
-    ----------
-    ndata : dict
-        Dictionary of node data parsed from query response.
-        Result of to_ndata(query_response)
-
-    blacklist: tuple
-        Tuple of ints containing the issue IDs to filter.
-
-    Returns
-    -------
-    filtered_ndata : dict
-        Dictionary of node data with specified nodes filtered out.
-    """
-    if type(blacklist) is not tuple:
-        raise TypeError('blacklist bust be a tuple of integers')
-    blacklist = set(blacklist)
-
-    return {
-        k : v for k, v in ndata.items() if k not in blacklist
-    }
-
-def generate_table(ndata, idx, num_issues=10):
-    """
-    Generate a markdown-formatted table from the first `num_issues` nodes
-    in `idx`.
-    """
-    # Initialize table
-    mdtable =  '|Iss. \#| xrefs | Issue |\n'
-    mdtable += '|:-----:|:------|:------|\n'
-
-    for issue_num in idx[:num_issues]:
-        node = ndata[issue_num]
-        mdtable += '|{}|{}|[{}]({})\n'.format(
-            issue_num,
-            node['numrefs'],
-            node['title'],
-            node['url']
-        )
-    return mdtable
-
-
-def generate_top_issues_summary(ndata, num_issues=10):
-    """
-    Generate a markdown-formatted table of GitHub issues sorted by 
-    cross-reference count.
-
-    Parameters
-    ----------
-    num_issues : int
-        Number of issues to include in the summary table. Default = 10.
-    ndata : dict
-        Dictionary of node data parsed from query repsonse.
-        Result of to_ndata(query_response)
-
-    Returns
-    -------
-    table : str
-        A string containing a markdown-formatted table with the issue number,
-        number of cross-references, and issue title/url.
-    """
-
-    # Initialize table
-    mdtable =  '|Iss. \#| xrefs | Issue |\n'
-    mdtable += '|:-----:|:------|:------|\n'
-
-    # Sort data by num xrefs and generate summary
-    for node in sorted(ndata.items(), key=lambda x: x[1]['numrefs'], reverse=True)[:num_issues]:
-        mdtable += '|{}|{}|[{}]({})\n'.format(
-            node[0],
-            node[1]['numrefs'],
-            node[1]['title'],
-            node[1]['url']
-        )
-    return mdtable
-
 
 class GithubIssueGrabber:
     """

From d44388db2b064b7635115f3fd58b58f6baa8bead Mon Sep 17 00:00:00 2001
From: Ross Barnowski <rossbar@berkeley.edu>
Date: Thu, 3 Feb 2022 21:55:43 -0800
Subject: [PATCH 3/5] Pull in pr query from other branch.

---
 query_examples/pr_data_query.gql | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 query_examples/pr_data_query.gql

diff --git a/query_examples/pr_data_query.gql b/query_examples/pr_data_query.gql
new file mode 100644
index 0000000..55274db
--- /dev/null
+++ b/query_examples/pr_data_query.gql
@@ -0,0 +1,32 @@
+query {
+  repository(owner:"_REPO_OWNER_", name:"_REPO_NAME_") {
+    pullRequests(first:100) {
+      totalCount
+      edges {
+        cursor
+        node{
+          number
+          state
+          title
+          createdAt
+          baseRefName
+          mergeable
+          author{
+            login
+          }
+          authorAssociation
+          mergedBy{
+            login
+          }
+          mergedAt
+          reviews(states:APPROVED){
+            totalCount
+          }
+          participants(first:100){
+            totalCount
+          }
+        }
+      }
+    }
+  }
+}

From 9dfc0708b4d0c7f50352f8672afce41c2ced47d3 Mon Sep 17 00:00:00 2001
From: Ross Barnowski <rossbar@berkeley.edu>
Date: Thu, 3 Feb 2022 21:56:51 -0800
Subject: [PATCH 4/5] Support issue and PR queries with the same interface.

Needs a lot more work.
---
 issues.py | 107 ++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 80 insertions(+), 27 deletions(-)

diff --git a/issues.py b/issues.py
index a1c5a5e..8c18b2f 100644
--- a/issues.py
+++ b/issues.py
@@ -43,59 +43,94 @@ def load_query_from_file(fname, repo_owner="numpy", repo_name="numpy"):
     return query
 
 
-def send_query(query, cursor=None):
+def send_query(query, query_type, cursor=None):
     """
-    Helper function to use the graphql query example in `query_examples`
-    to retrieve open numpy issues and all cross references
+    Send a GraphQL query via requests.post
+
+    No schema validation is done on the query before sending. GitHub GraphQL
+    pagination is supported with the `cursor` argument.
+
+    Parameters
+    ----------
+    query : str
+        The GraphQL query to be sent
+    query_type : {"issues", "pullRequests"}
+        The object being queried according to the GitHub GraphQL schema.
+        Currently only issues and pullRequests are supported
+    cursor : str, optional
+        If given, then the cursor is injected into the query to support
+        GitHub's GraphQL pagination.
+
+    Returns
+    -------
+    dict
+        The result of the query (json) parsed by `json.loads`
+
+    Notes
+    -----
+    This is intended mostly for internal use within `get_all_responses`.
     """
-    # Modifications to request template
-    # TODO: Unhack this
-    # WARNING: This hack relies on specific structure of issues query
+    # TODO: Expand this, either by parsing the query type from the query
+    # directly or manually adding more query_types to the set
+    if query_type not in {"issues", "pullRequests"}:
+        raise ValueError(
+            "Only 'issues' and 'pullRequests' queries are currently supported"
+        )
+    # TODO: Generalize this
+    # WARNING: The cursor injection requires a literal `<query_type>(` in the
+    # query (hence only issues/PRs); str.index raises ValueError when absent
     if cursor is not None:
-        cursor_ind = query.find("issues(") + len("issues(")
+        cursor_insertion_key = query_type + "("
+        cursor_ind = query.index(cursor_insertion_key) + len(cursor_insertion_key)
         query = query[:cursor_ind] + f'after:"{cursor}", ' + query[cursor_ind:]
     # Build request payload
     payload = {'query' : ''.join(query.split('\n'))}
     response = requests.post(endpoint, json=payload, headers=headers)
     return json.loads(response.content)
 
-def get_all_responses(query):
+def get_all_responses(query, query_type):
     """
     Helper function to bypass GitHub GraphQL API node limit.
     """
     # Get data from a single response
-    initial_data = send_query(query)
-    data, last_cursor, total_num_issues = parse_single_issue_query(initial_data)
-    print("Retrieving {} out of {} values...".format(len(data), total_num_issues))
-    # Continue requesting issues (with pagination) until all are acquired
-    while len(data) < total_num_issues:
-        rdata = send_query(query, cursor=last_cursor)
-        pdata, last_cursor, _ = parse_single_issue_query(rdata)
+    initial_data = send_query(query, query_type)
+    data, last_cursor, total_count = parse_single_query(initial_data, query_type)
+    print(f"Retrieving {len(data)} out of {total_count} values...")
+    # Continue requesting data (with pagination) until all are acquired
+    while len(data) < total_count:
+        rdata = send_query(query, query_type, cursor=last_cursor)
+        pdata, last_cursor, _ = parse_single_query(rdata, query_type)
         data.extend(pdata)
-        print("Retrieving {} out of {} values...".format(len(data), total_num_issues))
+        print(f"Retrieving {len(data)} out of {total_count} values...")
     print("Done.")
     return data
 
-def parse_single_issue_query(data):
+def parse_single_query(data, query_type):
     """
-    Parse the raw json returned by get_open_numpy_issues_with_crossrefs.
+    Parse the data returned by `send_query`
+
+    .. warning::
+
+       Like `send_query`, the logic here depends on the specific structure
+       of the query: it must be an issue or PR query that requests both
+       `totalCount` and `edges { cursor }`.
     """
     try:
-        total_num_issues = data['data']['repository']['issues']['totalCount']
-        data = data['data']['repository']['issues']['edges']
+        total_count = data['data']['repository'][query_type]['totalCount']
+        data = data['data']['repository'][query_type]['edges']
         last_cursor = data[-1]['cursor']
     except KeyError as e:
         print(data)
         raise e
-    return data, last_cursor, total_num_issues
+    return data, last_cursor, total_count
 
 
-class GithubIssueGrabber:
+class GithubGrabber:
     """
     Pull down data via the GitHub APIv.4 given a valid GraphQL query.
     """
 
-    def __init__(self, query_fname, repo_owner="numpy", repo_name="numpy"):
+    def __init__(self, query_fname, query_type, repo_owner="numpy", repo_name="numpy"):
         """
         Create an object to send/recv queries related to the issue tracker
         for the given repository via the GitHub API v.4.
@@ -108,12 +143,16 @@ def __init__(self, query_fname, repo_owner="numpy", repo_name="numpy"):
         query_fname : str
             Path to a valid GraphQL query conforming to the GitHub GraphQL
             schema
+        query_type : {"issues", "pullRequests"}
+            Type of object that is being queried according to the GitHub GraphQL
+            schema. Currently only "issues" and "pullRequests" are supported.
         repo_owner : str
             Repository owner. Default is "numpy"
         repo_name : str
             Repository name. Default is "numpy"
         """
         self.query_fname = query_fname
+        self.query_type = query_type  # TODO: Parse this directly from query
         self.repo_owner = repo_owner
         self.repo_name = repo_name
         self.raw_data = None
@@ -128,7 +167,7 @@ def get(self):
         """
         Get JSON-formatted raw data from the query.
         """
-        self.raw_data = get_all_responses(self.query)
+        self.raw_data = get_all_responses(self.query, self.query_type)
 
     def dump(self, outfile):
         """
@@ -142,6 +181,20 @@ def dump(self, outfile):
 
 
 if __name__ == "__main__":
-    grabber = GithubIssueGrabber('query_examples/issue_activity_since_date.gql')
-    grabber.get()
-    grabber.dump("_data/issues.json")
+    repo = "networkx"
+    issues = GithubGrabber(
+        'query_examples/issue_activity_since_date.gql',
+        'issues',
+        repo_owner=repo,
+        repo_name=repo,
+    )
+    issues.get()
+    issues.dump(f"_data/{repo}_issues.json")
+    prs = GithubGrabber(
+        'query_examples/pr_data_query.gql',
+        'pullRequests',
+        repo_owner=repo,
+        repo_name=repo,
+    )
+    prs.get()
+    prs.dump(f"_data/{repo}_prs.json")

From 11ec045c7295a832f3ca92a4b27aadb0e1d7b3cb Mon Sep 17 00:00:00 2001
From: Ross Barnowski <rossbar@berkeley.edu>
Date: Thu, 3 Feb 2022 21:57:29 -0800
Subject: [PATCH 5/5] Update filename to reflect more general nature.

---
 issues.py => query.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename issues.py => query.py (100%)

diff --git a/issues.py b/query.py
similarity index 100%
rename from issues.py
rename to query.py