In [1]:
import os
import re
import configparser
from datetime import datetime
from github import Github

In [2]:
def read_auth_from_config(path='../utils/cred.conf'):
    """Load GitHub credentials from an INI-style configuration file.

    The file is expected to contain an ``[auth]`` section with the options
    ``username``, ``password`` and ``token``.

    Args:
        path: Location of the credentials file. Defaults to
            ``'../utils/cred.conf'``.

    Returns:
        A ``(user, pwd, token)`` tuple. An element is ``None`` when its
        option (or the whole ``[auth]`` section) is absent, and all three
        are ``None`` when the file does not exist.
    """
    if not os.path.isfile(path):
        # Always return a 3-tuple so callers can unpack or index the result
        # without hitting "'NoneType' object is not subscriptable".
        return None, None, None

    config = configparser.ConfigParser()
    config.read(path)
    # fallback=None tolerates a config that only provides some of the options
    # (for example a token but no password).
    return tuple(
        config.get('auth', option, fallback=None)
        for option in ('username', 'password', 'token')
    )

In [3]:
def authenticate(token):
    """Build a ``Github`` client for the given token.

    When ``token`` is falsy, the token is looked up through
    ``read_auth_from_config()``. If that lookup raises, "Config error." is
    printed and the client is created with the token that was passed in.
    """
    if token:
        return Github(token)

    try:
        # Only the third element is needed; the other two are discarded.
        _user, _pwd, token = read_auth_from_config()
    except Exception:
        print("Config error.")
    return Github(token)

In [4]:
# "<owner>_<repo>" identifier: it is converted to "owner/repo" for the GitHub
# lookup and reused as the output folder name when the thread is written out.
repo_name = 'jupyter_notebook'

In [5]:
# A falsy token makes authenticate() read the token from the config file
# itself. Indexing read_auth_from_config()[2] here would raise TypeError
# whenever the credentials file is missing.
gh = authenticate(None)
# Split on the first underscore only: GitHub owner logins do not contain
# underscores, but repository names may, e.g. "owner_my_repo" -> "owner/my_repo".
repo = gh.get_repo("/".join(repo_name.split('_', 1)))

In [6]:
# Number of the pull request whose conversation is exported; it is also used
# as the stem of the output file name.
pull_req_number = 1160

In [7]:
pull = repo.get_pull(pull_req_number) # https://github.com/jupyter/notebook/pull/1160
# Conversation comments of the pull request. Inline review comments are a
# separate collection (see get_review_comments) and are not fetched here.
comments = pull.get_issue_comments()

In [8]:
# Number of conversation comments on the pull request (49 when this was run).
pull.comments

49

In [9]:
# Description text and author login of the pull request; together they form
# the first entry of the exported thread.
pull.body, pull.user.login

("until we are more ready to emphasize it.\n\nLab's still there, you just have to enter the URL for now.\n\ncc @fperez @ellisonbg @jasongrout @blink1073\n",
 'minrk')

In [10]:
# Timestamp format used for every thread entry.
# NOTE(review): "%M:%S" renders minutes:seconds, not hours:minutes ("%H:%M").
# Confirm that this is really the format the downstream data requires.
pull.created_at.strftime("%M:%S") # Format required

'16:45'

In [11]:
# URL of the raw diff for this pull request; not used by this notebook yet.
pull.diff_url

'https://github.com/jupyter/notebook/pull/1160.diff'

In [12]:
# Labels attached to the pull request (inspection only).
pull.labels

[Label(name="status:resolved-locked")]

In [13]:
# Count of inline review comments. It is 0 here, so only the issue comments
# are collected; if it were non-zero, get_review_comments would be needed to
# fetch them as well.
pull.review_comments

0

In [14]:
def clean_string(line):
    """Normalise one line of a GitHub comment into plain text with tokens.

    A line that consists solely of a Markdown image (``![alt](target)``)
    becomes ``"<image>"``. Otherwise the following substitutions are applied:

    * URLs              -> ``<url>``
    * ``:shortcode:``   -> ``<emoji>``
    * ``@login``        -> ``<user>``
    * ``_italic text_`` -> ``italic text``

    Args:
        line: A single line of comment text.

    Returns:
        The cleaned line.
    """
    # Ignore surrounding whitespace (including a stray "\r") when detecting
    # an image-only line.
    stripped = line.strip()
    if stripped.startswith('![') and stripped.endswith(')'):
        return "<image>"

    # URLs go first: they may contain ':', '@' and '_' that the later patterns
    # would otherwise mangle. Stop at whitespace so the rest of the line is kept.
    cleaned = re.sub(r'https?://\S+', '<url>', line)
    # Emoji shortcodes such as :smile: or :+1:. The word-boundary guards keep
    # times like 10:30:00 intact.
    cleaned = re.sub(r'(?<!\w):[A-Za-z0-9_+-]+:(?!\w)', '<emoji>', cleaned)
    # Mentions: need at least one character after '@', may contain hyphens,
    # and must not be the middle of an e-mail address.
    cleaned = re.sub(r'(?<!\w)@\w[\w-]*', '<user>', cleaned)
    # Italics: only underscores that delimit a span, never the underscores
    # inside identifiers such as snake_case.
    cleaned = re.sub(r'(?<!\w)_(.+?)_(?!\w)', r'\1', cleaned)
    return cleaned

In [15]:
# Start the thread with the pull request description as its first entry.
# Each entry is a (timestamp, author login, cleaned text) tuple.
thread = []
# The body may be None when the pull request has no description.
# splitlines() also handles "\r\n" line endings, and the strip() test drops
# whitespace-only lines instead of keeping them as empty fragments.
init_comment = " ".join(
    clean_string(line)
    for line in (pull.body or "").splitlines()
    if line.strip()
)
# NOTE(review): "%M:%S" is minutes:seconds, not hours:minutes — confirm that
# this is the timestamp the output format is meant to carry.
thread.append((pull.created_at.strftime("%M:%S"), pull.user.login, init_comment))

In [16]:
# Turn every conversation comment into a (timestamp, author, text) entry.
comment_entries = []
for comment in comments:
    # splitlines() copes with "\r\n" line endings. Blank or whitespace-only
    # lines and quoted lines (starting with '>') are dropped.
    cleaned = " ".join(
        clean_string(line)
        for line in (comment.body or "").splitlines()
        if line.strip() and not line.startswith('>')
    )
    # NOTE(review): "%M:%S" is minutes:seconds, not hours:minutes — confirm.
    comment_entries.append((comment.created_at.strftime("%M:%S"), comment.user.login, cleaned))

# Replace everything after the pull request description instead of appending,
# so re-running this cell does not duplicate the comments in `thread`.
thread[1:] = comment_entries

In [17]:
# Show every thread entry followed by an empty line.
for entry in thread:
    print(entry, end="\n\n")

('16:45', 'minrk', "until we are more ready to emphasize it. Lab's still there, you just have to enter the URL for now. cc <user> <user> <user> <user>")

('21:12', 'dwillmer', "i appreciate your point, but i really think this is the wrong thing to do. it's clearly marked as alpha - is there something specific that you'd like to see before calling it alpha?")

('21:56', 'jasongrout', "I'm comfortable either way, since it's on master - it depends on how many people run on master.  Either way, <user> also suggested we have a very visible notice of what to expect and what won't work.")

('23:38', 'jasongrout', 'The other factor in the decision for me is how close we are to releasing notebook master as a new release. If it\'s soon, then not having the button makes sense to me. If it will be a while, enough so we can get the notebook working "minimally", then I think the button is fine.')

('24:31', 'jasongrout', "But to <user>'s point - let's have a meeting between everyone to draw the feat

In [18]:
# Write the thread as one "[timestamp] <author> text" line per entry.
out_path = f'../dataset/raw/detailed/{repo_name}/{pull_req_number}.raw.txt'
# Create the per-repository folder on first use instead of failing in open().
os.makedirs(os.path.dirname(out_path), exist_ok=True)
# Comment text can contain non-ASCII characters, so the encoding is fixed
# explicitly rather than left to the platform default.
with open(out_path, 'w', encoding='utf-8') as f:
    for ic in thread:
        line = f"[{ic[0]}] <{ic[1]}> {ic[2]}\n"
        f.write(line)

The rest of the code formats the exported thread according to the input requirements of the pointer network.