In [1]:
from datetime import datetime
from time import sleep

import gerrit
import github
from util import create_communication_channels, store_communication_channels

from tqdm.auto import tqdm

In [2]:
# The two datetimes handed to create_communication_channels for every project below.
t_1 = datetime.fromisoformat('2024-03-04')
t_2 = datetime.fromisoformat('2024-04-01')

In [None]:
# GitHub API client used by the React and Visual Studio Code sections below.
# NOTE(review): 'GITHUB_API_TOKEN' looks like a placeholder — presumably a real
# token has to be supplied here; confirm, and do not commit an actual secret.
gh = github.GitHubAPI('GITHUB_API_TOKEN')

## React

In [None]:
# Fetch all React pull requests (open and closed), then the timeline of each one.
react_pulls = gh.query('repos/facebook/react/pulls', params={'state': 'all', 'per_page': 100})

# Maximum number of attempts per pull request before the error is re-raised.
tries = 10
react_timelines = {}
for pull in tqdm(react_pulls, desc='Collect timeline from pulls'):
    pull_number = pull['number']
    endpoint = f'repos/facebook/react/issues/{pull_number}/timeline'

    # Every pull starts with large pages again; a failure on an earlier pull
    # must not permanently downgrade the paging of all later requests.
    params = {'per_page': 100}

    # The query fails with ChunkedEncodingError from time to time, so retry
    # with exponential backoff instead of aborting the whole collection.
    for attempt in range(tries):
        try:
            timeline = gh.query(endpoint, params=params)
        except Exception:
            if attempt == tries - 1:
                # Out of attempts: surface the original error.
                raise
            # Retry this pull without an explicit page size.
            params = {}
            sleep(2 ** attempt)  # some backoff
        else:
            # Store the timeline; without this, extract_activities below
            # would always receive an empty mapping.
            react_timelines[pull_number] = timeline
            break

react_activities = github.extract_activities(react_pulls, react_timelines)

In [None]:
# List every non-empty user id containing 'bot', sorted, so the bot list
# in the next cell can be curated by hand.
sorted(
    name
    for name in react_activities.user_id.unique()
    if name and 'bot' in name
)

In [None]:
# User ids whose activities are removed from the React data set as bots.
# Commented-out entries are ids that contain 'bot' but are deliberately NOT
# excluded (presumably regular accounts); each links to its GitHub profile.
react_bots = [
    'react-sizebot',
    'facebook-github-bot',
    'vercel[bot]',
    'github-actions[bot]',
    'dependabot[bot]',
    # 'mobot11', # https://github.com/mobot11
    'codesandbox-ci[bot]',
    'sizebot',
    #  'ybot1122',# https://github.com/ybot1122
    'inclusive-coding-bot',
    'stale[bot]',
    #  'maksbotan', # https://github.com/maksbotan
    #  'benbot', # https://github.com/benbot
    'pull-bot',
    #  'rowinbot', # https://github.com/rowinbot
    'reactjs-bot',
    #  'hprobotic', # https://github.com/hprobotic
    #  'dabbott', # https://github.com/dabbott
    #  'sarbbottam', # https://github.com/sarbbottam
    #  'iRobot98', # https://github.com/iRobot98
    #  'kebot', # https://github.com/kebot
    #  'tobinibot',  # https://github.com/tobinibot
]

In [None]:
# Keep only the activities whose author is not on the React bot list.
react_is_bot = react_activities.user_id.isin(react_bots)
react_human_activities = react_activities[~react_is_bot]

In [None]:
# Build the React communication channels from the bot-filtered activities,
# passing the t_1/t_2 datetimes defined at the top of the notebook
# (presumably the bounds of the observation window — confirm in util).
react_communication_channels = create_communication_channels(react_human_activities, t_1, t_2)

In [None]:
# Persist the React communication channels to 'react.json.bz2'
# (the extension suggests bzip2-compressed JSON — confirm in util).
store_communication_channels(react_communication_channels, 'react.json.bz2')

## Visual Studio Code

In [None]:
# Fetch all Visual Studio Code pull requests (open and closed), then the
# timeline of each one.
vscode_pulls = gh.query('repos/microsoft/vscode/pulls', params={'state': 'all', 'per_page': 100})

# Maximum number of attempts per pull request before the error is re-raised.
vscode_tries = 10
vscode_timelines = {}
for pull in tqdm(vscode_pulls, desc='Collect timeline from pulls'):
    pull_number = pull['number']
    endpoint = f'repos/microsoft/vscode/issues/{pull_number}/timeline'

    # The query fails with ChunkedEncodingError from time to time. A single
    # immediate retry is not enough for a long collection run, so retry with
    # exponential backoff, as the React section does.
    for attempt in range(vscode_tries):
        try:
            if attempt == 0:
                # First attempt asks for large pages.
                timeline = gh.query(endpoint, params={'per_page': 100})
            else:
                # Retries drop the explicit paging parameters.
                timeline = gh.query(endpoint)
        except Exception:
            if attempt == vscode_tries - 1:
                # Out of attempts: surface the original error.
                raise
            sleep(2 ** attempt)  # some backoff
        else:
            vscode_timelines[pull_number] = timeline
            break

vscode_activities = github.extract_activities(vscode_pulls, vscode_timelines)

In [None]:
# List every non-empty user id containing 'bot', sorted, so the bot list
# in the next cell can be curated by hand.
sorted(
    name
    for name in vscode_activities.user_id.unique()
    if name and 'bot' in name
)

In [None]:
# User ids whose activities are removed from the Visual Studio Code data set
# as bots. Commented-out entries are ids that contain 'bot' but are
# deliberately NOT excluded (presumably regular accounts); each links to its
# GitHub profile.
vscode_bots = [
    'microsoft-github-policy-service[bot]',
    'vs-code-engineering[bot]',
    'dependabot[bot]',
    'faraon-bot',
    'azure-pipelines[bot]',
    # 'MoazzemHossain-bot', # https://github.com/MoazzemHossain-bot
    # 'bot174', # https://github.com/bot174
    'inclusive-coding-bot',
    'magebot1',
    # 'Rezagoli-bot', # https://github.com/Rezagoli-bot
    'github-actions[bot]',
    # 'marrie-bot', # https://github.com/marrie-bot
    'vscodebot[bot]',
    'codetriage-readme-bot',
    'mention-bot',
    # 'vsobotka', # https://github.com/vsobotka
    # 'sarbbottam', # https://github.com/sarbbottam
    # 'MattTalbot', # https://github.com/MattTalbot
]

In [None]:
# Keep only the activities whose author is not on the VS Code bot list.
vscode_is_bot = vscode_activities.user_id.isin(vscode_bots)
vscode_human_activities = vscode_activities[~vscode_is_bot]

In [None]:
# Build the VS Code communication channels from the bot-filtered activities,
# passing the t_1/t_2 datetimes defined at the top of the notebook
# (presumably the bounds of the observation window — confirm in util).
vscode_communication_channels = create_communication_channels(vscode_human_activities, t_1, t_2)

In [None]:
# Persist the VS Code communication channels to 'vscode.json.bz2'
# (the extension suggests bzip2-compressed JSON — confirm in util).
store_communication_channels(vscode_communication_channels, 'vscode.json.bz2')

## Android

In [None]:
# Gerrit API client pointed at the Android code-review server.
g = gerrit.GerritAPI('https://android-review.googlesource.com/')

In [None]:
# Query the 'changes' endpoint of the Android Gerrit instance.
# NOTE(review): no filter or paging arguments are passed — confirm that
# GerritAPI.query returns the complete result set.
android_changes = g.query('changes')

In [None]:
# Turn the queried changes into an activities table; the next cell filters it
# through its message_tag column.
android_activities = gerrit.extract_activities(android_changes)

In [None]:
# Autogenerated Gerrit tags that are kept despite being machine-written
# (new patch set, merge, abandon, restore) — presumably because they record
# actions triggered by a person; confirm.
selected_tags = (
    'autogenerated:gerrit:newPatchSet',
    'autogenerated:gerrit:merge',
    'autogenerated:gerrit:abandon',
    'autogenerated:gerrit:restore',
)

# An activity is kept when its tag is missing, contains 'mailMessageId',
# or is one of the selected tags above.
android_message_tags = android_activities.message_tag
android_human_activities = android_activities[
    android_message_tags.isnull()
    | android_message_tags.str.contains('mailMessageId')
    | android_message_tags.isin(selected_tags)
]

In [None]:
# Build the Android communication channels; Gerrit data uses 'change_id' and
# '_account_id' as the code-review and participant columns.
android_communication_channels = create_communication_channels(
    android_human_activities,
    t_1,
    t_2,
    code_review_col='change_id',
    participant_col='_account_id',
)

In [None]:
# Persist the Android communication channels to 'android.json.bz2'
# (the extension suggests bzip2-compressed JSON — confirm in util).
store_communication_channels(android_communication_channels, 'android.json.bz2')