# Choose random PRs

Fetch the open, non-draft PRs that were created between two dates.

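Keep at most `SAMPLE_SIZE` of those PRs, skip excluded authors and `experimental/` / `canary/` branches, and list the rest ranked by score (commits + changed files), highest first.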

Configure the following variables:
* `FROM_DATE`
* `TO_DATE`
* `SAMPLE_SIZE`

In [9]:
import random
from operator import itemgetter
from typing import Dict
from django.conf import settings
import apps.patterns.github as gh

# Start and end of the creation-date window; both values are interpolated
# into the `created:` range of the search query below.
FROM_DATE = "2021-01-28"
TO_DATE = "2021-02-04"

# Upper bound on how many search hits are fetched and how many PRs are listed.
SAMPLE_SIZE = 50


def calculate_score(payload: Dict) -> float:
    """Score a PR from its line churn, commit count and changed-file count.

    The result is ``(1 + additions - deletions) * (commits * 10)
    + (changed_files * 10)``. Multiplication binds tighter than addition,
    so the changed-files term is added after the product, not multiplied.

    Args:
        payload: Mapping with the keys ``"commits"``, ``"changed_files"``,
            ``"additions"`` and ``"deletions"``.

    Returns:
        The computed score.

    Raises:
        KeyError: If one of the required keys is missing from ``payload``.
    """
    commit_weight = payload["commits"] * 10
    file_weight = payload["changed_files"] * 10
    # Net line growth, offset by one.
    net_lines = 1 + payload["additions"] - payload["deletions"]
    return net_lines * commit_weight + file_weight
    
    
def calculate_score_v1(payload: Dict) -> float:
    """Score a PR as the sum of its commit count and changed-file count.

    Args:
        payload: Mapping with the keys ``"commits"`` and ``"changed_files"``.

    Returns:
        ``payload["commits"] + payload["changed_files"]``.

    Raises:
        KeyError: If either key is missing from ``payload``.
    """
    commit_count = payload["commits"]
    file_count = payload["changed_files"]
    return commit_count + file_count

# GitHub logins whose PRs are left out when the fetched PRs are filtered.
skipped_authors = [
    'sebastialonso',
    'dmarquezlutfy',
    'dvelez-jimenez',
    'helderthh',
    'viktorvillalobos',
    'susana-hernandez',
    'amontanezp',
    'lbenitez000',
    'fsobarzo',
]

# Repository object used further down to load each PR by its number.
github_repo = gh.get_repo()

# Search qualifiers: PRs of the configured repo that are open, not drafts,
# and were created inside the FROM_DATE..TO_DATE window.
pr_query = f"repo:{settings.BACKEND_REPO_SLUG} is:pr is:open draft:false created:{FROM_DATE}..{TO_DATE}"

print("Searching PR matching with parameters...")
found_prs = gh.search_issues_with_query(query=pr_query)

# Cap the number of search hits whose details will be fetched.
# The hit count is captured BEFORE truncating so the message reports how many
# PRs the search really returned (previously it always echoed SAMPLE_SIZE).
# NOTE(review): the cap is applied before the author/branch filters run, so
# fewer than SAMPLE_SIZE PRs may end up in the final listing.
total_found = len(found_prs)
if total_found > SAMPLE_SIZE:
    found_prs = found_prs[:SAMPLE_SIZE]
    print(f"Found {total_found}, but will only fetch {SAMPLE_SIZE}")
else:
    print(f"Found {total_found}")

# Load the full pull-request object for every search hit and keep a flat
# record of the fields needed for scoring and reporting.
complete_prs = []
print("Fetching PRs...", end="")
for position, search_hit in enumerate(found_prs):
    # Progress is the fraction of hits handled so far (0.0 for the first hit).
    progress = round(position / len(found_prs), 2)
    print(f"{progress}", end="...")
    pull_request = github_repo.get_pull(search_hit.number)

    # Leave out PRs opened by the excluded authors.
    if pull_request.user.login in skipped_authors:
        continue

    # Leave out PRs whose head label marks an experimental or canary branch.
    head_label = pull_request.head.label
    if any(marker in head_label for marker in ("experimental/", "canary/")):
        continue

    pr_record = {
        "title": pull_request.title,
        "head": head_label,
        "number": pull_request.number,
        "url": pull_request.html_url,
        "commits": pull_request.commits,
        "additions": pull_request.additions,
        "deletions": pull_request.deletions,
        "changed_files": pull_request.changed_files,
        "merged_at": pull_request.merged_at,
        "created_at": pull_request.created_at,
        "updated_at": pull_request.updated_at,
        "user": pull_request.user.login,
    }
    pr_record["score"] = calculate_score_v1(pr_record)
    complete_prs.append(pr_record)

# Terminate the progress line.
print()
# Number of search hits that were processed (counted before filtering).
print(f"Total: {len(found_prs)}")

# Rank the kept records by score, highest first; the sort is stable, so
# records with equal scores stay in fetch order.
prs = list(complete_prs)
prs.sort(key=itemgetter('score'), reverse=True)

print(f"Selecting worst top-{SAMPLE_SIZE} (from {FROM_DATE} to {TO_DATE})")
print()
for pr in prs[:SAMPLE_SIZE]:
    # One PR per line, each followed by a blank line.
    print(f"{pr['head']} {pr['url']} ({pr['user']}) ({pr['score']})", end="\n\n")

Searching PR matching with parameters...
Found 44
Fetching PRs...0.0...0.02...0.05...0.07...0.09...0.11...0.14...0.16...0.18...0.2...0.23...0.25...0.27...0.3...0.32...0.34...0.36...0.39...0.41...0.43...0.45...0.48...0.5...0.52...0.55...0.57...0.59...0.61...0.64...0.66...0.68...0.7...0.73...0.75...0.77...0.8...0.82...0.84...0.86...0.89...0.91...0.93...0.95...0.98...
Total: 44
Selecting worst top-50 (from 2021-01-28 to 2021-02-04)
cornershop:feat/payroll/ch61553/create-cr-generic-payroll-including-tipping https://github.com/cornershop/cornershop-backend/pull/19306 (pgarcia14180) (24)

cornershop:feat/ch59050/shopify-purolator https://github.com/cornershop/cornershop-backend/pull/19309 (pfariaz) (21)

cornershop:chore/payroll/ch61254/create-colombian-generic-payroll-including-tipping https://github.com/cornershop/cornershop-backend/pull/19302 (pgarcia14180) (19)

cornershop:feat/tms/ch58787/implement-new-assigner https://github.com/cornershop/cornershop-backend/pull/19339 (ivanACL) (18)



In [17]:
# Display every scored PR record, ordered by score with the highest first.
prs

[{'number': 15539,
  'url': 'https://github.com/cornershop/cornershop-backend/pull/15539',
  'commits': 360,
  'additions': 10095,
  'deletions': 30,
  'changed_files': 97,
  'merged_at': datetime.datetime(2021, 1, 8, 12, 20, 58),
  'created_at': datetime.datetime(2020, 10, 16, 16, 38, 54),
  'updated_at': datetime.datetime(2021, 1, 8, 12, 20, 59),
  'user': 'saherp145',
  'score': 457},
 {'number': 16720,
  'url': 'https://github.com/cornershop/cornershop-backend/pull/16720',
  'commits': 116,
  'additions': 4350,
  'deletions': 1081,
  'changed_files': 101,
  'merged_at': datetime.datetime(2021, 1, 6, 12, 34, 45),
  'created_at': datetime.datetime(2020, 11, 19, 21, 0, 59),
  'updated_at': datetime.datetime(2021, 1, 6, 12, 34, 46),
  'user': 'bixtope',
  'score': 217},
 {'number': 18041,
  'url': 'https://github.com/cornershop/cornershop-backend/pull/18041',
  'commits': 34,
  'additions': 1509,
  'deletions': 238,
  'changed_files': 65,
  'merged_at': datetime.datetime(2021, 1, 5, 12