Skip to content

Commit f117f0a

Browse files
authored
[libc++] Add a script to synchronize status-tracking CSVs with Github issues (#101704)
This script can be run manually to synchronize the CSV files that we use to track Standards Conformance with the Github issues that track our implementation of LWG issues and papers.
1 parent b53fe2c commit f117f0a

File tree

1 file changed

+236
-0
lines changed

1 file changed

+236
-0
lines changed
Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
#!/usr/bin/env python3
2+
# ===----------------------------------------------------------------------===##
3+
#
4+
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5+
# See https://llvm.org/LICENSE.txt for license information.
6+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
#
8+
# ===----------------------------------------------------------------------===##
9+
10+
from typing import List, Dict, Tuple, Optional
11+
import csv
12+
import itertools
13+
import json
14+
import os
15+
import pathlib
16+
import re
17+
import subprocess
18+
19+
# Number identifying the 'Libc++ Standards Conformance' project on Github. It is kept as a
# string because it is passed verbatim as a command-line argument to the `gh` tool.
LIBCXX_CONFORMANCE_PROJECT = '31'
21+
22+
class PaperInfo:
    """
    Status-tracking information about a single paper or LWG issue.

    A PaperInfo can be built from a row of a status-tracking CSV file (from_csv_row) or from
    a Github issue obtained by querying a Github Project (from_github_issue).
    """

    paper_number: str
    """
    Identifier for the paper or the LWG issue. This must be something like 'PnnnnRx', 'Nxxxxx' or 'LWGxxxxx'.
    """

    paper_name: str
    """
    Plain text string representing the name of the paper.
    """

    meeting: Optional[str]
    """
    Plain text string representing the meeting at which the paper/issue was voted.
    """

    status: Optional[str]
    """
    Status of the paper/issue. This must be '|Complete|', '|Nothing To Do|', '|In Progress|',
    '|Partial|' or 'Resolved by <something>'.
    """

    first_released_version: Optional[str]
    """
    First version of LLVM in which this paper/issue was resolved.
    """

    labels: Optional[List[str]]
    """
    List of labels to associate to the issue in the status-tracking table. Supported labels are
    'format', 'ranges', 'spaceship', 'flat_containers', 'concurrency TS' and 'DR'.
    """

    original: Optional[object]
    """
    Object from which this PaperInfo originated. This is used to track the CSV row or Github issue that
    was used to generate this PaperInfo and is useful for error reporting purposes.
    """

    def __init__(self, paper_number: str, paper_name: str,
                 meeting: Optional[str] = None,
                 status: Optional[str] = None,
                 first_released_version: Optional[str] = None,
                 labels: Optional[List[str]] = None,
                 original: Optional[object] = None):
        self.paper_number = paper_number
        self.paper_name = paper_name
        self.meeting = meeting
        self.status = status
        self.first_released_version = first_released_version
        self.labels = labels
        self.original = original

    def for_printing(self) -> Tuple[str, str, str, str, str, str]:
        """
        Return the six columns of the CSV row representing this PaperInfo.

        Missing (None) fields are rendered as empty strings, and each label is wrapped in
        pipes ('|label|') with labels separated by a single space.
        """
        return (
            f'`{self.paper_number} <https://wg21.link/{self.paper_number}>`__',
            self.paper_name,
            self.meeting if self.meeting is not None else '',
            self.status if self.status is not None else '',
            self.first_released_version if self.first_released_version is not None else '',
            ' '.join(f'|{label}|' for label in self.labels) if self.labels is not None else '',
        )

    def __repr__(self) -> str:
        # Prefer showing the object this PaperInfo was created from, since that is what a
        # person reading an error message can look up in the CSV file or on Github.
        return repr(self.original) if self.original is not None else repr(self.for_printing())

    def is_implemented(self) -> bool:
        """
        Return whether the paper/issue is considered implemented.

        A paper with no status, or whose status mentions 'in progress' or 'partial'
        (case-insensitively), is not implemented. Any other status counts as implemented.
        """
        if self.status is None:
            return False
        if re.search(r'(in progress|partial)', self.status.lower()):
            return False
        return True

    @staticmethod
    def from_csv_row(row: Tuple[str, str, str, str, str, str]) -> 'PaperInfo':
        """
        Given a row from one of our status-tracking CSV files, create a PaperInfo object representing that row.

        Raises RuntimeError if no paper/issue number can be found in the first column.
        """
        # Extract the paper number from the first column
        match = re.search(r"((P[0-9R]+)|(LWG[0-9]+)|(N[0-9]+))\s+", row[0])
        if match is None:
            raise RuntimeError(f"Can't parse paper/issue number out of row: {row}")

        # Labels are written as '|label|' entries separated by spaces. A label may itself
        # contain a space (e.g. '|concurrency TS|'), so match whole pipe-delimited entries
        # rather than splitting on spaces, which would tear such a label apart. Bare words
        # that are not wrapped in pipes are still accepted as labels.
        labels = [entry.strip('|') for entry in re.findall(r'\|[^|]+\||\S+', row[5])]

        return PaperInfo(
            paper_number=match.group(1),
            paper_name=row[1],
            meeting=row[2] or None,
            status=row[3] or None,
            first_released_version=row[4] or None,
            labels=labels or None,
            original=row,
        )

    @staticmethod
    def from_github_issue(issue: Dict) -> 'PaperInfo':
        """
        Create a PaperInfo object from the Github issue information obtained from querying a Github Project.

        Raises RuntimeError if no paper/issue number can be found in the issue title.
        """
        # Extract the paper number from the issue title
        match = re.search(r"((P[0-9R]+)|(LWG[0-9]+)|(N[0-9]+)):", issue['title'])
        if match is None:
            raise RuntimeError(f"Issue doesn't have a title that we know how to parse: {issue}")
        paper = match.group(1)

        # Figure out the status of the paper according to the Github project information.
        #
        # Sadly, we can't make a finer-grained distinction about *how* the issue
        # was closed (such as Nothing To Do or similar).
        status = '|Complete|' if issue.get('status') == 'Done' else None

        # Handle labels. Like 'status' and 'meeting Voted', the 'labels' entry is treated as
        # optional so that an issue without it does not abort the whole synchronization.
        valid_labels = ('format', 'ranges', 'spaceship', 'flat_containers', 'concurrency TS', 'DR')
        labels = [label for label in (issue.get('labels') or []) if label in valid_labels]

        return PaperInfo(
            paper_number=paper,
            paper_name=issue['title'],
            meeting=issue.get('meeting Voted', None),
            status=status,
            first_released_version=None, # TODO
            labels=labels if labels else None,
            original=issue,
        )
145+
146+
def load_csv(file: pathlib.Path) -> List[Tuple]:
    """
    Read the comma-separated file at the given path and return all of its rows, in file order.
    """
    # newline='' lets the csv module do its own newline handling.
    with open(file, newline='') as stream:
        return list(csv.reader(stream, delimiter=','))
153+
154+
def write_csv(output: pathlib.Path, rows: List[Tuple]):
    """
    Write the given rows to the file at the given path, overwriting any existing content.

    Every field is quoted and each row is terminated by a single '\\n'.
    """
    with open(output, 'w', newline='') as stream:
        csv.writer(stream, quoting=csv.QUOTE_ALL, lineterminator='\n').writerows(rows)
159+
160+
def sync_csv(rows: List[Tuple], from_github: List[PaperInfo]) -> List[Tuple]:
    """
    Given a list of CSV rows representing an existing status file and a list of PaperInfos representing
    up-to-date (but potentially incomplete) tracking information from Github, this function returns the
    new CSV rows synchronized with the up-to-date information.

    Note that this only tracks changes from 'not implemented' issues to 'implemented'. If an up-to-date
    PaperInfo reports that a paper is not implemented but the existing CSV rows report it as implemented,
    it is an error (i.e. the result is not a CSV row where the paper is *not* implemented).

    The first row is treated as the header and is returned unchanged. An empty list of rows yields
    an empty result.

    Raises RuntimeError when a row marked as implemented still has a non-implemented tracking issue,
    when a row not marked as implemented has no tracking issue, or when it has more than one.
    """
    # An empty status file has no header and nothing to synchronize.
    if not rows:
        return []

    # Index the Github issues by paper number once, instead of rescanning the whole list for
    # every CSV row. Issues sharing a paper number keep their original relative order.
    issues_by_paper = {}
    for gh in from_github:
        issues_by_paper.setdefault(gh.paper_number, []).append(gh)

    results = [rows[0]] # Start with the header
    for row in rows[1:]: # Skip the header
        # If the row contains empty entries, this is a "separator row" between meetings.
        # Preserve it as-is. A completely blank line is read as an empty row and is
        # preserved the same way.
        if not row or row[0] == "":
            results.append(row)
            continue

        paper = PaperInfo.from_csv_row(row)

        # Find any Github issues tracking this paper
        tracking = issues_by_paper.get(paper.paper_number, [])

        # If the row is already implemented, basically keep it unchanged but also validate that we're not
        # out-of-sync with any still-open Github issue tracking the same paper.
        if paper.is_implemented():
            dangling = [gh for gh in tracking if not gh.is_implemented()]
            if dangling:
                raise RuntimeError(f"We found the following open tracking issues for a row which is already marked as implemented:\nrow: {row}\ntracking issues: {dangling}")
            results.append(paper.for_printing())
            continue

        # If there is no tracking issue for that row in the CSV, this is an error since we're
        # missing a Github issue.
        if not tracking:
            raise RuntimeError(f"Can't find any Github issue for CSV row which isn't marked as done yet: {row}")

        # If there's more than one tracking issue, something is weird too.
        if len(tracking) > 1:
            raise RuntimeError(f"Found a row with more than one tracking issue: {row}\ntracked by: {tracking}")

        # If the issue is closed, synchronize the row based on the Github issue. Otherwise, use the
        # existing CSV row as-is.
        results.append(tracking[0].for_printing() if tracking[0].is_implemented() else row)

    return results
205+
206+
# Names of the status-tracking CSV files (looked up under docs/Status) to synchronize.
CSV_FILES_TO_SYNC = [
    'Cxx14Issues.csv',
    'Cxx17Issues.csv',
    'Cxx17Papers.csv',
    'Cxx20Issues.csv',
    'Cxx20Papers.csv',
    # TODO: The Github issues are not created yet.
    # 'Cxx23Issues.csv',
    # 'Cxx23Papers.csv',
    # 'Cxx2cIssues.csv',
    # 'Cxx2cPapers.csv',
]
218+
219+
def main():
    """
    Synchronize every file listed in CSV_FILES_TO_SYNC with the issues of the Github project.

    The issues are fetched once by running the `gh` command-line tool, then each CSV file is
    loaded, synchronized and written back in place.
    """
    # The libc++ root is taken to be the parent of the directory containing this script.
    script_path = pathlib.Path(os.path.abspath(__file__))
    libcxx_root = script_path.parent.parent

    # Extract the list of PaperInfos from issues we're tracking on Github.
    print("Loading all issues from Github")
    gh_command_line = [
        'gh', 'project', 'item-list', LIBCXX_CONFORMANCE_PROJECT,
        '--owner', 'llvm',
        '--format', 'json',
        '--limit', '9999999',
    ]
    raw_output = subprocess.check_output(gh_command_line)
    project_info = json.loads(raw_output)
    from_github = [PaperInfo.from_github_issue(item) for item in project_info['items']]

    status_dir = libcxx_root / 'docs' / 'Status'
    for filename in CSV_FILES_TO_SYNC:
        print(f"Synchronizing {filename} with Github issues")
        file = status_dir / filename
        # Named 'existing_rows' rather than 'csv' so the csv module is not shadowed.
        existing_rows = load_csv(file)
        write_csv(file, sync_csv(existing_rows, from_github))


if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)