diff --git a/Classes/AppMisc.py b/Classes/AppMisc.py index 56c5148..a84e253 100644 --- a/Classes/AppMisc.py +++ b/Classes/AppMisc.py @@ -5,7 +5,8 @@ def ReverseTable(data): # Reverses the order of a wikimedia table. Can handle merged rows, but only in first column if data.count("|}") - data.count("|}}") == 1 and "wikitable" in data: - data = [f.strip().replace("↓", "↑") for f in data.split("\n")] + data = [f.strip() for f in data.split("\n")] + data = [f.replace("↓", "↑") if "↓" in f else f.replace("↑", "↓") for f in data] headers = [c for c,v in enumerate(data) if v != "" and v[0] == "!"] start_index = headers[-1] + 1 for i,j in enumerate(headers,headers[0]): diff --git a/Classes/AppTourneydrawDE.py b/Classes/AppTourneydrawDE.py index 1e2033a..dbc5e2b 100644 --- a/Classes/AppTourneydrawDE.py +++ b/Classes/AppTourneydrawDE.py @@ -1,5 +1,5 @@ # Name: Tournament draw generator (dewiki) -# Author: Somnifuguist (w.wiki/fDy) +# Author: Somnifuguist # Date: 10-10-2020 # Content: Generates wikitext for tennis tournament draws @@ -20,8 +20,13 @@ def GetNameCorrections(): # Loads corrections from https://de.wikipedia.org/wiki/Benutzer:Siebenschl%C3%A4ferchen/Turnier-Generator - wikitext = GetSoup("https://de.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&format=json&&titles=Benutzer:Siebenschl%C3%A4ferchen/Turnier-Generator", 'json')['query']['pages']['11514212']['revisions'][0]['*'] change = [{}, {}] # {full name corrections}, {shortened name corrections}; + page = "Benutzer:Siebenschl%C3%A4ferchen/Turnier-Generator" + try: + wikitext = GetSoup("https://de.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&format=json&&titles=" + page, 'json')['query']['pages']['11514212']['revisions'][0]['*'] + except: + return change + # {full name corrections}[player] = [[full link, shortened link], [full link for Russian, shortened link for Russian]]; Russian links are only used by GetNameLink() if player is Russian shortened = False for d in 
wikitext.split("\n* "): @@ -56,9 +61,13 @@ def GetNameLink(name, country, mens): # Finds and returns formatted name and wikilinks for given name. if name == "": return ["", ""] + elif "qualifier" in name.lower() or name.lower() in ["bye", "player missing"]: + return ["Ziel="+name, "Ziel="+name] + old_name = name name = name.replace(". ", ".").replace(".", ". ") split_name = name.split(" ") + mixed_case = [f for f in split_name if (not f.islower() and not f.isupper()) or f.islower()] surname_index = 0 if mixed_case != []: @@ -139,22 +148,26 @@ def GetNameLink(name, country, mens): class Page(): def __init__(self): self.text = [] # contains the draw wikitext + self.error = "" class Player(): def __init__(self, player, date, mens): if player != None: country = "" - country = GetFlagDE(player[1], date) + if player[1] != "": + country = GetFlagDE(player[1], date) name_link = GetNameLink(player[0], country, mens) # 0: full name, 1: shortened name, 2: full name without nowrap (for seeds) - self.playertext = {0: "{{" + country + "|" + name_link[0] + "}}", 1: "{{" + country + "|" + name_link[1] + "}}", 2: "{{" + country + "|" + name_link[0] + "}}"} + def country_temp(country, name): + return "{{" + country + "|" + name + "}}" if country != "" else "[[" + name + "]]" + self.playertext = {0: country_temp(country, name_link[0]), 1: country_temp(country, name_link[1]), 2: country_temp(country, name_link[0])} length0 = len(name_link[0].split("|")[-1].replace("Ziel=", "")) length1 = len(name_link[1].split("|")[-1]) if length0 > 10: # need to prevent name wrapping in draw bracket self.playertext[0] = "{{nowrap|" + self.playertext[0] + "}}" if length1 > 10: self.playertext[1] = "{{nowrap|" + self.playertext[1] + "}}" - self.seed = [f.replace("Alt", "ALT").replace("A", "ALT") for f in player[2]] + self.seed = [f.replace("Alt", "ALT") for f in player[2]] else: text = "{{nowrap|PLAYER MISSING}}" self.playertext = {0: text, 1: text, 2: text} @@ -193,10 +206,13 @@ def __init__(t, data, 
mens, format, qual, date): t.doubles = len(data[0][0][0]) == 2 t.qual = qual t.byes = sum([match[2][1][0] == "BYE" for match in data[0]]) > 0 + t.init = ["INIT"] in data[0][0][2] round_names = ["Erste Runde", "Zweite Runde", "Dritte Runde", "Vierte Runde", "Achtelfinale", "Viertelfinale", "Halbfinale", "Finale", "Sieg"] nonfinal = t.rounds < 5 # tournament starts with first round instead of final t.round_names = round_names[:t.rounds-1] + ["Qualifikationsrunde", "qualifiziert"] if t.qual else round_names[:t.rounds-4+nonfinal] + round_names[-5+nonfinal:] - t.template_size = 32 if t.rounds == 5 else 8 if (t.rounds == 6 and t.doubles) else 16 # template size for main draws + t.template_size = 32 if t.rounds == 5 else 8 if ((t.rounds == 6 and t.doubles) or t.rounds == 3) else 16 # template size for main draws + t.template = None + t.qual_index = 0 SaveJSON("data/NamesDE.json", name_links) def SplitData(t, n, r): @@ -247,11 +263,13 @@ def MakeQualifiers(t, p): def MakeSection(t, p, data, rounds, round_names, format, byes, compact, abbr): # Generates wikitext for section of draw, using standard templates like {{Turnierplan16-3}}. 
- p.text += ["{{" + "Turnierplan" + str(2**rounds) + ("-kompakt-" if compact else "-") + str(format) + ("-Freilos" if byes else "")] + if not t.qual or t.qual_index == 0: + template = ("{{Turnierplan" + str(2**rounds) + ("-kompakt-" if compact else "-") + str(format) + ("-Freilos" if byes else "")) if t.template == None else t.template + p.text += [template] for c, round in enumerate(round_names): p.text += ["| RD" + str(c+1) + "=" + round] for j in range(rounds): - team_no = 1 + team_no = 1 if not t.qual else 2**(rounds-j) * t.qual_index + 1 for match in data[j]: p.text += [""] retired = False @@ -285,25 +303,36 @@ def MakeSection(t, p, data, rounds, round_names, format, byes, compact, abbr): for i in range(2): # add seed, team name/flag, score parameters for each team in given match team = match.teams[i] - bold = "'''" if match.winner == i else "" + bold = "'''" if match.winner == i and not t.init else "" name_text = "
&nbsp;".join([(bold + f.playertext[abbr] + bold if not match.bye else "") for f in team]) rd = "| RD" + str(j+1) + "-" num = "0" + str(team_no) if team_no < 10 and (rounds > 3 or compact) else str(team_no) p.text += [rd + "seed" + num + "=" + ("" if match.bye else ("&nbsp;" if team[0].seed == [] else (team[0].seed[0] if len(team[0].seed) == 1 else "{{nowrap|" + team[0].seed[0] + " " + team[0].seed[1] + "}}")))] p.text += [rd + "team" + num + "=" + (name_text if name_text != "
&nbsp;" else "")] + match_tiebreak = t.match_tiebreak + if t.match_tiebreak and set == 2 and len(match.score) > 1 and match.score[set+1][0].isdigit() and match.score[set+1][1].isdigit(): + if max(int(match.score[set+1][0]), int(match.score[set+1][1])) < 10: + match_tiebreak = False + if "match tiebreak" not in p.error: + p.error += "\nERROR: Mix of match tiebreak scores and non-match tiebreak scores. Check all scores.\n" + for set in range(match.sets): - p_score = "" if match.bye else match.score[set+1][i] + p_score = "" if match.bye or t.init else match.score[set+1][i] won_set = (i == match.score[0][1][set]) if set < len(match.score[0][1]) else 0 # no sets in byes/walkovers p_score = "[" + p_score + "]" if t.match_tiebreak and set == 2 and p_score != "" else p_score # add square brackets for match tiebreak p.text += [rd + "score" + num + "-" + str(set+1) + "=" + ("'''" + p_score + "'''" if won_set else p_score)] team_no += 1 - p.text += ["}}"] + t.qual_index += 1 + if not t.qual or t.qual_index == len(t.data[-1]): + p.text += ["}}"] def MakeDraw(t, p, compact, abbr): if t.qual: p.text += ["", "=== Ergebnisse ==="] sections = t.SplitData(len(t.data[-1]), t.rounds) + rounds_str = "-".join([str(len(t.data[-1]) * (2)**(c)) for c in range(t.rounds + 1)][::-1]) + t.template = "{{Turnierplan-Qualifikation-" + rounds_str + ("-Finale" if t.format == 53 else "") for i in range(len(sections)): # no logical section headings ordinal = num2words(i+1, ordinal=True).capitalize() t.MakeSection(p, data=sections[i], rounds=t.rounds, round_names=t.round_names[:-1], format=t.format, byes=t.byes, compact=compact, abbr=abbr) @@ -351,4 +380,4 @@ def TournamentDrawOutputDE(data, date, format, mens, qual, compact, abbr): t = Tournament(data=data, mens=mens, format=format, qual=qual, date=date) t.MakeDraw(p, compact=compact, abbr=abbr) names = "" if new_names == empty_names else f"{new_names}" - return [names, "\n".join(p.text)] + return [names, p.error + "\n".join(p.text)] diff --git 
a/Classes/AppTourneydrawEN.py b/Classes/AppTourneydrawEN.py index 5bcfc27..393a258 100644 --- a/Classes/AppTourneydrawEN.py +++ b/Classes/AppTourneydrawEN.py @@ -1,5 +1,5 @@ # Name: Tournament draw generator (enwiki) -# Author: Somnifuguist (w.wiki/fDy) +# Author: Somnifuguist # Date: 10-10-2020 # Content: Generates wikitext for tennis tournament draws @@ -17,11 +17,18 @@ # new_names = "" corrections = [] +wiki_pages = [] def GetNameCorrections(): - # Loads corrections from https://en.wikipedia.org/wiki/User:Somnifuguist/NameCorrections - wikitext = GetSoup("https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&format=json&&titles=User:Somnifuguist/NameCorrections", 'json')['query']['pages']['68204305']['revisions'][0]['*'] + # Loads corrections from page + page = "User:Somnifuguist/NameCorrections" change = [{}, {}] # {full name corrections}, {shortened name corrections} + return change + try: + wikitext = GetSoup("https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&format=json&&titles=" + page, 'json')['query']['pages']['68204305']['revisions'][0]['*'] + except: + return change + # {full name corrections}[player] = [full name link, shortened name link] shortened = False for d in wikitext.split("\n* "): @@ -44,11 +51,7 @@ def GetNameCorrections(): change[1][d[0]] = d[1] return change -def GetNameLink(name): - # Finds and returns formatted name and wikilinks for given name. - if name == "": - return ["", ""] - # old_name = name +def NormaliseName(name): name = name.replace(". ", ".").replace(".", ". 
") split_name = name.split(" ") mixed_case = [f for f in split_name if (not f.islower() and not f.isupper()) or f.islower()] @@ -59,11 +62,37 @@ def GetNameLink(name): surname = HumanName(" ".join(split_name[surname_index:])) surname.capitalize(force=True) name = (first_names + " " + str(surname)).strip() + return name + +## bulk download of player wikipedia pages rather than repeated page requests +# def FetchWikiPages(data): +# return +# global name_links +# global wiki_pages +# +# players = [[g[0] for g in f[0]] for f in data[0]] + [[g[0] for g in f[1]] for f in data[0]] +# players = [j for i in players for j in i] +# players = [NormaliseName(f) for f in players] +# keys = [LowerName(f) for f in players] +# # players = [f for f in players if f not in name_links] +# title_str = "|".join(players[:3]) +# content = GetSoup(f"https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=*&format=json&&titles=" + title_str, 'json') +# # page_text = GetSoup("https://en.wikipedia.org/wiki/" + name.replace(" ", "_"), False).text" +def GetNameLink(name): + # Finds and returns formatted name and wikilinks for given name. global name_links global corrections global new_names + if "qualifier" in name.lower() or name.lower() in ["bye", "player missing"]: + return [name, name] + + if name == "": + return ["", ""] + # old_name = name + name = NormaliseName(name) + key = LowerName(name) # key for name in names dict if key in name_links: links = name_links[key] @@ -72,7 +101,7 @@ def GetNameLink(name): page_text = "" if page_text == None else page_text title = name # player's article's title player_page = ["International Tennis Federation", "Prize money", "Grand Slam", "tennis career", "Wikipedia does not have", "WTA", "ITF", "ATP"] - disamb_page = ["may refer to"] + disamb_page = ["may refer to", "may also refer to"] disamb = " (tennis)" is_disamb = False pipe = False # pipe [[title|name]] instead of [[title]]. 
@@ -119,14 +148,20 @@ def GetNameLink(name): class Page(): def __init__(self): self.text = [] # contains the draw wikitext + self.error = "" class Player(): def __init__(self, player, year): self.flag = "" - if player != None: - self.flag = GetFlagEN(player[1], year) - self.name_link = GetNameLink(player[0]) - self.seed = player[2] + if player[2] != ['BYE']: + if player != None: + self.flag = GetFlagEN(player[1], year) + self.name_link = GetNameLink(player[0]) + self.seed = player[2] + else: + self.flag = "" + self.name_link = ["", ""] + self.seed = [""] class Match(): def __init__(self, match, sets, year): @@ -141,7 +176,7 @@ def __init__(self, match, sets, year): self.winner = self.score[0][0] if self.score[0][1] == ["w/o"]: self.score[1] = ["", "w/o"] if self.winner else ["w/o", ""] # puts w/o on winner's side - self.bye = ["BYE"] in self.score + self.bye = ["BYE"] in self.score or ["INIT"] in self.score self.sets = sets class Tournament(): @@ -159,6 +194,7 @@ def __init__(t, data, format, qual, year): t.doubles = len(data[0][0][0]) == 2 t.qual = qual t.byes = [["BYE"] in match[2][1] for match in data[0]] + t.init = ["INIT"] in data[0][0][2] round_names = ["First Round", "Second Round", "Third Round", "Fourth Round", "Fifth Round", "Quarterfinals", "Semifinals", "Final", "Champion" + ("s" if t.doubles else "")] t.round_names = round_names[:t.rounds-1] + ["Qualifying Competition", "Qualified"] if t.qual else round_names[:t.rounds-3] + round_names[-4:] # sometimes called "Qualifying Round" @@ -213,7 +249,10 @@ def FindSection(sections, seed): reached = t.round_names[seeds[l][1]].replace("Round", "round").replace("Competition", "competition") # upper-case is used in draw templates but not seeds retiredwithdrewdefaulted = ", retired" if seeds[l][2] == "r" else (", withdrew" if seeds[l][2] == "w/o" else (", defaulted" if seeds[l][2] == "d" else "")) name_text = " / ".join([f.flag + " " + f.name_link[0] for f in seeds[l][0]]) - page += [number + (style if style == 
"'''" else "") + name_text + " " + (style if style == "''" else "") + "(" + reached + retiredwithdrewdefaulted + ")" + style] + if t.init: + page += [number + name_text] + else: + page += [number + (style if style == "'''" else "") + name_text + " " + (style if style == "''" else "") + "(" + reached + retiredwithdrewdefaulted + ")" + style] except KeyError: # seed not in draw, usually due to withdrawal page += [number + "''(Withdrew)''"] page += ["}}"] + (["", "{{Seeds explanation}}"] if seed_links else []) @@ -237,12 +276,16 @@ def MakeSection(t, p, data, rounds, round_names, format, byes, compact, short_na team_no = 1 for match in data[j]: p.text += [""] + retired = False + default = False if not match.parsed: # add sup tags if not already added for retirements/tiebreakers for c, set in enumerate(match.score[1:]): if set[-1] == "Retired": match.score[c+1][not match.score[0][0]] += "r" + retired = True elif set[-1] == "Default": match.score[c+1][not match.score[0][0]] += "d" + default = True elif set != ["", ""] and "w/o" not in set and len(set) == 3 and len(set[2]) == 2: match.score[c+1][0] = set[0] + "" + set[2][0] + "" match.score[c+1][1] = set[1] + "" + set[2][1] + "" @@ -250,16 +293,23 @@ def MakeSection(t, p, data, rounds, round_names, format, byes, compact, short_na for i in range(2): # add seed, team name/flag, score parameters for each team in given match team = match.teams[i] - bold = "'''" if match.winner == i else "" - name_text = "
&nbsp;".join([(f.flag + " " + (bold + f.name_link[short_names] + bold) if not match.bye else "") for f in team]) + bold = "'''" if match.winner == i and not t.init else "" + init_bye = any("BYE" in f.name_link[short_names] for f in team) + name_text = "
".join([(f.flag + " " + (bold + f.name_link[short_names] + bold) if (not match.bye or (t.init and not init_bye)) else "") for f in team]) rd = "| RD" + str(j+1) + "-" - p.text += [rd + "seed" + str(team_no) + "=" + ("" if match.bye else ("&nbsp;" if team[0].seed == [] else "/".join(team[0].seed)))] - p.text += [rd + "team" + str(team_no) + "=" + (name_text if name_text != "
&nbsp;" else "")] - + p.text += [rd + "seed" + str(team_no) + "=" + ("" if match.bye and not t.init else ("&nbsp;" if team[0].seed == [] else "/".join(team[0].seed)))] + p.text += [rd + "team" + str(team_no) + "=" + (name_text if name_text != "
" else "")] for set in range(match.sets): p_score = "" if match.bye else match.score[set+1][i] - won_set = (i == match.score[0][1][set]) if set < len(match.score[0][1]) else 0 # no sets in byes/walkovers - p_score = "[" + p_score + "]" if t.match_tiebreak and set == 2 and p_score != "" else p_score # add square brackets for match tiebreak + won_set = (i == match.score[0][1][set]) if set < len(match.score[0][1]) and not (set == len(match.score[0][1]) - 1 and (default or retired)) else 0 # no sets in byes/walkovers + + match_tiebreak = t.match_tiebreak + if t.match_tiebreak and set == 2 and len(match.score) > 1 and match.score[set+1][0].isdigit() and match.score[set+1][1].isdigit(): + if max(int(match.score[set+1][0]), int(match.score[set+1][1])) < 10: + match_tiebreak = False + if "match tiebreak" not in p.error: + p.error += "\nERROR: Mix of match tiebreak scores and non-match tiebreak scores. Check all scores.\n" + p_score = "[" + p_score + "]" if match_tiebreak and set == 2 and p_score != "" else p_score # add square brackets for match tiebreak p.text += [rd + "score" + str(team_no) + "-" + str(set+1) + "=" + ("'''" + p_score + "'''" if won_set else p_score)] team_no += 1 p.text += ["}}"] @@ -282,29 +332,33 @@ def MakeDraw(t, p, compact, abbr, seed_links): final_rounds = {5:2, 6:3, 7:3, 8:4} # number of rounds to show in "Finals" section given the number of rounds in the tournament final_rounds = final_rounds[t.rounds] t.MakeSection(p, data=t.data[-final_rounds:], rounds=final_rounds, round_names=t.round_names[-final_rounds - 1:-1], format=t.format, byes=False, compact=False, short_names=False) - sections = t.SplitData(parts, 4) - halves = [["===Top half==="], ["===Bottom half==="]] - for c, section in enumerate(sections): - if parts > 1: - p.text += halves[c // (parts // 2)] if c % (parts // 2) == 0 else [] # add half heading before each half - p.text += ["====Section " + str(c+1) + "===="] if t.rounds > 5 else [] # add section heading before each section - 
t.MakeSection(p, data=section, rounds=4, round_names=t.round_names[:4], format=(3 if t.format > 5 else t.format), byes=t.byes, compact=compact, short_names=abbr) - t.MakeSeeds(p, sections, seed_links) + if parts == 0: + t.MakeSection(p, data=t.data[-len(t.data):], rounds=len(t.data), round_names=["First Round"] + t.round_names[-len(t.data):-1], format=t.format, byes=False, compact=False, short_names=False) + else: + sections = t.SplitData(parts, 4) + halves = [["===Top half==="], ["===Bottom half==="]] + for c, section in enumerate(sections): + if parts > 1: + p.text += halves[c // (parts // 2)] if c % (parts // 2) == 0 else [] # add half heading before each half + p.text += ["====Section " + str(c+1) + "===="] if t.rounds > 5 else [] # add section heading before each section + t.MakeSection(p, data=section, rounds=4, round_names=t.round_names[:4], format=(3 if t.format > 5 else t.format), byes=t.byes, compact=compact, short_names=abbr) + t.MakeSeeds(p, sections, seed_links) -def initialize(): +def initialize(data): global name_links global corrections global new_names name_links = LoadJSON("data/NamesEN.json") corrections = GetNameCorrections() + # FetchWikiPages(data) # new_names = "
New names encountered:
\n" - return "", "\n".join(p.text) + return "", p.error + "\n".join(p.text) diff --git a/Classes/ClassSupportFunctions.py b/Classes/ClassSupportFunctions.py index 59d5d15..15f0894 100644 --- a/Classes/ClassSupportFunctions.py +++ b/Classes/ClassSupportFunctions.py @@ -5,6 +5,9 @@ from bs4 import BeautifulSoup # from selenium import webdriver # from selenium.webdriver.firefox.options import Options as FirefoxOptions +# from webdriver_manager.firefox import GeckoDriverManager + +os.environ['WDM_LOG_LEVEL'] = '0' # silence webdriver_manager output def GetCountrycode(filepath): filename = os.path.basename(filepath) @@ -16,7 +19,10 @@ def GetSoup(path, headers): elif headers == False: # want request only return requests.get(path) elif headers == "json": - return json.loads(requests.get(path).text) + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36' + } + return json.loads(requests.get(path, headers=headers).text) elif headers == {}: response = requests.get(path).text else: @@ -34,17 +40,40 @@ def SaveJSON(path, data): with open(path, 'w') as fp: json.dump(data, fp, sort_keys=True, indent=4) +def LoadFile(path): + data = [] + if os.path.exists(path): + with open(path, "r", encoding="ISO 8859-1") as infile: + data = infile.readlines() + data = [f.replace('\n', '') for f in data] + return data if data != None else [] + +def SaveFile(path, data): + with open(path, "wb") as outfile: + data = [f.encode('utf-8') for f in data] + outfile.write(("\n".encode('utf-8')).join(data)) + def Dedupe(x): return list(dict.fromkeys(x)) -# def GetSoupSelenium(url): -# options = FirefoxOptions() -# options.add_argument("--headless") -# driver = webdriver.Firefox(options=options) -# driver.get(url) -# time.sleep(5) # wait for JavaScript to load -# source = driver.page_source -# soup = get_soup(source, True) +def GetSoupSelenium(url, driver): + if driver == None: + options = FirefoxOptions() + 
options.add_argument("--headless") + driver = webdriver.Firefox(options=options, executable_path=GeckoDriverManager().install()) + profile = webdriver.FirefoxProfile() + profile.set_preference("general.useragent.override", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36") + driver.get(url) + time.sleep(2) # wait for JavaScript to load + source = driver.page_source + soup = GetSoup(source, True) + return soup, driver + +def GetSoupWayback(url): + soup = GetSoup("http://web.archive.org/save/" + url, None) + soup = GetSoup("https://web.archive.org/web/2030/" + url, None) + print ("https://web.archive.org/web/2030/" + url) + return soup def LowerName(name): return name.lower().replace("-", "").replace(" ", "").replace(".", "") diff --git a/Classes/ScrapeTournamentATP.py b/Classes/ScrapeTournamentATP.py new file mode 100644 index 0000000..613892a --- /dev/null +++ b/Classes/ScrapeTournamentATP.py @@ -0,0 +1,433 @@ +# Name: ATP Tournament scraper +# Author: Somnifuguist +# Date: 29-10-2021 +# Content: Scrapes data from ATP draws + +import copy +import datetime +import hashlib +import math +import os +import re +import sys +import time + +from ClassSupportFunctions import * + +flags = LoadJSON("data/FlagsATP.json") + +errors = "" +format = None + +def ExtractScore(score): + global errors + global format + + ties = [f.text.strip() for f in score.find_all("sup")] + for s in score('sup'): + s.decompose() + score = [f.strip() for f in score.text.strip().split("\n")if f.strip() != ''] + + if score == ["(W/O)"]: + new_score = [[0, ["w/o"]], ["Walkover"]] + else: + new_score = [[0, []]] + tie_index = 0 + for s in score: + if s == "(RET)": + if len(new_score) == 1: + new_score.append(['0', '0', "Retired"]) # retirement happened before first game of match finished + else: + if (max(int(new_score[-1][0]), int(new_score[-1][1])) > 5 and abs(int(new_score[-1][0]) - int(new_score[-1][1])) > 1): + 
new_score.append(['0', '0', "Retired"]) # retirement happened after set finished + elif (len(new_score[-1]) == 3 and abs(int(new_score[-1][-1][0]) - int(new_score[-1][-1][1])) > 1) or (int(new_score[-1][0]) + int(new_score[-1][1]) == 13): + new_score.append(['0', '0', "Retired"]) # retirement happened after tiebreak set finished + elif len(new_score[-1]) == 2: + new_score[-1] += ['Retired'] # retirement happened mid-set + elif s == "(DEF)": + if len(new_score) == 1: + new_score.append(['0', '0', "Default"]) # default happened before first game of match finished + else: + if (max(int(new_score[-1][0]), int(new_score[-1][1])) > 5 and abs(int(new_score[-1][0]) - int(new_score[-1][1])) > 1): + new_score.append(['0', '0', "Default"]) # default happened after set finished + elif (len(new_score[-1]) == 3 and abs(int(new_score[-1][-1][0]) - int(new_score[-1][-1][1])) > 1) or (int(new_score[-1][0]) + int(new_score[-1][1]) == 13): + new_score.append(['0', '0', "Default"]) # default happened after tiebreak set finished + elif len(new_score[-1]) == 2: + new_score[-1] += ['Default'] # default happened mid-set + elif s == "(W/O)": + pass + else: + new_set = [] + if "-" in s: + format = 2 + new_set = s.split("-") + elif len(s) == 2: + new_set = [s[0], s[1]] + elif len(s) == 1: + if "Partial" not in errors: + errors += "
WARNING: partial score in draw; check every score for errors." + continue + elif len(s) ==4: + new_set = [s[:2], s[2:]] + else: + if int(s[0]) == int(s[1:]) - 2: + new_set = [s[0], s[1:]] + else: + new_set = [s[:2], s[2:]] + if int(new_set[0]) + int(new_set[1]) == 13 and ties != []: + tie_l = int(ties[0]) + tie_w = tie_l + 2 if tie_l > 4 else 7 + if new_set[0] == '7': + new_set = ['7', '6', [str(tie_w), str(tie_l)]] + else: + new_set = ['6', '7', [str(tie_l), str(tie_w)]] + del ties[0] + new_score.append(new_set) + + for set in new_score[1:]: + if 'Retired' in set: + new_score[0][1].append("r") + elif 'Default' in set: + new_score[0][1].append("d") + else: + new_score[0][1].append(int(int(set[0]) 0: + match_count -= len(players) * ((1/2) ** (qual_rounds + 1)) + qual_rounds += 1 + rounds = math.ceil(math.log2(len(players))) + sequence = [f for f in get_sequence(rounds) if f <= qual_rounds] + + for c, player in enumerate(players): + match_indices = [i for i, x in enumerate(winners) if x == player] + matches_won = len(match_indices) + + match_indices_rounds = [[match_indices[p], sequence[match_indices[p]]] for p in range(matches_won)] # round no. of each match in indices + reordered_matches = [f[0] for f in sorted(match_indices_rounds, key=lambda x: x[1])] # sort matches into chronological order + + scores_copy = copy.deepcopy(scores) + for i in range(matches_won): + scores[match_indices[i]] = scores_copy[reordered_matches[i]] + + scores_indices_rounds = [[sequence[p], scores[p]] for p in range(len(scores))] # round no. 
of each match in indices + reordered_scores = [f[1] for f in sorted(scores_indices_rounds, key=lambda x: x[0])] # sort matches into chronological order + return scores + +def ReverseScore(score): + if score == "(NA)": + return score + else: + for c, f in enumerate(score[1:]): + if "Retired" in f or "Default" in f: + score[c+1] = f[:-1][::-1] + [f[-1]] + elif len(f) == 3 and len(f[2]) == 2: # tiebreak + score[c+1] = f[:-1][::-1] + [f[-1][::-1]] + else: + f.reverse() + score[0][0] = 1 + set_winners = [] + for set in score[0][1]: + if set == "w/o": + set_winners.append("w/o") + elif set == "d" or set == "r": + set_winners.append(1) + else: + set_winners.append(int(not set)) + score[0][1] = set_winners + return score + +def FillMatches(matches, winners, scores): + max_winners_left = 0 + while len(winners) > max_winners_left: + matches += [[]] + new_match = [] + for c, match in enumerate(matches[-2]): + error = False + winner = not match[0][0][0] in winners + new_match.append(match[winner]) + + try: + index = winners.index(match[winner][0][0]) # Scores must be in order so that the first instance of each player is their first match, hence Reorder() + except ValueError: + global errors + if winners.count(None) > 0: + if "2nd round" not in errors: + errors += "
WARNING: match data missing in 2nd round or later; check every match for errors." + index = winners.index(None) + else: + if "issues" not in errors: + errors += "
WARNING: match data has issues; check every match for errors." + max_winners_left += 1 + error = True + matches[-2][c].append([[0, []], ["BYE"]]) + + if not error: + matches[-2][c].append(ReverseScore(scores[index]) if winner else scores[index]) + del winners[index] + del scores[index] + + if c%2 == 1: + matches[-1].append(new_match) + new_match = [] + return matches[:-1] + +def ExtractTeam(team, doubles): + qual = re.search("QUALIFIER\d*\n", team.text) + if qual != None and not doubles: + return [[qual.group()[:-1], "", ["Q"]]] + players = team.find_all('a', {'class': 'scores-draw-entry-box-players-item'}) + team_data = [] + if players != []: + for p in players: + player = p['data-ga-label'] + seed = [""] + if team.find('span') != None: + aliases = {"AL":"Alt", "S":"SE"} + seed = [f.strip("()") for f in team.find('span').text.strip().split(" ")] + allowed = ["AL", "Alt", "ITF", "JE", "LL", "PR", "Q", "S", "SE", "WC"] + for c, s in enumerate(seed): + if not s.isdigit(): + if s in aliases: + seed[c] = aliases[s] + + flag_b64 = p.find('img', {'class': 'scores-draw-entry-box-players-item-flag'})['src'][26:] + flag_hash = hashlib.md5(flag_b64.encode('utf-8')).hexdigest() + country = "" + + if flag_hash not in flags: + player_id = re.search("\w{4}\/overview", team.a['href']).group()[:4].upper() + url = "https://www.atptour.com/-/ajax/Head2HeadSearch/GetHead2HeadData/F324/" + player_id + country = GetSoup(url, 'json')['playerRight']['PlayerCountryCode'] + flags[flag_hash] = country + SaveJSON("data/FlagsATP.json", flags) + time.sleep(5) + else: + country = flags[flag_hash] + + team_data.append([player, country, seed]) + elif "bye" in team.text.strip().lower(): + team_data = [["BYE", "", [""]]] * (2 if doubles else 1) + else: + team_data = [["PLAYER MISSING", "", [""]]] * (2 if doubles and qual == None else 1) + if qual != None: + team_data.append([qual.group()[:-1], "", ["Q"]]) + return team_data + +def AddMissingRounds(data, doubles): + base = [[['', '', ["BYE"]]* (2 if 
# NOTE(review): the SOURCE this was reconstructed from shows a raw line break at
# the start of every warning literal (and before the final "\n" appended in
# ScrapeTournamentATP). Those breaks are written as "\n" here - confirm against
# the original literals before merging.


def _blank_score():
    """Return a new placeholder score for a match without a usable result."""
    # Built on every call so that no two matches ever share the same list objects.
    return [[0, []], ["BYE"]]


def _player_label(entry):
    """Return the 'data-ga-label' of the first player link in entry, or None."""
    links = entry.find_all('a', {'class': 'scores-draw-entry-box-players-item'})
    try:
        return links[0]['data-ga-label']
    except (IndexError, KeyError, TypeError):
        # no player link at all, or the link carries no label attribute
        return None


def _qualifier_label(markup):
    """Return the first "QUALIFIER<n>" placeholder found in markup, or None."""
    fused = re.search(r"QUALIFIER\d+Q", markup)
    if fused is not None:
        # The pattern requires a trailing "Q" (presumably the start of an
        # adjacent placeholder, e.g. "QUALIFIER1QUALIFIER2"); drop it again.
        return fused.group()[:-1]
    plain = re.search(r"QUALIFIER\d+ *", markup)
    if plain is not None:
        return plain.group().strip()
    return None


def ExtractTournament(soup, qualifying, doubles):
    """Build the match structure of a draw from its parsed page.

    Args:
        soup: parsed draw page; every 'scores-draw-entry-box' div is read as one match.
        qualifying: when truthy, scores are reordered with the qualifying flag set
            and the size-based repairs below are skipped.
        doubles: passed to ExtractTeam() and AddMissingRounds(); also decides how
            many "PLAYER MISSING" placeholders fill an absent team.

    Returns:
        The result of FillMatches(), or of AddMissingRounds() when the draw is
        treated as empty.

    Side effects:
        Appends warning text to the module-level ``errors`` string.
    """

    def warn(text, once_key=None):
        # Append a warning to the shared error text; when once_key is given the
        # warning is skipped if that key already occurs in the text.
        global errors
        if once_key is None or once_key not in errors:
            errors += text

    table = soup.find_all('div', {'class': 'scores-draw-entry-box'})
    matches = [[]]
    players = []
    # Teams without a "PLAYER MISSING" placeholder, re-paired two by two; only
    # used below for empty draws whose first round has 9 or 17 boxes.
    matches_corrected = [[]]
    pending = []

    for entry in table:
        match = []
        for row in entry.find_all('tr'):
            team = ExtractTeam(row, doubles)
            match.append(team)
            if ["PLAYER MISSING", "", [""]] not in team:
                pending.append(team)

            players.append(team[0][0])
            if len(pending) == 2:
                matches_corrected[0].append(pending.copy())
                pending = []

        if len(match) == 1:
            match.append([["PLAYER MISSING", "", [""]]] * (2 if doubles else 1))
            players.append("PLAYER MISSING")
            # The key has to match the message text, otherwise the warning is
            # repeated for every affected match.
            warn("\nWARNING: player data missing; check every match for errors.",
                 once_key="player data missing")
        if match != []:
            matches[0].append(match)

    winners = []
    scores = []
    byes = []
    for entry in table:
        score = entry.find('a', {'class': 'scores-draw-entry-box-score'})
        if score is not None:
            if any(tag in score.text.strip() for tag in ["(NA)", "(UNP)", "(WEA)", "(ABN)"]):
                winner = None
                score = _blank_score()
                warn("\nWARNING: NA/UNP/WEA/ABN matches; Check every match for errors.",
                     once_key="NA/UNP/WEA/ABN")
            else:
                # a linked player wins over a qualifier placeholder
                winner = _player_label(entry)
                if winner is None:
                    winner = _qualifier_label(str(entry))
                score = ExtractScore(score)
            scores.append(score)
            winners.append(winner)

        elif entry.text.strip() == "":  # players & score missing
            warn("\nWARNING: match data missing; check every match for errors.",
                 once_key="match data missing;")
            scores.append(_blank_score())
            winners.append(None)

        elif "\n Bye\n" in entry.text:
            # for byes a qualifier placeholder wins over a linked player
            winner = _qualifier_label(str(entry))
            if winner is None:
                winner = _player_label(entry)
            byes.append(winner)

    def insert_rounds(rounds, from_round):
        # Insert blank results at every position get_sequence() assigns to a
        # round later than from_round.
        sequence = get_sequence(rounds)
        for position, round_number in enumerate(sequence):
            if round_number > from_round:
                scores.insert(position, _blank_score())
                winners.insert(position, None)

    if qualifying:
        scores = Reorder(players, winners, scores, True)
        return FillMatches(matches, winners, scores)

    if len(players) == 10 and len(winners) == 12:
        warn("\nWARNING: extra matches present; check every match for errors.")
        scores = scores[:-3]
        winners = winners[:-3]
        matches = [matches[0][:-1]]

    if len(winners) == 2 * len(matches[0]) and winners.count(None) != len(winners):
        warn("\nWARNING: draw has an additional final/3rd place match which needs to be added.")
        rounds = int(math.log2(len(winners)))
        del scores[rounds]
        del winners[rounds]
    elif len(winners) == 6:
        insert_rounds(3, 2)
    elif len(winners) == 12 and len(players) == 16:
        insert_rounds(4, 2)
    elif len(winners) == 14:
        insert_rounds(4, 4)
    elif len(winners) == 16:
        insert_rounds(5, 1)
    elif len(winners) == 24 and len(players) == 32:
        insert_rounds(5, 2)
    elif winners.count(None) >= len(winners) - (2 if len(winners) < 64 else 5):  # empty draw
        if len(matches[0]) in [9, 17]:
            matches = matches_corrected
        matches = [[first_round + [[[0, []], ["INIT"]]] for first_round in matches[0]]]
        matches = AddMissingRounds(matches, doubles)
        warn("\nWARNING: one or more rounds missing; check every round for errors.")
        return matches

    # a single unknown winner next to a single bye: the bye's player advanced
    if winners.count(None) == 1 and len(byes) == 1:
        winners[winners.index(None)] = byes[0]

    scores = Reorder(players, winners, scores, False)
    return FillMatches(matches, winners, scores)


def ScrapeTournamentATP(url, data):
    """Scrape one ATP draw page and return its extracted data.

    Args:
        url: address of the draw page, or None to parse ``data`` instead.
        data: handed to GetSoup() when url is None; not read otherwise.

    Returns:
        A tuple (data, format, qual, doubles, date, errors). Six Nones are
        returned when the page is blocked ("Attention Required!"), is a 404
        page, or has no draw-type link. When a file in ../TournamentsATP holds
        an entry under the key derived from url, that stored entry is returned
        unchanged.

    Side effects:
        Resets the module-level ``errors`` and ``format`` before scraping.
    """
    global format
    global errors
    errors = ""
    format = None
    nothing = (None, None, None, None, None, None)

    if url is None:
        soup = GetSoup(data, True)
    else:
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'}
        key_match = re.search(r"_(\d+)_(\d+)_(draws|results)\?matchtype=(qualifier)*(singles|doubles)", url.replace("/", "_").lower())
        if key_match is not None:
            # Only consult the saved draws when the URL yields a usable key.
            parts = key_match.groups()
            cache_key = f"{parts[0]}_{parts[1]}_{parts[4]}_{parts[3]}".lower()
            for file_name in os.listdir("../TournamentsATP"):
                cached = LoadJSON("../TournamentsATP/" + file_name)
                if cache_key in cached:
                    return cached[cache_key]
        soup = GetSoup(url, headers)
        if "Attention Required!" in str(soup):
            return nothing

    if soup.find('h3', {'class': 'not-found-404'}) is not None:
        return nothing

    typelink = soup.find('a', {'class': 'icon icon-blank current'})
    if typelink is None:
        typelink = soup.find('a', {'class': 'icon icon-blank disabled current'})
    if typelink is None:
        return nothing
    draw_type = typelink.text.strip()
    doubles = "Doubles" in draw_type
    qual = "Q" in draw_type

    full_date = None
    date_span = soup.find('span', {'class': 'tourney-dates'})
    if date_span:
        full_date = re.search(r"\d{4}\.\d{2}\.\d{2}", date_span.text.strip())
    if full_date is not None:
        date = datetime.datetime.strptime(full_date.group(), "%Y.%m.%d").date()
    else:
        # No usable date on the page: fall back to 1 January of the year in the draw link.
        year = re.search(r"\d{4}\/draws", typelink['href']).group()[:4]
        date = datetime.date(int(year), 1, 1)

    data = ExtractTournament(soup, qualifying=qual, doubles=doubles)
    errors = (errors + "\n\n" if errors != "" else "")
    return data, format, qual, doubles, date, errors
default happened mid-set + if len(new_score) == 1: + new_score.append(['0', '0', "Default"]) # default happened before first game of match finished + else: + if (max(int(new_score[-1][0]), int(new_score[-1][1])) > 5 and abs(int(new_score[-1][0]) - int(new_score[-1][1])) > 1): + new_score.append(['0', '0', "Default"]) # default happened after set finished + elif (len(new_score[-1]) == 3 and abs(int(new_score[-1][-1][0]) - int(new_score[-1][-1][1])) > 1) or (int(new_score[-1][0]) + int(new_score[-1][1]) == 13): + new_score.append(['0', '0', "Default"]) # default happened after tiebreak set finished + elif len(new_score[-1]) == 2: + new_score[-1] += set # default happened mid-set elif set != [""]: tiebreaker = re.search(r"\(\d{1,}\)", set[1]) diff --git a/Classes/app.py b/Classes/app.py index 4362444..b89737f 100644 --- a/Classes/app.py +++ b/Classes/app.py @@ -7,6 +7,7 @@ from AppPlayerWorldranking import GetWorldRanking, RankingOutput from AppTourneydrawEN import TournamentDrawOutputEN from AppTourneydrawDE import TournamentDrawOutputDE +from ScrapeTournamentATP import ScrapeTournamentATP from ScrapeTournamentITF import ScrapeTournamentITF from AppMisc import GetMisc from forms import * @@ -109,26 +110,43 @@ def outputtourneydraw(): compact = request.args.get('compact', type = int) abbr = request.args.get('abbr', type = int) seed_links = request.args.get('seed_links', type = int) - + errors = "" error = False message = "" + names = [] # Rudimentary input validation - if org == 'itf' and not url.startswith("https://event.itftennis.com/itf/web/usercontrols/tournaments/tournamentprintabledrawsheets.aspx?"): + itfurl = "https://event.itftennis.com/itf/web/usercontrols/tournaments/tournamentprintabledrawsheets.aspx?" + atpurl = "https://www.atptour.com/en/scores/" + atpurl2 = "https://www.atptour.com/scores/" + if org == 'itf' and not url.startswith(itfurl): + error = True + output = "Invalid URL: should be a printable draw in format: '" + itfurl + "'...." 
+ elif org != 'itf' and "https://www.atptour.com/" not in url: error = True - output = "Invalid URL: should be a printable draw in format: https://event.itftennis.com/itf/web/usercontrols/tournaments/tournamentprintabledrawsheets.aspx?..." + output = "Invalid URL: should contain: 'https://www.atptour.com/scores/...'" else: try: # Scrape data, then create draw - data, qual, doubles, date = ScrapeTournamentITF(url=url) - if language == "en": - names, output = TournamentDrawOutputEN(data=data, date=date, format=format, qual=qual, compact=compact, abbr=abbr, seed_links=seed_links) - elif language == "de": - names, output = TournamentDrawOutputDE(data=data, date=date, format=format, mens=gender, qual=qual, compact=compact, abbr=abbr) + if org == "itf": + data, qual, doubles, date = ScrapeTournamentITF(url=url) + else: + data, format2, qual, doubles, date, errors = ScrapeTournamentATP(url=url, data=None) + format = format if format2 == None else format2 + if isinstance(date, str): + date = datetime.strptime(date, "%Y-%m-%d").date() + if org != "itf" and data == None: + output = "Draw not in saved database and couldn't be scraped. Try again with an archived version saved in the Wayback Machine (URL format 'https://web.archive.org/...'). Use the Wayback Machine Firefox extension to quickly find/save an archived version of a page. There is no guarantee this will work." + else: + if language == "en": + names, output = TournamentDrawOutputEN(data=data, date=date, format=format, qual=qual, compact=compact, abbr=abbr, seed_links=seed_links) + elif language == "de": + names, output = TournamentDrawOutputDE(data=data, date=date, format=format, mens=gender, qual=qual, compact=compact, abbr=abbr) except Exception: message = str(traceback.format_exc()) error = True names = "" - output = 'The program has encountered an error. Please go back and check that all inputs are correct. If the error persists, please contact Somnifuguist with the offending url:
' + url + '
' + output = 'The program has encountered an error. Please go back and check that all inputs are correct. If the error persists, please contact Somnifuguist see if you can find the same draw on the other site.' + output = errors + output timestamp = "[" + str(datetime.now())[:-7] + "] " log = timestamp + ("PASS: " if not error else "FAIL: ") + "lang=" + language + ", format=" + str(format) + ", compact=" + str(compact) + ", abbr=" + str(abbr) + ", seed_links=" + str(seed_links) +", url=" + url + (", message=\n" + message if message != "" else "") + '\n' with open('tourneydraw.log','a') as f: diff --git a/Classes/data/TestsATP.json b/Classes/data/TestsATP.json new file mode 100644 index 0000000..f534a74 --- /dev/null +++ b/Classes/data/TestsATP.json @@ -0,0 +1,143 @@ +{ + "tests": [ + [ + "https://www.atptour.com/scores/archive/ahmedabad/593/1997/draws?matchtype=doubles", + "missing 1st round match" + ], [ + "https://www.atptour.com/scores/archive/adelaide/7308/1975/draws?matchtype=singles", + "missing 2nd round match" + ], [ + "https://www.atptour.com/scores/archive/andrezieux/827/2003/draws?matchtype=doubles", + "w/os" + ], [ + "https://www.atptour.com/scores/archive/asheville/673/1978/draws?matchtype=doubles", + "missing score, qualifier" + ], [ + "https://www.atptour.com/scores/archive/asheville/673/1978/draws?matchtype=singles", + "nameless qualifier" + ], [ + "https://www.atptour.com/scores/archive/bangkok/755/2002/draws?matchtype=singles", + "retirement after tiebreak" + ], [ + "https://www.atptour.com/scores/archive/birmingham/350/1999/draws?matchtype=singles", + "partial score" + ], [ + "https://www.atptour.com/scores/archive/campinas/259/1980/draws?matchtype=doubles", + "many byes" + ], [ + "https://www.atptour.com/scores/archive/capetown/264/1990/draws?matchtype=singles", + "empty draw" + ], [ + "https://www.atptour.com/scores/archive/heilbronn/460/1998/draws?matchtype=singles", + "number seed and wildcard" + ], [ +
"https://www.atptour.com/scores/archive/aachen/824/2006/draws?matchtype=doubles", + "match tiebreak" + ], [ + "https://www.atptour.com/scores/archive/augsburg/8266/2019/draws?matchtype=doubles", + "(UNP)" + ], [ + "https://www.atptour.com/scores/archive/ashkelon/216/1984/draws?matchtype=doubles", + "missing semifinals" + ], [ + "https://www.atptour.com/scores/archive/bad-saarow/639/1996/draws?matchtype=doubles", + "missing after round 1" + ], [ + "https://www.atptour.com/scores/archive/benin-city/242/1984/draws?matchtype=doubles", + "NAs after round 1" + ], [ + "https://www.atptour.com/scores/archive/liege/180/1988/draws?matchtype=singles", + "first round only with byes" + ], [ + "https://www.atptour.com/scores/archive/mendoza/566/1995/draws?matchtype=singles", + "33 players empty" + ], [ + "https://www.atptour.com/scores/archive/wrexham/782/2002/draws?matchtype=singles", + "first round missing player" + ], [ + "https://www.atptour.com/scores/archive/izmir/3834/2008/draws?matchtype=doubles", + "wrong ordered matches" + ], [ + "https://www.atptour.com/scores/archive/beckenham/279/1980/draws?matchtype=doubles", + "qualifiers, byes, missing scores" + ], [ + "https://www.atptour.com/scores/archive/barcelona/552/1996/draws?matchtype=doubles", + "abandoned final round" + ], [ + "https://www.atptour.com/scores/archive/benin-city/242/1983/draws?matchtype=doubles", + "abandoned match" + ], [ + "https://www.atptour.com/scores/archive/nur-sultan/3618/2020/draws?matchtype=doubles", + "missing rounds" + ], [ + "https://www.atptour.com/scores/archive/zell-am-see/286/1979/draws?matchtype=doubles", + "byes, missing scores" + ], [ + "https://www.atptour.com/scores/archive/oberstaufen/833/2007/draws?matchtype=doubles", + "(UNP) scores, GE" + ], [ + "https://www.atptour.com/scores/archive/cherbourg/398/2019/draws?matchtype=doubles", + "(UNP), (W/O), alternates, GE" + ], [ + "https://www.atptour.com/scores/archive/olbia/643/1998/draws?matchtype=doubles", + "missing scores, byes, GE" + ], [ +
"https://www.atptour.com/scores/archive/potchefstroom/8371/2020/draws?matchtype=doubles", + "2 missing rounds" + ], [ + "https://www.atptour.com/scores/archive/tallahassee/692/2003/draws?matchtype=doubles", + "match tiebreak retirement" + ], [ + "https://www.atptour.com/scores/archive/bucharest/3669/2008/draws?matchtype=doubles", + "QUALIFIER1QUALIFIER2" + ], [ + "https://www.atptour.com/scores/archive/rio-de-janeiro/299/1986/draws?matchtype=doubles", + "mixed up winners" + ], [ + "https://www.atptour.com/scores/archive/little-rock/9188/2021/draws?matchtype=doubles", + "bottom player in doubles team shown instead of top, GE" + ], [ + "https://www.atptour.com/scores/archive/santiago/883/1996/draws?matchtype=doubles", + "mixed up winners, GE" + ], [ + "https://www.atptour.com/scores/archive/miami/3764/2008/draws?matchtype=doubles", + "mixed up winners, GE" + ], [ + "https://www.atptour.com/scores/archive/muttenz/328/1972/draws?matchtype=singles", + "additional 3rd/4th place match" + ], [ + "https://www.atptour.com/scores/archive/cairo/206/1986/draws?matchtype=singles", + "32 rounds missing rds 2+" + ], [ + "https://www.atptour.com/scores/archive/stuttgart/321/1975/draws?matchtype=singles", + "8 players missing final" + ], [ + "https://www.atptour.com/scores/archive/porto-alegre/235/1979/draws?matchtype=doubles", + "qualifier and non-qualifier as doubles team" + ], [ + "https://www.atptour.com/scores/archive/charlotte/1505/1979/draws?matchtype=singles", + "standalone qualifier" + ], [ + "https://www.atptour.com/scores/archive/ogun/203/1983/draws?matchtype=doubles", + "mix of normal and match tiebreak deciders" + ], [ + "https://www.atptour.com/scores/archive/sao-paulo/6406/2012/draws?matchtype=singles", + "rounds missing with final" + ], [ + "https://www.atptour.com/scores/archive/dusseldorf/615/1985/draws?matchtype=singles", + "3rd/4th place match" + ], [ + "https://www.atptour.com/scores/archive/london/2064/1972/draws?matchtype=singles", + "2 rounds" + ], [ +
"https://www.atptour.com/scores/archive/dusseldorf/615/2008/draws?matchtype=singles", + "round robin, GE" + ], [ + "https://www.atptour.com/scores/archive/dusseldorf/615/2010/draws?matchtype=doubles", + "round robin different size" + ], [ + "https://www.atptour.com/scores/archive/wimbledon/540/1920/draws?matchtype=singles", + "challenger round" + ] + ] +} diff --git a/Classes/data/tests.json b/Classes/data/TestsITF.json similarity index 100% rename from Classes/data/tests.json rename to Classes/data/TestsITF.json diff --git a/Classes/forms.py b/Classes/forms.py index 22f04f9..27ae7d2 100644 --- a/Classes/forms.py +++ b/Classes/forms.py @@ -25,7 +25,7 @@ class FormPlayerTournamentwins(FlaskForm): class FormTournamentdraw(FlaskForm): language = SelectField('Wikipedia language', choices=[('en', 'en'), ('de', 'de')]) - org = SelectField('Organisation', choices=[('itf', 'ITF')]) + org = SelectField('Organisation', choices=[('itf', 'ITF'), ('atp', 'ATP')]) url = StringField('Tournament link', validators=[DataRequired()]) gender = SelectField('Gender (dewiki)', choices=[(1, "men's"), (0, "women's")]) format = SelectField('Match format', choices=[(3, 'best of 3'), (5, 'best of 5'), (2, 'best of 3; tiebreak deciding set'), (35, 'best of 3; best of 5 final')]) diff --git a/Classes/requirements.txt b/Classes/requirements.txt index 518bf4c..56170e1 100644 --- a/Classes/requirements.txt +++ b/Classes/requirements.txt @@ -16,6 +16,8 @@ pyparsing qwikidata rdflib requests +selenium six soupsieve urllib3 +webdriver_manager \ No newline at end of file