In [38]:
import re
from urllib.parse import urljoin
from urllib.request import urlopen

from bs4 import BeautifulSoup

In [139]:
# Site root; the index-page URLs below are built from it.
BASE_URL = 'http://mzbs.pl'
# Index pages that are scraped for links to individual tournament results.
MONDAY_TOURNAMENTS_URL = BASE_URL + '/wyniki/41'
WEDNESDAY_TOURNAMENTS_URL = BASE_URL + '/wyniki/43'

# Regex patterns are raw strings so backslashes reach the regex engine verbatim;
# a plain '\d' is an invalid string escape and triggers a warning on modern Python.
# Matches the last path segment of a URL (everything after the final '/').
LAST_SLASH_RE = r'[^/]+$'
# Matches a dash-separated numeric date such as 2018-07-02.
DATE_RE = r'\d+\-\d+\-\d+'
# File name substituted for the last path segment to reach the results page.
# NOTE(review): "SUFIX" is a typo for "SUFFIX"; the name is kept because other cells use it.
RESULTS_SUFIX = 'wyn.html'

In [145]:
def generate_tournament_urls(tournament_page_url, year='2018'):
    """Return the results-page URLs linked from a tournament index page.

    Every ``<a href=...>`` inside the first ``<ul>`` of the page is resolved to an
    absolute URL, its last path segment is replaced with ``RESULTS_SUFIX``, and
    the URL is kept only if it contains ``year``.

    Args:
        tournament_page_url: URL of the index page to scrape.
        year: Substring the resulting URL must contain. Converted with ``str()``,
            so an int is accepted. Pass ``None`` to disable the filter.

    Returns:
        List of URL strings, in the order the links appear on the page.

    Note:
        A link ending in '/' has no last path segment for ``LAST_SLASH_RE`` to
        match, so it is returned unchanged.
    """
    # The context manager closes the HTTP response even if parsing raises.
    with urlopen(tournament_page_url) as response:
        soup = BeautifulSoup(response, 'html')

    year_marker = None if year is None else str(year)

    urls = []
    # href=True skips anchors without an href, which would otherwise raise KeyError.
    for anchor in soup.ul.find_all('a', href=True):
        # urljoin keeps absolute links untouched and resolves relative ones against
        # the page they came from; a substring test on the site root cannot do both.
        absolute_url = urljoin(tournament_page_url, anchor['href'])
        results_url = re.sub(LAST_SLASH_RE, RESULTS_SUFIX, absolute_url)
        if year_marker is None or year_marker in results_url:
            urls.append(results_url)
    return urls

In [159]:
def get_partner_name(row, player_name):
    """Extract the name that shares a table cell with ``player_name``.

    The first ``<td>`` of ``row`` whose markup contains ``player_name`` is taken,
    the children of its ``<a>`` element are concatenated as text, and both the
    ``<br/>`` separator and ``player_name`` itself are removed from the result.

    Args:
        row: Parsed table row exposing ``findAll('td')``.
        player_name: Name of the player whose partner is wanted.

    Returns:
        The remaining text of the cell, i.e. the partner's name.

    Raises:
        IndexError: If no cell in ``row`` mentions ``player_name``.
    """
    matching_cells = [cell for cell in row.findAll('td') if player_name in str(cell)]
    anchor_children = matching_cells[0].a.contents

    pair_text = "".join(map(str, anchor_children))
    # Remove the line break first, then the player, leaving only the partner.
    for fragment in ('<br/>', player_name):
        pair_text = pair_text.replace(fragment, '')
    return pair_text

In [164]:
def get_results(player_name, urls):
    """Collect one summary line per tournament in which ``player_name`` appears.

    Each URL is downloaded and parsed. A page is skipped unless exactly one row
    of its first ``<table>`` contains ``player_name``.

    Args:
        player_name: Name to look for in the table rows (substring match).
        urls: Iterable of results-page URLs.

    Returns:
        List of strings formatted as ``player|partner|place/pairs|date|url``,
        in the reverse of the order the URLs were given.

    Raises:
        AttributeError: If a page has no ``<table>``, or its ``<h4>`` holds no
            text matching ``DATE_RE``.
    """
    results = []

    for url in urls:
        # The context manager closes the HTTP response even if parsing raises.
        with urlopen(url) as response:
            soup = BeautifulSoup(response, 'html')

        table_rows = soup.table.find_all('tr')
        # Keep each matching row with its position so the place needs no second scan.
        matches = [
            (position, table_row)
            for position, table_row in enumerate(table_rows)
            if player_name in str(table_row)
        ]
        # Compare by value: `is not 1` tests object identity and only works
        # through CPython's small-integer caching.
        if len(matches) != 1:
            continue
        place_in_tournament, target_row = matches[0]

        date = re.search(DATE_RE, str(soup.h4)).group(0)
        # Presumably row 0 is the table header, so a row's index is its place
        # and the pair count is one less than the row count - TODO confirm.
        number_of_pairs = len(table_rows) - 1
        partner_name = get_partner_name(target_row, player_name)

        standing = str(place_in_tournament) + '/' + str(number_of_pairs)
        results.append('|'.join([player_name, partner_name, standing, date, url]))

    return results[::-1]

In [165]:
# Scrape both index pages for their results-page URLs (one HTTP request per index page).
urls_monday_2018 = generate_tournament_urls(MONDAY_TOURNAMENTS_URL)
urls_wednesday_2018 = generate_tournament_urls(WEDNESDAY_TOURNAMENTS_URL)

In [166]:
# Series to report on: 'name' is the label printed in the report and 'url' holds
# the list of results-page URLs for that series (a list, despite the singular key).
tournaments = [
    {'url': urls_monday_2018, 'name': 'Monday'},
    {'url': urls_wednesday_2018, 'name': 'Wednesday'},
]
# Players to look up; matching is by substring, so the spelling must match the result pages.
players = [
    'Patryk Mrukot',
    'Lech Adamus',
    'Łukasz Baniak',
]

In [167]:
# Print one block of results per (player, tournament series) pair.
# NOTE: get_results downloads every page of the series on each call, so each
# page is fetched once per player.
for player in players:
    for tournament in tournaments:
        # Block header: "<player> - <series name>".
        print(player + ' - ' + tournament['name'])
        # tournament['url'] is the list of results-page URLs for the series.
        results = get_results(player, tournament['url'])
        for result in results:
            print(result)
        # Separator line closing the block.
        print('=======================================')

Patryk Mrukot - Monday
Patryk Mrukot|Albert Mosiałek|22/24|2018-07-02|http://mzbs.pl/files/2018/wyniki/180702E/wyn.html
Patryk Mrukot|Albert Mosiałek|5/34|2018-07-16|http://mzbs.pl/files/2018/wyniki/180716/wyn.html
Patryk Mrukot|Albert Mosiałek|36/36|2018-07-23|http://mzbs.pl/files/2018/wyniki/180723PO/wyn.html
Patryk Mrukot|Albert Mosiałek|24/40|2018-08-13|http://mzbs.pl/files/2018/wyniki/180813/wyn.html
Patryk Mrukot|Albert Mosiałek|25/37|2018-08-20|http://mzbs.pl/files/2018/wyniki/180820/wyn.html
Patryk Mrukot|Albert Mosiałek|27/35|2018-10-01|http://mzbs.pl/files/2018/wyniki/181001/wyn.html
Patryk Mrukot|Łukasz Baniak|7/27|2018-10-08|http://mzbs.pl/files/2018/wyniki/181008/wyn.html
Patryk Mrukot|Albert Mosiałek|31/31|2018-10-15|http://mzbs.pl/files/2018/wyniki/181015/wyn.html
Patryk Mrukot|Albert Mosiałek|15/30|2018-11-12|http://mzbs.pl/files/2018/wyniki/181112/wyn.html
Patryk Mrukot - Wednesday
Patryk Mrukot|Albert Mosiałek|42/44|2018-04-04|http://mzbs.pl/files/2018/wyniki/180404/w