In [1]:
!pip install requests beautifulsoup4




[notice] A new release of pip is available: 23.2.1 -> 24.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
# import requests
# from bs4 import BeautifulSoup
# import pandas as pd
# import math

# def fetch_div_relative(url):
#     headers = {
#         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
#         'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
#         'Accept-Language': 'en-US,en;q=0.5',
#         'Accept-Encoding': 'gzip, deflate',
#         'Connection': 'keep-alive',
#         'Upgrade-Insecure-Requests': '1'
#     }
    
#     try:
#         response = requests.get(url, headers=headers, timeout=2)
#         response.raise_for_status()  # Raises an HTTPError for bad responses
#     except requests.exceptions.Timeout:
#         print(f"Request timed out for URL: {url}")
#         return None
#     except requests.exceptions.RequestException as e:
#         print(f"Request failed: {e}")
#         return None

#     soup = BeautifulSoup(response.text, 'html.parser')
#     div_relative_content = soup.find('div', class_='relative')
#     return div_relative_content

# def extract_tables(div_relative_content):
#     tables = div_relative_content.find_all('table')
#     return tables

In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import math

def fetch_page_content(url, timeout=1.5):
    """Download a web page and return it as a parsed BeautifulSoup document.

    Args:
        url: Address of the page to download.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Defaults to 1.5, the value that used to be hard-coded here.

    Returns:
        The response body parsed with ``html.parser``, or ``None`` when the
        request timed out, failed, or returned an HTTP error status (a message
        is printed in each of those cases).
    """
    # Browser-like headers sent with every request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1'
    }

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Raises an HTTPError for bad responses
    except requests.exceptions.Timeout:
        # Timeout is a RequestException subclass, so it must be caught first
        # to get its dedicated message.
        print(f"Request timed out for URL: {url}")
        return None
    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
        return None

    return BeautifulSoup(response.text, 'html.parser')

def fetch_match_tables(url):
    """Fetch a session report page and return its match tables.

    Args:
        url: Address of the session group report page.

    Returns:
        A list with every ``<table>`` on the page except the first two, or an
        empty list when the page could not be fetched.
    """
    page_content = fetch_page_content(url)
    # fetch_page_content returns None on any request failure; without this
    # guard the find_all call below raised AttributeError.
    if page_content is None:
        return []
    # remove the top 2 tables since they are labels
    return page_content.find_all('table')[2:]

# URL of the webpage to scrape
url = "http://www.tabletennisleague.com/AGTTA/SessionGroupReportArchive/SessionGroupReport2009November17.HTM"
# url = "http://www.tabletennisleague.com/AGTTA/SessionGroupReportArchive/SessionGroupReport2024July23.HTM"
match_tables = fetch_match_tables(url)

# Preview a single match table. The length check keeps the cell from raising
# IndexError when the page yields fewer than seven match tables.
if match_tables and len(match_tables) > 6:
    print(match_tables[6])

<table cols="19" style="margin-right:7%">
<tr style="margin-bottom:-16px">
<td align="left" width="23%">Bruce Downs</td>
<td align="right" width="3%">12</td>
<td align="right" width="3%">0</td>
<td align="right" width="3%"></td>
<td align="right" width="3%"></td>
<td align="right" width="3%">_</td>
<td align="right" width="3%"><a href="DisplayMatch.ASP?matchid=27243&amp;sessionnumber=9" title="Click to view details of this match">3</a></td>
<td align="right" width="3%"><a href="DisplayMatch.ASP?matchid=27242&amp;sessionnumber=9" title="Click to view details of this match">3</a></td>
<td align="right" width="3%"><a href="DisplayMatch.ASP?matchid=27238&amp;sessionnumber=9" title="Click to view details of this match">3</a></td>
<td align="right" width="3%"><a href="DisplayMatch.ASP?matchid=27240&amp;sessionnumber=9" title="Click to view details of this match">3</a></td>
<td align="right" width="4%"></td>
<td align="right" width="4%"></td>
<td align="right" width="4%">_</td>
<td align="rig

### Main algorithm

In [4]:
def extract_score_and_rating_indices(player):
    """Find the positions of the first two "_" placeholder cells in a row.

    Args:
        player: A table-row element exposing ``find_all('td')``; each cell
            must expose ``get_text(strip=True)``.

    Returns:
        A tuple ``(score_index, rating_index)``: the index of the first "_"
        cell (used as the score column) and of the second "_" cell (used as
        the rating column) among the row's ``<td>`` cells.

    Raises:
        ValueError: If the row contains fewer than two "_" cells.
    """
    marker_indices = [
        index
        for index, cell in enumerate(player.find_all('td'))
        if cell.get_text(strip=True) == "_"
    ]
    if len(marker_indices) < 2:
        # ValueError is still an Exception, so existing broad handlers keep working.
        raise ValueError(
            f"Error: expected at least 2 '_' cells in the player's data, found {len(marker_indices)}"
        )
    return marker_indices[0], marker_indices[1]

def _parse_result_cell(text):
    """Convert the stripped text of one score/rating cell into a number.

    "F" becomes 0, an optionally negative integer becomes that ``int``, and
    anything else (blank cells, the "_" placeholder) becomes ``math.nan``.
    """
    if text == "F":
        return 0
    if text.lstrip("-").isdigit():
        return int(text)
    return math.nan


def extract_players_data(players, num_players):
    """Extract names, score rows and rating rows from a table's player rows.

    Args:
        players: Sequence of table-row elements, one per player. Each must
            expose ``find_all('td')`` whose cells expose ``get_text(strip=True)``.
        num_players: How many consecutive cells to read for the scores and
            for the ratings, starting at the columns located in the first row.

    Returns:
        A tuple ``(players_data, scores_data, ratings_data)``:
        ``players_data`` holds the text of each row's first cell, and
        ``scores_data`` / ``ratings_data`` hold one list of ``num_players``
        parsed values per row (see ``_parse_result_cell``). All three are
        empty when ``players`` is empty.

    Raises:
        Whatever ``extract_score_and_rating_indices`` raises for the first
        row, and ``IndexError`` if a row has fewer cells than the columns read.
    """
    players_data = []
    scores_data = []
    ratings_data = []

    # An empty table has no first row to inspect; previously players[0]
    # raised IndexError here.
    if not players:
        return players_data, scores_data, ratings_data

    # The first row's "_" placeholders mark where the score and rating
    # columns start for every row of the table.
    score_index, rating_index = extract_score_and_rating_indices(players[0])

    # Dynamically generating the indices based on the number of players;
    # they are identical for every row, so build them once.
    scores_indices = range(score_index, score_index + num_players)
    ratings_indices = range(rating_index, rating_index + num_players)

    for player in players:
        cell_texts = [cell.get_text(strip=True) for cell in player.find_all('td')]

        players_data.append(cell_texts[0])
        # Values this player recorded against each opponent column
        scores_data.append([_parse_result_cell(cell_texts[i]) for i in scores_indices])
        # Rating change of this player against each opponent column
        ratings_data.append([_parse_result_cell(cell_texts[i]) for i in ratings_indices])

    return players_data, scores_data, ratings_data

def tabulate_session_data(match_tables):
    """Flatten the per-table result grids of a session into a list of matches.

    Args:
        match_tables: Iterable of table elements exposing ``find_all('tr')``,
            one per playing group. ``None`` is treated as no tables.

    Returns:
        A list of rows
        ``[winner, loser, winner_score, loser_score, rating_change, table_number]``
        where ``rating_change`` is the winner's rating entry against the loser
        and ``table_number`` counts the tables from 1. Pairings where both
        sides scored 0 (forfeits) or where either score is NaN (no numeric
        result in the grid) are skipped.
    """
    session_data = []

    for table_num, table in enumerate(match_tables or [], start=1):
        players = table.find_all('tr')

        players_data, scores_data, ratings_data = extract_players_data(players, len(players))

        # Visit every unordered pair (i, j) with i < j exactly once.
        for i in range(len(players_data)):
            for j in range(i + 1, len(players_data)):
                try:
                    score_i = scores_data[i][j]
                    score_j = scores_data[j][i]

                    # A NaN score means the cell held no number. Every
                    # comparison with NaN is False, so without this check the
                    # pairing fell through to the final branch and was
                    # recorded as a win for player j with NaN scores.
                    if math.isnan(score_i) or math.isnan(score_j):
                        continue
                    # if the match is forfeited, skip
                    if score_i == 0 and score_j == 0:
                        continue

                    if score_i > score_j:
                        session_data.append([players_data[i], players_data[j], score_i, score_j, ratings_data[i][j], table_num])
                    else:
                        session_data.append([players_data[j], players_data[i], score_j, score_i, ratings_data[j][i], table_num])
                except IndexError:
                    # Best-effort: report the malformed grid and keep going.
                    print("players_data:", players_data)
                    print("scores_data:", scores_data)
                    print("IndexError:", i, j, "table number:", table_num)

    return session_data

In [5]:
# URL of the webpage to scrape.
# Alternative session reports (swap in by uncommenting one and commenting the active line):
# url = "http://www.tabletennisleague.com/AGTTA/SessionGroupReportArchive/SessionGroupReport2024May28.HTM"
# url = "http://www.tabletennisleague.com/AGTTA/SessionGroupReportArchive/SessionGroupReport2009November17.HTM"
url = "http://www.tabletennisleague.com/AGTTA/SessionGroupReportArchive/SessionGroupReport2024July30.HTM"
match_tables = fetch_match_tables(url)

# One row per match: [winner, loser, winner_score, loser_score, rating_change, table_number]
session_data = tabulate_session_data(match_tables)
session_data

[['Krish Balaji', 'Palak Jain', 3, 0, 8, 1],
 ['Krish Balaji', 'Changhao Liu', 3, 0, 4, 1],
 ['Krish Balaji', 'Daniel Phillips', 3, 0, 2, 1],
 ['Palak Jain', 'Changhao Liu', 3, 1, 4, 1],
 ['Palak Jain', 'Daniel Phillips', 3, 1, 2, 1],
 ['Changhao Liu', 'Daniel Phillips', 3, 2, 6, 1],
 ['Andrew Dyl', 'Rahul Movva', 3, 2, 10, 2],
 ['Andrew Dyl', 'Bethany Zhou', 3, 1, 8, 2],
 ['Andrew Dyl', 'Zorain Malik', 3, 1, 3, 2],
 ['Rahul Movva', 'Bethany Zhou', 3, 2, 7, 2],
 ['Rahul Movva', 'Zorain Malik', 3, 0, 2, 2],
 ['Bethany Zhou', 'Zorain Malik', 3, 1, 3, 2],
 ['Shakil Rehman', 'Xun Liang', 3, 0, 25, 3],
 ['Shakil Rehman', 'Prabhas Lanka', 3, 0, 10, 3],
 ['Shakil Rehman', 'Anup Gopalakrishnan', 3, 1, 10, 3],
 ['Xun Liang', 'Prabhas Lanka', 3, 1, 5, 3],
 ['Anup Gopalakrishnan', 'Xun Liang', 3, 2, 20, 3],
 ['Prabhas Lanka', 'Anup Gopalakrishnan', 3, 1, 7, 3],
 ['Michael Yavnel', 'Aarush Dhannawada', 3, 0, 0, 4],
 ['Michael Yavnel', 'Le Chen', 3, 1, 6, 4],
 ['Michael Yavnel', 'Nick Gentry', 3, 1

In [11]:
page_content = fetch_page_content("http://www.tabletennisleague.com/agtta/SessionGroupReportArchiveList.aspx")
# fetch_page_content returns None when the request fails; fall back to an
# empty listing instead of raising AttributeError.
li_soup = page_content.find_all('li') if page_content is not None else []

# Create the 2D list of [date_text, report_url] pairs
report_list = []
for li in li_soup:
    a_tag = li.find('a')
    # .get() returns None for an anchor without an href instead of raising
    # KeyError. A loop-local name is used so the notebook-wide `url` variable
    # set in other cells is not overwritten.
    report_url = a_tag.get('href') if a_tag else None
    if report_url:
        date_text = a_tag.text.replace("Session Group Report for ", "")
        report_list.append([date_text, report_url])

print(report_list)

[['July 30, 2024', 'http://www.tabletennisleague.com/AGTTA/SessionGroupReportArchive/SessionGroupReport2024July30.HTM'], ['July 23, 2024', 'http://www.tabletennisleague.com/AGTTA/SessionGroupReportArchive/SessionGroupReport2024July23.HTM'], ['July 16, 2024', 'http://www.tabletennisleague.com/AGTTA/SessionGroupReportArchive/SessionGroupReport2024July16.HTM'], ['July 9, 2024', 'http://www.tabletennisleague.com/AGTTA/SessionGroupReportArchive/SessionGroupReport2024July9.HTM'], ['July 2, 2024', 'http://www.tabletennisleague.com/AGTTA/SessionGroupReportArchive/SessionGroupReport2024July2.HTM'], ['June 25, 2024', 'http://www.tabletennisleague.com/AGTTA/SessionGroupReportArchive/SessionGroupReport2024June25.HTM'], ['June 18, 2024', 'http://www.tabletennisleague.com/AGTTA/SessionGroupReportArchive/SessionGroupReport2024June18.HTM'], ['June 11, 2024', 'http://www.tabletennisleague.com/AGTTA/SessionGroupReportArchive/SessionGroupReport2024June11.HTM'], ['June 4, 2024', 'http://www.tabletennislea

In [12]:
# Number of [date_text, url] entries collected in report_list
len(report_list)

704

## Test Cases

In [6]:
test_case_3_players_table = """
<table cols="19" style="margin-right:7%">
<tr style="margin-bottom:-16px">
<td align="left" width="23%">Brent Chen</td>
<td align="right" width="3%">6</td>
<td align="right" width="3%">1</td>
<td align="right" width="3%"></td>
<td align="right" width="3%"></td>
<td align="right" width="3%"></td>
<td align="right" width="3%"></td>
<td align="right" width="3%">_</td>
<td align="right" width="3%"><a href="DisplayMatch.ASP?matchid=27249&amp;sessionnumber=9" title="Click to view details of this match">3</a></td>
<td align="right" width="3%"><a href="DisplayMatch.ASP?matchid=27248&amp;sessionnumber=9" title="Click to view details of this match">3</a></td>
<td align="right" width="4%"></td>
<td align="right" width="4%"></td>
<td align="right" width="4%"></td>
<td align="right" width="4%"></td>
<td align="right" width="4%">_</td>
<td align="right" width="4%">20</td>
<td align="right" width="4%">0</td>
<td align="right" width="8%">20</td>
<td align="right" width="9%">707</td>
<td width="1%"></td>
</tr>
<tr style="margin-bottom:-16px">
<td align="left" width="23%">Mohamed Ghallab</td>
<td align="right" width="3%">4</td>
<td align="right" width="3%">3</td>
<td align="right" width="3%"></td>
<td align="right" width="3%"></td>
<td align="right" width="3%"></td>
<td align="right" width="3%"></td>
<td align="right" width="3%"><a href="DisplayMatch.ASP?matchid=27249&amp;sessionnumber=9" title="Click to view details of this match">1</a></td>
<td align="right" width="3%">_</td>
<td align="right" width="3%"><a href="DisplayMatch.ASP?matchid=27247&amp;sessionnumber=9" title="Click to view details of this match">3</a></td>
<td align="right" width="4%"></td>
<td align="right" width="4%"></td>
<td align="right" width="4%"></td>
<td align="right" width="4%"></td>
<td align="right" width="4%">-20</td>
<td align="right" width="4%">_</td>
<td align="right" width="4%">0</td>
<td align="right" width="8%">-20</td>
<td align="right" width="9%">774</td>
<td width="1%"></td>
</tr>
<tr style="margin-bottom:-16px">
<td align="left" width="23%">David Wang</td>
<td align="right" width="3%">0</td>
<td align="right" width="3%">6</td>
<td align="right" width="3%"></td>
<td align="right" width="3%"></td>
<td align="right" width="3%"></td>
<td align="right" width="3%"></td>
<td align="right" width="3%"><a href="DisplayMatch.ASP?matchid=27248&amp;sessionnumber=9" title="Click to view details of this match">0</a></td>
<td align="right" width="3%"><a href="DisplayMatch.ASP?matchid=27247&amp;sessionnumber=9" title="Click to view details of this match">0</a></td>
<td align="right" width="3%">_</td>
<td align="right" width="4%"></td>
<td align="right" width="4%"></td>
<td align="right" width="4%"></td>
<td align="right" width="4%"></td>
<td align="right" width="4%">0</td>
<td align="right" width="4%">0</td>
<td align="right" width="4%">_</td>
<td align="right" width="8%">0</td>
<td align="right" width="9%">422</td>
<td width="1%"></td>
</tr>
</table>
"""

# Parse the HTML content: one <tr> per player
soup = BeautifulSoup(test_case_3_players_table, 'html.parser')
players = soup.find_all('tr')

# Unit test
players_data, scores_data, ratings_data = extract_players_data(players, len(players))

print("Players Data:", players_data)
print("Scores Data:", scores_data)
print("Ratings Data:", ratings_data)

# Turn the printed expectations into real checks. NaN never equals itself,
# so the NaN placeholders are mapped to None before comparing.
scores_checked = [[None if v != v else v for v in row] for row in scores_data]
ratings_checked = [[None if v != v else v for v in row] for row in ratings_data]
assert players_data == ['Brent Chen', 'Mohamed Ghallab', 'David Wang']
assert scores_checked == [[None, 3, 3], [1, None, 3], [0, 0, None]]
assert ratings_checked == [[None, 20, 0], [-20, None, 0], [0, 0, None]]

Players Data: ['Brent Chen', 'Mohamed Ghallab', 'David Wang']
Scores Data: [[nan, 3, 3], [1, nan, 3], [0, 0, nan]]
Ratings Data: [[nan, 20, 0], [-20, nan, 0], [0, 0, nan]]


In [7]:
test_case_4_players_table = """
<table cols="21" style="margin-right:7%">
<tr style="margin-bottom:-16px">
<td class="datacolumn1" style="color:red"><b>Sunil Bandapally</b></td>
<td class="gameswlcolumn">9</td>
<td class="gameswlcolumn">2</td>
<td class="beforegw"></td>
<td class="gamesemcolumn1"></td>
<td class="gamesemcolumn1"></td>
<td class="gamesemcolumn1"></td>
<td class="gamesemcolumn">_</td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59199&amp;sessionnumber=7" title="Click to view details of this match">3</a></td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59197&amp;sessionnumber=7" title="Click to view details of this match">3</a></td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59195&amp;sessionnumber=7" title="Click to view details of this match">3</a></td>
<td class="aftergw"></td>
<td class="racolumn1"></td>
<td class="racolumn1"></td>
<td class="racolumn1"></td>
<td align="right" width="4%">_</td>
<td align="right" width="4%">8</td>
<td align="right" width="4%">0</td>
<td align="right" width="4%">2</td>
<td class="nracolumn">10</td>
<td class="frcolumn">2088</td>
</tr>
<tr style="margin-bottom:-16px">
<td class="datacolumn1">Geetha Krishna</td>
<td class="gameswlcolumn">8</td>
<td class="gameswlcolumn">4</td>
<td class="beforegw"></td>
<td class="gamesemcolumn1"></td>
<td class="gamesemcolumn1"></td>
<td class="gamesemcolumn1"></td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59199&amp;sessionnumber=7" title="Click to view details of this match">2</a></td>
<td class="gamesemcolumn">_</td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59196&amp;sessionnumber=7" title="Click to view details of this match">3</a></td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59198&amp;sessionnumber=7" title="Click to view details of this match">3</a></td>
<td class="aftergw"></td>
<td class="racolumn1"></td>
<td class="racolumn1"></td>
<td class="racolumn1"></td>
<td align="right" width="4%">-8</td>
<td align="right" width="4%">_</td>
<td align="right" width="4%">0</td>
<td align="right" width="4%">2</td>
<td class="nracolumn">-6</td>
<td class="frcolumn">2063</td>
</tr>
<tr style="margin-bottom:-16px">
<td class="datacolumn1" style="color:red"><b><a href="http://www.tabletennisleague.com/agtta/playerbio/PlayerBio.ASPx?playerID=1483" title="Click to view bio page for this player">Andrew Dyl</a></b></td>
<td class="gameswlcolumn">4</td>
<td class="gameswlcolumn">8</td>
<td class="beforegw"></td>
<td class="gamesemcolumn1"></td>
<td class="gamesemcolumn1"></td>
<td class="gamesemcolumn1"></td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59197&amp;sessionnumber=7" title="Click to view details of this match">0</a></td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59196&amp;sessionnumber=7" title="Click to view details of this match">1</a></td>
<td class="gamesemcolumn">_</td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59200&amp;sessionnumber=7" title="Click to view details of this match">3</a></td>
<td class="aftergw"></td>
<td class="racolumn1"></td>
<td class="racolumn1"></td>
<td class="racolumn1"></td>
<td align="right" width="4%">0</td>
<td align="right" width="4%">0</td>
<td align="right" width="4%">_</td>
<td align="right" width="4%">30</td>
<td class="nracolumn">30</td>
<td class="frcolumn">1788</td>
</tr>
<tr style="margin-bottom:-16px">
<td class="datacolumn1" style="color:red"><b><a href="http://www.tabletennisleague.com/agtta/playerbio/PlayerBio.ASPx?playerID=2649" title="Click to view bio page for this player">Aneece Khalek</a></b></td>
<td class="gameswlcolumn">2</td>
<td class="gameswlcolumn">9</td>
<td class="beforegw"></td>
<td class="gamesemcolumn1"></td>
<td class="gamesemcolumn1"></td>
<td class="gamesemcolumn1"></td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59195&amp;sessionnumber=7" title="Click to view details of this match">0</a></td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59198&amp;sessionnumber=7" title="Click to view details of this match">0</a></td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59200&amp;sessionnumber=7" title="Click to view details of this match">2</a></td>
<td class="gamesemcolumn">_</td>
<td class="aftergw"></td>
<td class="racolumn1"></td>
<td class="racolumn1"></td>
<td class="racolumn1"></td>
<td align="right" width="4%">-2</td>
<td align="right" width="4%">-2</td>
<td align="right" width="4%">-30</td>
<td align="right" width="4%">_</td>
<td class="nracolumn">-34</td>
<td class="frcolumn">1875</td>
</tr>
</table>
"""

# Parse the HTML content: one <tr> per player
soup = BeautifulSoup(test_case_4_players_table, 'html.parser')
players = soup.find_all('tr')

# Unit test
players_data, scores_data, ratings_data = extract_players_data(players, len(players))

print("Players Data:", players_data)
print("Scores Data:", scores_data)
print("Ratings Data:", ratings_data)

# Turn the printed expectations into real checks. NaN never equals itself,
# so the NaN placeholders are mapped to None before comparing.
scores_checked = [[None if v != v else v for v in row] for row in scores_data]
ratings_checked = [[None if v != v else v for v in row] for row in ratings_data]
assert players_data == ['Sunil Bandapally', 'Geetha Krishna', 'Andrew Dyl', 'Aneece Khalek']
assert scores_checked == [[None, 3, 3, 3], [2, None, 3, 3], [0, 1, None, 3], [0, 0, 2, None]]
assert ratings_checked == [[None, 8, 0, 2], [-8, None, 0, 2], [0, 0, None, 30], [-2, -2, -30, None]]

Players Data: ['Sunil Bandapally', 'Geetha Krishna', 'Andrew Dyl', 'Aneece Khalek']
Scores Data: [[nan, 3, 3, 3], [2, nan, 3, 3], [0, 1, nan, 3], [0, 0, 2, nan]]
Ratings Data: [[nan, 8, 0, 2], [-8, nan, 0, 2], [0, 0, nan, 30], [-2, -2, -30, nan]]


In [8]:
test_case_5_players_table = """
<table cols="21" style="margin-right:7%">
<tr style="margin-bottom:-16px">
<td class="datacolumn1">Daniel Phillips</td>
<td class="gameswlcolumn">12</td>
<td class="gameswlcolumn">1</td>
<td class="beforegw"></td>
<td class="gamesemcolumn1"></td>
<td class="gamesemcolumn1"></td>
<td class="gamesemcolumn">_</td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59251&amp;sessionnumber=7" title="Click to view details of this match">3</a></td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59249&amp;sessionnumber=7" title="Click to view details of this match">3</a></td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59247&amp;sessionnumber=7" title="Click to view details of this match">3</a></td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59245&amp;sessionnumber=7" title="Click to view details of this match">3</a></td>
<td class="aftergw"></td>
<td class="racolumn1"></td>
<td class="racolumn1"></td>
<td align="right" width="4%">_</td>
<td align="right" width="4%">2</td>
<td align="right" width="4%">0</td>
<td align="right" width="4%">0</td>
<td align="right" width="4%">0</td>
<td class="nracolumn">2</td>
<td class="frcolumn">1607</td>
</tr>
<tr style="margin-bottom:-16px">
<td class="datacolumn1">Tresor Avognon</td>
<td class="gameswlcolumn">10</td>
<td class="gameswlcolumn">3</td>
<td class="beforegw"></td>
<td class="gamesemcolumn1"></td>
<td class="gamesemcolumn1"></td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59251&amp;sessionnumber=7" title="Click to view details of this match">1</a></td>
<td class="gamesemcolumn">_</td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59246&amp;sessionnumber=7" title="Click to view details of this match">3</a></td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59250&amp;sessionnumber=7" title="Click to view details of this match">3</a></td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59243&amp;sessionnumber=7" title="Click to view details of this match">3</a></td>
<td class="aftergw"></td>
<td class="racolumn1"></td>
<td class="racolumn1"></td>
<td align="right" width="4%">-2</td>
<td align="right" width="4%">_</td>
<td align="right" width="4%">0</td>
<td align="right" width="4%">0</td>
<td align="right" width="4%">0</td>
<td class="nracolumn">-2</td>
<td class="frcolumn">1435</td>
</tr>
<tr style="margin-bottom:-16px">
<td class="datacolumn1">Pat Tran</td>
<td class="gameswlcolumn">6</td>
<td class="gameswlcolumn">7</td>
<td class="beforegw"></td>
<td class="gamesemcolumn1"></td>
<td class="gamesemcolumn1"></td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59249&amp;sessionnumber=7" title="Click to view details of this match">0</a></td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59246&amp;sessionnumber=7" title="Click to view details of this match">0</a></td>
<td class="gamesemcolumn">_</td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59244&amp;sessionnumber=7" title="Click to view details of this match">3</a></td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59248&amp;sessionnumber=7" title="Click to view details of this match">3</a></td>
<td class="aftergw"></td>
<td class="racolumn1"></td>
<td class="racolumn1"></td>
<td align="right" width="4%">0</td>
<td align="right" width="4%">0</td>
<td align="right" width="4%">_</td>
<td align="right" width="4%">7</td>
<td align="right" width="4%">5</td>
<td class="nracolumn">12</td>
<td class="frcolumn">652</td>
</tr>
<tr style="margin-bottom:-16px">
<td class="datacolumn1">Lukasz Trzuskawski</td>
<td class="gameswlcolumn">3</td>
<td class="gameswlcolumn">10</td>
<td class="beforegw"></td>
<td class="gamesemcolumn1"></td>
<td class="gamesemcolumn1"></td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59247&amp;sessionnumber=7" title="Click to view details of this match">0</a></td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59250&amp;sessionnumber=7" title="Click to view details of this match">0</a></td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59244&amp;sessionnumber=7" title="Click to view details of this match">0</a></td>
<td class="gamesemcolumn">_</td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59252&amp;sessionnumber=7" title="Click to view details of this match">3</a></td>
<td class="aftergw"></td>
<td class="racolumn1"></td>
<td class="racolumn1"></td>
<td align="right" width="4%">0</td>
<td align="right" width="4%">0</td>
<td align="right" width="4%">-7</td>
<td align="right" width="4%">_</td>
<td align="right" width="4%">7</td>
<td class="nracolumn">0</td>
<td class="frcolumn">604</td>
</tr>
<tr style="margin-bottom:-16px">
<td class="datacolumn1">Leon Fu</td>
<td class="gameswlcolumn">2</td>
<td class="gameswlcolumn">12</td>
<td class="beforegw"></td>
<td class="gamesemcolumn1"></td>
<td class="gamesemcolumn1"></td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59245&amp;sessionnumber=7" title="Click to view details of this match">0</a></td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59243&amp;sessionnumber=7" title="Click to view details of this match">0</a></td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59248&amp;sessionnumber=7" title="Click to view details of this match">1</a></td>
<td class="gamesemcolumn"><a href="DisplayMatch.ASP?matchid=59252&amp;sessionnumber=7" title="Click to view details of this match">1</a></td>
<td class="gamesemcolumn">_</td>
<td class="aftergw"></td>
<td class="racolumn1"></td>
<td class="racolumn1"></td>
<td align="right" width="4%">0</td>
<td align="right" width="4%">0</td>
<td align="right" width="4%">-5</td>
<td align="right" width="4%">-7</td>
<td align="right" width="4%">_</td>
<td class="nracolumn">-12</td>
<td class="frcolumn">559</td>
</tr>
</table>
"""

# Parse the HTML content: one <tr> per player
soup = BeautifulSoup(test_case_5_players_table, 'html.parser')
players = soup.find_all('tr')

# Unit test
players_data, scores_data, ratings_data = extract_players_data(players, len(players))

print("Players Data:", players_data)
print("Scores Data:", scores_data)
print("Ratings Data:", ratings_data)

# Turn the printed expectations into real checks. NaN never equals itself,
# so the NaN placeholders are mapped to None before comparing.
scores_checked = [[None if v != v else v for v in row] for row in scores_data]
ratings_checked = [[None if v != v else v for v in row] for row in ratings_data]
assert players_data == ['Daniel Phillips', 'Tresor Avognon', 'Pat Tran', 'Lukasz Trzuskawski', 'Leon Fu']
assert scores_checked == [[None, 3, 3, 3, 3], [1, None, 3, 3, 3], [0, 0, None, 3, 3], [0, 0, 0, None, 3], [0, 0, 1, 1, None]]
assert ratings_checked == [[None, 2, 0, 0, 0], [-2, None, 0, 0, 0], [0, 0, None, 7, 5], [0, 0, -7, None, 7], [0, 0, -5, -7, None]]

Players Data: ['Daniel Phillips', 'Tresor Avognon', 'Pat Tran', 'Lukasz Trzuskawski', 'Leon Fu']
Scores Data: [[nan, 3, 3, 3, 3], [1, nan, 3, 3, 3], [0, 0, nan, 3, 3], [0, 0, 0, nan, 3], [0, 0, 1, 1, nan]]
Ratings Data: [[nan, 2, 0, 0, 0], [-2, nan, 0, 0, 0], [0, 0, nan, 7, 5], [0, 0, -7, nan, 7], [0, 0, -5, -7, nan]]
