In [10]:
from bs4 import BeautifulSoup
import pandas as pd
import re

# Read the HTML content from the file
with open('../data/fantasydraft.html', 'r') as file:
    html = file.read()

# Parse the HTML
soup = BeautifulSoup(html, 'html.parser')

# Parse the HTML
soup = BeautifulSoup(html, 'html.parser')

# Prepare lists to store the extracted data
numbers = []
player_names = []
positions = []
position_types = []
position_ranks = []
teams = []

# Iterate over each row in the table
for row in soup.find_all('tr'):
    # Extract the number after <tr><td>
    number = row.find('td').text.strip()
    numbers.append(number)
    
    # Extract the player name from fp-player-name attribute
    player_tag = row.find('a', class_='player-name')
    player_name = player_tag.get('fp-player-name') if player_tag else None
    player_names.append(player_name)

    # Extract the team name (first <small> tag)
    team_tag = row.find('small')
    team_name = team_tag.text.strip() if team_tag else None
    teams.append(team_name)

    # Extract the position (first <td> after the player information)
    position = row.find_all('td')[2].text.strip()
    positions.append(position)

    # Split the position into position type and position rank using regex
    match = re.match(r"([A-Za-z]+)(\d+)", position)
    if match:
        position_type = match.group(1)
        position_rank = match.group(2)
    else:
        position_type = None
        position_rank = None

    position_types.append(position_type)
    position_ranks.append(position_rank)

# Create a DataFrame from the extracted data
df = pd.DataFrame({
    'Number': numbers,
    'Player Name': player_names,
    'Position Type': position_types,
    'Position Rank': position_ranks,
    'Team': teams
})

# Display the DataFrame
print(df.columns)
print(df.head())

Index(['Number', 'Player Name', 'Position Type', 'Position Rank', 'Team'], dtype='object')
  Number          Player Name Position Type Position Rank Team
0      1  Christian McCaffrey            RB             1   SF
1      2     Justin Jefferson            WR             1  MIN
2      3        Austin Ekeler            RB             2  WAS
3      4        Ja'Marr Chase            WR             2  CIN
4      5         Travis Kelce            TE             1   KC


In [19]:
# Initialize an empty DataFrame for storing the results
teams = df['Team'].unique()
result = pd.DataFrame(columns=['Team'])
result['Team'] = teams

# Find the minimum 'Number' for each position type and team
for position in ['QB', 'WR', 'RB', 'TE']:
    min_numbers = (
        df[df['Position Type'] == position]
        .groupby('Team')['Number']
        .min()
        .reset_index()
        .rename(columns={'Number': position})
    )
    
    # Merge the result with the main result DataFrame
    result = pd.merge(result, min_numbers, on='Team', how='left')

# Fill NaN values with None if a team does not have a player in that position
result = result.fillna('500')
result['DraftRank'] = result[['QB', 'WR', 'RB', 'TE']].astype(int).sum(axis=1)

# Display the resulting DataFrame
result.to_csv('../data/fantasydraft.csv', index=False)
print(result)

    Team   QB   WR   RB   TE  DraftRank
0     SF  155   37    1  297        490
1    MIN  280    2  211   46        539
2    WAS  500   53    3  236        792
3    CIN   34   33  192  229        488
4     KC   12  112  104    5        233
5    MIA   93  113  109  500        815
6    ATL  105  169  134   71        479
7    PHI  160   13  152   70        395
8    CLE   82  111  142   94        429
9    HOU  193   10   30  125        358
10   DAL  313   11  124  206        654
11   BAL   32  101   14  252        399
12    GB  162  139   15  177        493
13   TEN  500  118   16  138        772
14   NYJ   99  132  277  241        749
15   LAR  187   18  249  137        591
16   DET  128  146   31  147        452
17   BUF   20  244   60  110        434
18   PIT  145  183  121   90        539
19    NO  151  214  166  157        688
20   JAC  231   73  129   83        516
21    NE  500  197   29  260        986
22   SEA  123   36   38  275        472
23   LAC   41  122  194  263        620
