In [13]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# URL of the webpage
url = 'https://www.pff.com/news/nfl-roster-rankings-all-32-teams-2024-strengths-weaknesses-x-factors'

# Fetch the webpage content
response = requests.get(url)
if response.status_code == 200:
    page_content = response.content
else:
    print(f"Failed to retrieve the page. Status code: {response.status_code}")

# Parse the webpage content
soup = BeautifulSoup(page_content, 'html.parser')

# Initialize an empty list to store the data
data = []

# Find all team sections
team_headers = soup.find_all('h3')
for header in team_headers:
    # Extract team name
    team_name = header.text.strip()
    # Find the next table containing player data
    table_wrapper = header.find_next('div', class_='g-table-wrapper')
    if table_wrapper:
        player_table = table_wrapper.find('table')
        if player_table:
            # Extract player data
            rows = player_table.find_all('tr')
            for row in rows[1:]:  # Skip the header row
                cells = row.find_all('td')
                if len(cells) == 2:
                    # Offense and Defense player data
                    for cell in cells:
                        cell_text = cell.text.strip()
                        if cell_text:  # Check if the cell is not empty
                            parts = cell_text.split(' ')
                            position = parts[0]
                            player_name = ' '.join(parts[1:-1])
                            grade = parts[-1].strip('()')
                            # Append the extracted data to the list
                            data.append({
                                'Team': team_name,
                                'Position': position,
                                'Player': player_name,
                                'Grade': grade
                            })

# Create a DataFrame from the extracted data
currAVs = pd.DataFrame(data)

currAVs['Grade'] = currAVs['Grade'].str.replace('*', '')
currAVs['Grade'] = currAVs['Grade'].astype(float)

# Display the DataFrame
currAVs


Unnamed: 0,Team,Position,Player,Grade
0,1. San Francisco 49ers,QB,Brock Purdy,87.4
1,1. San Francisco 49ers,DI,Javon Hargrave,74.9
2,1. San Francisco 49ers,RB,Christian McCaffrey,90.3
3,1. San Francisco 49ers,DI,Maliek Collins,60.9
4,1. San Francisco 49ers,RB,Elijah Mitchell,70.2
...,...,...,...,...
763,32. Denver Broncos,CB,Levi Wallace,57.9
764,32. Denver Broncos,RG,Quinn Meinerz,83.7
765,32. Denver Broncos,S,Brandon Jones,75.4
766,32. Denver Broncos,RT,Mike McGlinchey,67.5


In [14]:
import pandas as pd

currAVs['Grade'] = pd.to_numeric(currAVs['Grade'], errors='coerce')

# Define position groups
position_groups = {
    'oline': ['LT', 'LG', 'C', 'RG', 'RT'],
    'qb': ['QB'],
    'rb': ['RB'],
    'wrte': ['WR', 'TE'],
    'dst': ['Edge', 'LB', 'Dl', 'CB', 'S']
}

# Initialize a list to store the data for the new DataFrame
new_data = []

# Get unique teams
teams = currAVs['Team'].unique()

# Calculate the averages for each team
for team in teams:
    team_data = {'Team': team}
    team_df = currAVs[currAVs['Team'] == team]
    for group, positions in position_groups.items():
        group_grades = team_df[team_df['Position'].isin(positions)]['Grade']
        if not group_grades.empty:
            team_data[group] = group_grades.mean()
        else:
            team_data[group] = None  # Or you can use 0 or another placeholder value
    new_data.append(team_data)

# Create the new DataFrame
currAVs = pd.DataFrame(new_data)

corrections = {
    '1. San Francisco 49ers': 'sfo',
    '2. Kansas City Chiefs': 'kan',
    '3. Philadelphia Eagles': 'phi',
    '4. New York Jets': 'nyj',
    '5. Baltimore Ravens': 'rav',
    '6. Detroit Lions': 'det',
    '7. Houston Texans': 'htx',
    '8. Cincinnati Bengals': 'cin',
    '9. Dallas Cowboys': 'dal',
    '10. Buffalo Bills': 'buf',
    '11. Miami Dolphins': 'mia',
    '12. Cleveland Browns': 'cle',
    '13. Green Bay Packers': 'gnb',
    '14. Los Angeles Rams': 'ram',
    '15. Atlanta Falcons': 'atl',
    '16. Pittsburgh Steelers': 'pit',
    '17. Seattle Seahawks': 'sea',
    '18. Tampa Bay Buccaneers': 'tam',
    '19. Jacksonville Jaguars': 'jax',
    '20. Chicago Bears': 'chi',
    '21. Minnesota Vikings': 'min',
    '22. Indianapolis Colts': 'clt',
    '23. Las Vegas Raiders': 'rai',
    '24. New Orleans Saints': 'nor',
    '25. Tennessee Titans': 'oti',
    '26. Los Angeles Chargers': 'sdg',
    '27. Washington Commanders': 'was',
    '28. Arizona Cardinals': 'crd',
    '29. New England Patriots': 'nwe',
    '30. Carolina Panthers': 'car',
    '31. New York Giants': 'nyg',
    '32. Denver Broncos': 'den'
}

currAVs['Team'] = currAVs['Team'].replace(corrections)
currAVs
# Display the new DataFrame
currAVs


Unnamed: 0,Team,oline,qb,rb,wrte,dst
0,sfo,65.42,87.4,80.25,85.45,76.42
1,kan,69.64,90.5,72.7,75.875,67.51
2,phi,69.78,86.7,62.75,71.95,68.7
3,nyj,70.6,39.4,70.7,73.925,76.69
4,rav,73.06,90.4,80.2,71.25,73.6
5,det,81.84,85.7,77.95,76.625,69.64
6,htx,57.2,83.1,68.5,81.35,70.1
7,cin,67.38,77.9,65.4,73.2,67.44
8,dal,62.42,90.0,67.1,72.675,77.18
9,buf,69.36,92.1,79.6,71.375,70.78


In [15]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Load the second CSV file
file_path = '/Users/kmaran3/Dropbox/Darkhorse/approximate value data/AVbyPositionGroup.csv'
av_by_position_group = pd.read_csv(file_path)

# Display the second DataFrame to understand its structure
print(av_by_position_group.head())

# Initialize the MinMaxScaler
scaler = MinMaxScaler()

# Define columns to be scaled
columns_to_scale = ['oline', 'qb', 'rb', 'wrte', 'dst']

# Fit and transform the currAVs DataFrame
currAVs_scaled = currAVs.copy()
currAVs_scaled[columns_to_scale] = scaler.fit_transform(currAVs[columns_to_scale])

# Fit and transform the av_by_position_group DataFrame
av_by_position_group_scaled = av_by_position_group.copy()
av_by_position_group_scaled[columns_to_scale] = scaler.fit_transform(av_by_position_group[columns_to_scale])

# Display the scaled DataFrames to verify
print(currAVs_scaled.head())
print(av_by_position_group_scaled.head())
currAVs_scaled = currAVs_scaled.rename(columns={'Team': 'team'})

# Optionally, save the scaled DataFrames to new CSV files
currAVs_scaled.to_csv('/Users/kmaran3/Dropbox/Darkhorse/approximate value data/currAVs.csv', index=False)
av_by_position_group_scaled.to_csv('/Users/kmaran3/Dropbox/Darkhorse/approximate value data/AVbyPositionGroup.csv', index=False)

   Unnamed: 0 team     oline        qb        rb      wrte       dst  season
0          11  atl  0.468450  0.505668  0.096961  0.505668  0.061259    2013
1          33  buf  0.267762  0.199211  0.293717  0.199211  0.385860    2013
2          44  car  0.544385  0.454281  0.446946  0.454281  0.605865    2013
3          55  chi  0.636593  0.614260  0.307914  0.614260  0.069330    2013
4          66  cin  0.500994  0.488052  0.235777  0.488052  0.563472    2013
  Team     oline        qb        rb      wrte       dst
0  sfo  0.352013  0.867993  0.682261  1.000000  0.942943
1  kan  0.518548  0.924051  0.387914  0.554651  0.274024
2  phi  0.524073  0.855335  0.000000  0.372093  0.363363
3  nyj  0.556433  0.000000  0.309942  0.463953  0.963213
4  rav  0.653512  0.922242  0.680312  0.339535  0.731231
   Unnamed: 0 team     oline        qb        rb      wrte       dst  season
0          11  atl  0.468450  0.505668  0.096961  0.505668  0.061259    2013
1          33  buf  0.267762  0.199211  0.