In [3]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import unquote
import pandas as pd

# Function to extract player name from the URL
def extract_player_name(player_url):
    return unquote(player_url.split("/")[-1]).replace("-", " ")

# Function to scrape data from a player URL
def scrape_player_data(player_url):
    # Get player name
    player_name = extract_player_name(player_url)
    
    # Fetch HTML content
    data = requests.get(player_url)
    soup = BeautifulSoup(data.text, 'html.parser')

    # Extract data from the last 5 matches table
    last_5 = soup.select('#last_5_matchlogs td[data-stat]')
    last_5_data = []

    for cell in last_5:
        stat_name = cell['data-stat']
        stat_value = cell.get_text(strip=True)

        last_5_data.append((stat_name, stat_value))

    # Create a dictionary to store cumulative stats for each match
    match_stats = {stat: 0 for stat in ['minutes', 'goals', 'assists', 'pens_made', 'pens_att', 'shots', 'shots_on_target', 'cards_yellow', 'cards_red', 'touches', 'tackles', 'interceptions', 'blocks', 'xg', 'npxg', 'xg_assist', 'sca', 'gca', 'passes_completed', 'passes', 'passes_pct', 'progressive_passes', 'carries', 'progressive_carries', 'take_ons', 'take_ons_won']}

    # Populate the match_stats dictionary with cumulative stats
    for stat_name, stat_value in last_5_data:
        if stat_name in match_stats:
            match_stats[stat_name] += float(stat_value) if stat_value.replace('.', '', 1).isdigit() else 0

    # Calculate cumulative minutes
    cumulative_minutes = match_stats['minutes']

    # Calculate and display per 90 stats
    per_90_stats = {f"{stat_name}_p90": round((stat_total / cumulative_minutes) * 90, 2) for stat_name, stat_total in match_stats.items() if stat_name != 'minutes'}

    return player_name, per_90_stats

# List of player URLs
player_urls = [
 "https://fbref.com/en/players/aed3a70f/Ollie-Watkins",
 "https://fbref.com/en/players/e77dc3b2/Dominic-Solanke",
 "https://fbref.com/en/players/15ab5a2b/Julian-Alvarez", 
 "https://fbref.com/en/players/b66315ae/Gabriel-Jesus", 
 "https://fbref.com/en/players/9c36ed83/Nicolas-Jackson", 
 "https://fbref.com/en/players/4d77b365/Darwin-Nunez"
 "https://fbref.com/en/players/4e9a0555/Chris-Wood"
 "https://fbref.com/en/players/7c56da38/Christopher-Nkunku"
 "https://fbref.com/en/players/dc62b55d/Matheus-Cunha"
 "https://fbref.com/en/players/169fd162/Hwang-Hee-chan"
]

# quick links mids 
# https://fbref.com/en/players/c4486bac/Michael-Olise
# https://fbref.com/en/players/ae4fc6a4/Eberechi-Eze
# https://fbref.com/en/players/dc7f8a28/Cole-Palmer
# https://fbref.com/en/players/79c84d1c/Jarrod-Bowen
# https://fbref.com/en/players/79300479/Martin-Odegaard
# https://fbref.com/en/players/fa031b34/Richarlison
# https://fbref.com/en/players/df3cda47/Dejan-Kulusevski
# https://fbref.com/en/players/bf34eebd/Noni-Madueke
# https://fbref.com/en/players/9b6f7fd5/Lucas-Paqueta
# https://fbref.com/en/players/3a233281/Leon-Bailey
# https://fbref.com/en/players/aeed5c06/Moussa-Diaby
# https://fbref.com/en/players/ed1e53f3/Phil-Foden



# quick link forwards 
# "https://fbref.com/en/players/aed3a70f/Ollie-Watkins",
# "https://fbref.com/en/players/e77dc3b2/Dominic-Solanke",
# "https://fbref.com/en/players/15ab5a2b/Julian-Alvarez", 
# "https://fbref.com/en/players/b66315ae/Gabriel-Jesus", 
# "https://fbref.com/en/players/9c36ed83/Nicolas-Jackson", 
# "https://fbref.com/en/players/4d77b365/Darwin-Nunez"
# https://fbref.com/en/players/4e9a0555/Chris-Wood
# https://fbref.com/en/players/7c56da38/Christopher-Nkunku
# https://fbref.com/en/players/dc62b55d/Matheus-Cunha
# https://fbref.com/en/players/169fd162/Hwang-Hee-chan

# List to store results
results = []

# Scrape data for each player
for player_url in player_urls:
    player_name, player_stats = scrape_player_data(player_url)
    results.append((player_name, player_stats))

# Create a DataFrame for per 90 stats
df = pd.DataFrame({name: stats for name, stats in results}).T

# Save the DataFrame to a CSV file
df.to_csv("fpl_cfjan.csv")

# Display the DataFrame
print(df)


                 goals_p90  assists_p90  pens_made_p90  pens_att_p90  \
Ollie Watkins         0.20         0.40           0.00          0.00   
Dominic Solanke       1.01         0.00           0.20          0.20   
Julian Alvarez        0.42         0.21           0.21          0.21   
Gabriel Jesus         0.26         0.00           0.00          0.00   
Nicolas Jackson       0.24         0.24           0.00          0.00   
Hwang Hee chan        0.32         0.32           0.00          0.00   

                 shots_p90  shots_on_target_p90  cards_yellow_p90  \
Ollie Watkins         2.00                 0.80              0.00   
Dominic Solanke       4.83                 1.41              0.00   
Julian Alvarez        2.53                 1.05              0.00   
Gabriel Jesus         3.33                 1.28              0.26   
Nicolas Jackson       2.18                 0.97              0.24   
Hwang Hee chan        2.58                 0.65              0.65   

           