In [1]:
import time
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By

# Specify the path to your installed chromedriver executable
CHROMEDRIVER_PATH = '/Users/marclambertes/Python/chromedriver'
MAX_OPTA_PAGE_NUM = 30
OPTA_URL = 'https://dataviz.theanalyst.com/opta-power-rankings/'

def scrape_opta_club_rankings():
    chrome_options = webdriver.ChromeOptions()    
    options = [
        '--headless',
    ]

    for option in options:
        chrome_options.add_argument(option)

    # Use the directly installed Chromedriver
    chrome_options.add_argument(f"webdriver.chrome.driver={CHROMEDRIVER_PATH}")
    driver = webdriver.Chrome(options=chrome_options)

    driver.get(OPTA_URL)
    time.sleep(3)

    soup = BeautifulSoup(driver.page_source, 'html.parser')
    table = soup.find('table')
    headers = [th.text.strip() for th in table.find_all('th')]
    headers = headers + ['id']

    rows = []
    page_num = 1

    while page_num <= MAX_OPTA_PAGE_NUM:
        print(f'Scraping page {page_num}')
        
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        table = soup.find('table')

        for tr in table.find_all('tr'):
            row = [td.text.strip() for td in tr.find_all('td')]
            img = tr.select_one('img')
            if img is None:
                img_id = ''
            else:
                img_id = img['src'].split('&id=')[-1]
            row.append(img_id)
            if row:
                rows.append(row)
        
        if page_num < MAX_OPTA_PAGE_NUM:
            buttons = driver.find_elements(by=By.CSS_SELECTOR, value='button')
            last_button = buttons[-1]
            last_button.click()
            
        time.sleep(2)
        page_num += 1

    print('Done scraping Opta club rankings.')
    driver.quit()
    time.sleep(1)
    
    df = pd.DataFrame(rows, columns=headers)
    df.dropna(subset=['team'], inplace=True)

    # Save data to Excel file
    excel_filename = 'opta_club_rankings.xlsx'
    df.to_excel(excel_filename, index=False)
    print(f'Data saved to {excel_filename}')

if __name__ == "__main__":
    scrape_opta_club_rankings()


Scraping page 1
Scraping page 2
Scraping page 3
Scraping page 4
Scraping page 5
Scraping page 6
Scraping page 7
Scraping page 8
Scraping page 9
Scraping page 10
Scraping page 11
Scraping page 12
Scraping page 13
Scraping page 14
Scraping page 15
Scraping page 16
Scraping page 17
Scraping page 18
Scraping page 19
Scraping page 20
Scraping page 21
Scraping page 22
Scraping page 23
Scraping page 24
Scraping page 25
Scraping page 26
Scraping page 27
Scraping page 28
Scraping page 29
Scraping page 30
Done scraping Opta club rankings.
Data saved to opta_club_rankings.xlsx


In [3]:
import pandas as pd
from tabulate import tabulate

df = pd.read_excel('opta_club_rankings.xlsx')


# Assuming you have a DataFrame named df containing the team data

# Enter the team name
team_name = input("Enter the team name: ")

# Filter the DataFrame to get the data for the specified team
team_data = df[df['team'] == team_name]

# Get the rating and ranking of the input team
input_team_rating = team_data['rating'].values[0]
input_team_ranking = team_data['rank'].values[0]

# Print the rating and ranking for the input team
print("Input Team:")
print(f"Team: {team_name}")
print(f"Rating: {input_team_rating}")
print(f"Rank: {input_team_ranking}")
print()

# Calculate the absolute difference between the input team's rating and the ratings of all other teams
df['rating_difference'] = abs(df['rating'] - input_team_rating)

# Sort the DataFrame by the rating difference in ascending order
sorted_teams = df.sort_values('rating_difference')

# Get the 15 teams closest to the input team based on ranking
closest_teams = sorted_teams.iloc[1:16]

# Sort the closest_teams DataFrame based on the "ranking" column
closest_teams_sorted = closest_teams.sort_values('rank')

# Create a new DataFrame with the closest teams, including their rating and ranking
team_data = closest_teams_sorted[['team', 'rating', 'rank']]

# Convert the DataFrame to a table format
table = tabulate(team_data, headers='keys', tablefmt='presto')

# Print the table
print(table)

Enter the team name: Wycombe Wanderers
Input Team:
Team: Wycombe Wanderers
Rating: 69.3
Rank: 970

     | team                 |   rating |   rank
-----+----------------------+----------+--------
 962 | CS Sfaxien           |     69.3 |    963
 963 | Stade Malien Bamako  |     69.3 |    964
 964 | Niort                |     69.3 |    965
 965 | Marathón             |     69.3 |    966
 966 | Domžale              |     69.3 |    967
 967 | Lecco                |     69.3 |    968
 968 | Sabadell             |     69.3 |    969
 969 | Wycombe Wanderers    |     69.3 |    970
 970 | UTA Arad             |     69.3 |    971
 971 | ENPPI                |     69.3 |    972
 972 | Almirante Brown      |     69.3 |    973
 973 | Al-Ittihad           |     69.3 |    974
 974 | Tokyo Verdy          |     69.3 |    975
 980 | Cimarrones de Sonora |     69.2 |    981
 993 | Tarazona             |     69.2 |    994


In [4]:
## Enter the first team name
team1_name = input("Enter the first team name: ")

# Filter the DataFrame to get the data for the first team
team1_data = df[df['team'] == team1_name]

# Get the rating and ranking of the first team
team1_rating = team1_data['rating'].values[0]
team1_ranking = team1_data['rank'].values[0]

# Enter the second team name
team2_name = input("Enter the second team name: ")

# Filter the DataFrame to get the data for the second team
team2_data = df[df['team'] == team2_name]

# Get the rating and ranking of the second team
team2_rating = team2_data['rating'].values[0]
team2_ranking = team2_data['rank'].values[0]

# Calculate the difference in ranking between the two teams
ranking_difference = abs(team1_ranking - team2_ranking)

# Print the information for both teams
print("Team 1:")
print(f"Team: {team1_name}")
print(f"Rating: {team1_rating}")
print(f"Ranking: {team1_ranking}")
print()

print("Team 2:")
print(f"Team: {team2_name}")
print(f"Rating: {team2_rating}")
print(f"Ranking: {team2_ranking}")
print()

# Determine the relationship between the two teams based on ranking
if team1_ranking < team2_ranking:
    relationship = f"{team1_name} is ranked higher than {team2_name}"
elif team1_ranking > team2_ranking:
    relationship = f"{team1_name} is ranked lower than {team2_name}"
else:
    relationship = f"{team1_name} and {team2_name} are ranked equally"

# Print the relationship between the teams
print(f"Ranking Relationship: {relationship}")
print(f"Difference in Ranking: {ranking_difference} place(s)")

Enter the first team name: Wycombe Wanderers
Enter the second team name: Mladá Boleslav
Team 1:
Team: Wycombe Wanderers
Rating: 69.3
Ranking: 970

Team 2:
Team: Mladá Boleslav
Rating: 73.3
Ranking: 533

Ranking Relationship: Wycombe Wanderers is ranked lower than Mladá Boleslav
Difference in Ranking: 437 place(s)
