In [1]:
import pandas as pd
import math
import matplotlib.pyplot as plt
import seaborn as sns
import os
from pandas.plotting import table
from matplotlib.table import table
from bs4 import BeautifulSoup
import base64
from io import BytesIO

file = "/workspaces/SPL/Middleput/points.xlsx"
points = pd.read_excel(file)
# Setting option to display all rows
pd.set_option('display.max_rows', None)

In [2]:
data_S1 = points[points["Season"] == 1]
data_S2 = points[points["Season"] == 2]

In [3]:
def compute_player_summaries(points_df):
    """
    Compute player summaries based on the provided data.
    
    Args:
    - points_df (pd.DataFrame): The input data containing player details and performance metrics.
    
    Returns:
    - pd.DataFrame: A summary dataframe containing computed metrics for each player.
    """
    # Define aggregation functions for the columns of interest
    aggregations = {
        'Date': 'count',
        'Penalty': 'sum',
        'Friend Referrals': 'sum',
        'Own Goals': 'sum',
        'Goals Conceded': 'sum',
        'Goals': ['mean', 'sum'],
        'Total Points': ['mean', 'sum'],
        'MVP': 'sum',
        'SPL Bonus': 'sum'
    }
    
    # Use groupby with multiple aggregation functions
    player_summary = points_df.groupby('Player').agg(aggregations)
    
    # Flatten hierarchical columns
    player_summary.columns = ['_'.join(col).strip() for col in player_summary.columns.values]
    
    # Rename columns for clarity
    columns_rename = {
        'Date_count': 'Games Played',
        'Penalty_sum': 'Penalties',
        'Friend Referrals_sum': 'Friend Referrals',
        'Own Goals_sum': 'Own Goals',
        'Goals Conceded_sum': 'Goals Conceded',
        'Goals_mean': 'GoalxG',
        'Goals_sum': 'Total Goals',
        'Total Points_mean': 'PointsxG',
        'Total Points_sum': 'Total',
        'MVP_sum': 'MVP',
        'SPL Bonus_sum': 'SPL Bonus'
    }
    player_summary.rename(columns=columns_rename, inplace=True)
    
    # Calculate games won
    games_won = points_df[points_df['Team'] == points_df['Winning Team']].groupby('Player').size()
    player_summary['Games Won'] = games_won
    player_summary['Games Won'].fillna(0, inplace=True)
    
    # Calculate win ratio
    player_summary['Win Ratio'] = player_summary['Games Won'] / player_summary['Games Played']
    
    return player_summary

def calculate_cumulative_points_and_rank(points_df):
    """
    Calculate cumulative points and rank changes for each player.
    
    Args:
    - points_df (pd.DataFrame): The input data containing player details and performance metrics.
    
    Returns:
    - pd.DataFrame: A dataframe containing the 'Rank Change' for the latest game for each player.
    """
    # Work with a deep copy to avoid modifying the original dataframe
    data_copy = points_df.copy()
    
    # Calculate cumulative Total Points for each player after each game
    data_copy['Cumulative Points'] = data_copy.groupby('Player')['Total Points'].cumsum()
    
    # Determine the player's rank based on these cumulative points after each game
    data_copy['Rank'] = data_copy.groupby('Date')['Cumulative Points'].rank(method="first", ascending=False)
    
    # Sort data to ensure we process in chronological order for each player
    data_sorted = data_copy.sort_values(by=['Player', 'Date'])
    
    # Calculate the change in rank between each game for every player
    data_sorted['Rank Change'] = data_sorted.groupby('Player')['Rank'].diff().fillna(0)
    
    # Extract the latest rank change for each player
    latest_rank_change = data_sorted.groupby('Player').apply(lambda x: x.iloc[-1])['Rank Change']
    
    return latest_rank_change

In [4]:
def generate_summary(points_df):
    # Integrate functions to generate the final summary
    player_summary = compute_player_summaries(points_df)
    latest_rank_change = calculate_cumulative_points_and_rank(points_df)

    # Add 'Rank Change' to the summary
    player_summary['Rank Change'] = latest_rank_change

    # Fill NaN values with 0
    player_summary = player_summary.fillna(0)

    # Convert specific columns to integer type
    cols_to_int = ['Games Won', 'MVP', 'SPL Bonus', 'Rank Change']
    player_summary[cols_to_int] = player_summary[cols_to_int].astype(int)

    # Calculate the overall rank based on the 'Total Points'
    player_summary['Rank'] = player_summary['Total'].rank(method="min", ascending=False).astype(int)

    # Reorder columns to have 'Rank' at the front
    column_order = ['Rank'] + [col for col in player_summary if col != 'Rank']
    final_summary = player_summary[column_order]

    # Sorting the final_summary DataFrame by 'Rank' in ascending order
    sorted_summary = final_summary.sort_values(by='Rank')

    # Reordering the columns as specified
    desired_column_order = [
        "Player", "Rank", "Games Played", "Games Won", "Win Ratio", "Penalties", 
        "Friend Referrals", "Own Goals", "Goals Conceded", "MVP", "SPL Bonus",
        "GoalxG", "Total Goals", "PointsxG", 
        "Total", "Rank Change"
    ]

    for col in sorted_summary.columns:
        if sorted_summary[col].dtype == 'float64':
            sorted_summary[col] = sorted_summary[col].round(2)
    sorted_summary['Win Ratio'] = (sorted_summary['Win Ratio'] * 100).round(0).astype(int).astype(str) + '%'
    
    # Reset index to get the "Player" column and then reorder columns
    sorted_summary = sorted_summary.reset_index()
    sorted_summary = sorted_summary[desired_column_order]
    
    return sorted_summary

In [5]:
season1_sorted = generate_summary(data_S1)
season2_sorted = generate_summary(data_S2)
# Save the games_df DataFrame as an Excel file
season1_excel = "/workspaces/SPL/Middleput/season1.xlsx"
season1_sorted.to_excel(season1_excel, index=False)
# Save the games_df DataFrame as an Excel file
season2_excel = "/workspaces/SPL/Middleput/season2.xlsx"
season2_sorted.to_excel(season2_excel, index=False)

In [6]:
def mpl_to_html(fig, css_link="/Middleput/player_data/styles_graphs.css"):
    """
    Convert a Matplotlib Figure object into a Base64-encoded PNG and return an HTML string.
    """
    buf = BytesIO()
    fig.savefig(buf, format="png")
    buf.seek(0)
    image_base64 = base64.b64encode(buf.read()).decode('utf-8').replace('\n', '')
    buf.close()
    
    # Enhanced HTML structure
    html_content = f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Player Graph</title>
        <link rel="stylesheet" href="{css_link}">
    </head>
    <body>
        <div class="graph-container">
            <img src='data:image/png;base64,{image_base64}' alt="Player Graph">
        </div>
    </body>
    </html>
    """
    
    return html_content


def player_graphs_with_summary_updated(season_points, season, player_name, sorted_1, sorted_2):
    player_data_season = season_points[season_points['Season'] == season]
    player_data = player_data_season[player_data_season['Player'] == player_name]
    
    if season == 1:
        season_sorted = sorted_1
    else:   
        season_sorted = sorted_2

    player_summary = season_sorted[season_sorted['Player'] == player_name]
    
    if player_data.empty:
        print(f"No data found for player: {player_name}")
        return
    
    fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(10, 10), gridspec_kw={"height_ratios": [0.2, 0.4, 0.4]})
    axes[0, 0].axis("off")
    axes[0, 1].axis("off")
    ax_center = fig.add_subplot(3, 2, (1, 2))
    ax_center.axis("off")
    
    player_summary = player_summary.drop(columns=['Player']).T
    player_summary.columns = [player_name]
    cell_text = player_summary.reset_index().values.tolist()
    tbl = table(ax_center, cellText=cell_text, loc="center", cellLoc="center")
    tbl.auto_set_font_size(False)
    tbl.set_fontsize(10)
    tbl.scale(1.0, 1.2)
    
    sns.lineplot(x='Gameweek', y='Total Points', data=player_data, ax=axes[1, 0], marker="o")
    sns.barplot(x='Gameweek', y='Goal Points', data=player_data, ax=axes[1, 1], palette="viridis")
    game_outcomes = player_data['Game Outcome'].value_counts()
    axes[2, 0].pie(game_outcomes, labels=game_outcomes.index, autopct='%1.1f%%', startangle=140, colors=sns.color_palette("pastel"))
    sns.lineplot(x='Gameweek', y='Defensive Score Points', data=player_data, ax=axes[2, 1], label='Defensive Score', marker="o")
    sns.lineplot(x='Gameweek', y='Midfield Score', data=player_data, ax=axes[2, 1], label='Midfield Score', marker="o")
    
    plt.tight_layout()
    return fig

def save_player_graphs_to_html(season_points, season, sorted_1, sorted_2):
    directory_path = f"/workspaces/SPL/Middleput/player_data/s{season}/player_graphs"
    
    if not os.path.exists(directory_path):
        os.makedirs(directory_path)
    
    points = season_points[season_points['Season'] == season]
    players = points['Player'].unique()

    for player in players:
        fig = player_graphs_with_summary_updated(points, season, player, sorted_1, sorted_2)
        
        if fig:  # Only save if a figure is returned (i.e., player data exists)
            html_content = mpl_to_html(fig)
            with open(f"{directory_path}/{player}.html", "w") as file:
                file.write(html_content)
            plt.close(fig)

    return f"Graphs saved for {len(players)} players in {directory_path}"

save_player_graphs_to_html(points, 1, season1_sorted, season2_sorted)
save_player_graphs_to_html(points, 2, season1_sorted, season2_sorted)


'Graphs saved for 9 players in /workspaces/SPL/Middleput/player_data/s2/player_graphs'

In [7]:
file_sorted_s1 = "/workspaces/SPL/Middleput/season1.xlsx"
file_sorted_s2 = "/workspaces/SPL/Middleput/season2.xlsx"

import pandas as pd
from bs4 import BeautifulSoup

def update_player_summary(season_number, file_sorted):
    # Read the excel file
    df = pd.read_excel(file_sorted)

    # Update the Player column to hyperlink based on the season
    df['Player'] = df['Player'].apply(lambda x: f'<a href="/Middleput/player_data/s{season_number}/player_graphs/{x}.html">{x}</a>')

    # Convert the DataFrame to HTML again
    html_string = df.to_html(escape=False, index=False)

    # Construct the complete HTML
    html_complete = f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Players Summary</title>
    </head>
    <body>
        {html_string}
    </body>
    </html>
    """

    # Save the initial HTML to a file
    output_path_initial = f"/workspaces/SPL/Middleput/players_summary_s{season_number}.html"
    with open(output_path_initial, 'w') as f:
        f.write(html_complete)

    # Parse the saved HTML using BeautifulSoup
    with open(output_path_initial, "r") as html_file:
        soup = BeautifulSoup(html_file.read(), 'html.parser')

    # Link the CSS file in the HTML
    link_tag = soup.new_tag("link")
    link_tag.attrs["rel"] = "stylesheet"
    link_tag.attrs["href"] = "styles_table.css"
    soup.head.append(link_tag)

    # Wrap the table in a container div
    container_div = soup.new_tag("div", id="table-container")
    soup.table.wrap(container_div)

    # Correct the malformed anchor tags
    for a_tag in soup.find_all('a'):
        if "<a href=" in a_tag["href"]:
            a_tag.decompose()

    # Remove residual entries
    for text_element in soup.find_all(text=True):
        if '.html"&gt;' in text_element:
            text_element.replace_with(text_element.replace('.html"&gt;', ''))

    # Save the updated HTML to a file
    output_path_updated = f"/workspaces/SPL/Output/players_stats_s{season_number}.html"
    with open(output_path_updated, "w") as html_file:
        html_file.write(str(soup.prettify()))

    return f"HTML updated for season {season_number} and saved to {output_path_updated}"

update_player_summary(1, file_sorted_s1)
update_player_summary(2, file_sorted_s2)

  for text_element in soup.find_all(text=True):
  for text_element in soup.find_all(text=True):


'HTML updated for season 2 and saved to /workspaces/SPL/Output/players_stats_s2.html'