# In [23]:
import requests
import os
import pandas as pd
from bs4 import BeautifulSoup
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import networkx as nx
import plotly.graph_objects as go
from io import StringIO
from dash import Dash, dcc, html, Input, Output

# Function definitions for scraping and processing data
def scrape_fbref_data(url, timeout=30):
    """Download a stats page and return its last HTML table as a DataFrame.

    The page is re-serialised with BeautifulSoup and every ``<!--`` / ``-->``
    delimiter is stripped before parsing, so tables that sit inside HTML
    comments become visible to ``pandas.read_html``.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller indefinitely.

    Returns:
        The last table on the page with the top level of its column header
        dropped and every row whose ``Player`` cell equals ``'Player'``
        removed (presumably header rows repeated inside the table body).

    Raises:
        requests.HTTPError: If the server responds with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    page = requests.get(url, timeout=timeout)
    # Fail here with a clear HTTP error instead of later, when read_html
    # cannot find a table in an error page.
    page.raise_for_status()

    soup = BeautifulSoup(page.content, 'html.parser')
    html_content = soup.prettify().replace('<!--', '').replace('-->', '')

    df_list = pd.read_html(StringIO(html_content))
    df = df_list[-1]
    df.columns = df.columns.droplevel(0)
    return df[df['Player'] != 'Player']

def rename_duplicate_columns(df, suffix):
    """Make column labels unique, then append ``suffix`` to every label.

    The first occurrence of a label is kept as is; the n-th repeat
    (n >= 1) becomes ``<label>_<n>``. The frame is modified in place and
    also returned.

    Args:
        df: DataFrame whose column labels are strings.
        suffix: Text appended to every (de-duplicated) label.

    Returns:
        The same ``df`` object with its columns renamed.
    """
    occurrences = {}
    unique_labels = []
    for label in df.columns:
        seen_before = occurrences.get(label, 0)
        if seen_before:
            unique_labels.append(label + '_' + str(seen_before))
        else:
            unique_labels.append(label)
        occurrences[label] = seen_before + 1

    # Keep the name of the columns index, as assigning a Series would.
    df.columns = pd.Index(
        [label + suffix for label in unique_labels],
        name=df.columns.name,
    )
    return df

def create_full_stats_db():
    """Scrape every stats category and merge them into one wide DataFrame.

    Each category page is scraped with ``scrape_fbref_data`` and its columns
    are suffixed with ``_<category>`` by ``rename_duplicate_columns``. The
    identifying columns of the ``misc`` table form the base frame; every
    category table (including ``misc`` itself) is then left-merged onto it
    using the identifying columns as keys.

    Returns:
        The merged DataFrame with all missing values replaced by 0.
    """
    # Category name -> path segment used in the page address.
    page_slugs = {
        'passing': 'passing',
        'shooting': 'shooting',
        'passing_type': 'passing_types',
        'defense': 'defense',
        'gca': 'gca',
        'possession': 'possession',
        'misc': 'misc',
    }
    id_fields = ('Player', 'Nation', 'Pos', 'Squad', 'Age', 'Born', '90s')

    tables = {}
    for category, slug in page_slugs.items():
        page_url = f'https://fbref.com/en/comps/22/{slug}/Major-League-Soccer-Stats'
        tables[category] = rename_duplicate_columns(
            scrape_fbref_data(page_url), f"_{category}"
        )

    base_keys = [f'{field}_misc' for field in id_fields]
    # Taken before the loop, so the base rows are not affected by the
    # dropna applied to the misc table below.
    merged = tables['misc'][base_keys]

    for category, table in tables.items():
        unwanted = [
            col for col in table.columns
            if col.startswith('Matches') or col.startswith('Rk')
        ]
        table.drop(columns=unwanted, inplace=True, errors='ignore')
        # Rows with any missing value are removed from the category table;
        # the left merge then leaves NaN for them, filled with 0 at the end.
        table.dropna(axis=0, how='any', inplace=True)
        merged = pd.merge(
            merged,
            table,
            left_on=base_keys,
            right_on=[f'{field}_{category}' for field in id_fields],
            how='left',
        )

    merged.fillna(0, inplace=True)
    return merged

def position_grouping(x):
    """Map a raw position code to a coarse position-group label.

    Args:
        x: Position code such as ``'GK'``, ``'DF,MF'`` or ``'FW'``.

    Returns:
        The group label for the code, or ``"Unidentified Position"`` when
        the code is not one of the known values.
    """
    # (codes, label) pairs, checked in order.
    code_groups = (
        (('GK',), "GK"),
        (('DF', 'DF,MF'), "Defender"),
        (('FW,DF', 'DF,FW'), "Wing-Back"),
        (('MF,DF',), "Defensive-Midfielders"),
        (('MF',), "Central Midfielders"),
        (('MF,FW', 'FW,MF'), "Attacking Midfielders"),
        (('FW',), "Forwards"),
    )
    for codes, label in code_groups:
        if x in codes:
            return label
    return "Unidentified Position"

def per_90fi(dataframe):
    """Return a copy of ``dataframe`` with counting stats divided by ``90s_misc``.

    Empty strings and missing values are replaced by 0 first. A column is
    scaled only when all of the following hold:

    * its dtype is numeric (object columns holding numeric strings are
      left untouched),
    * its name does not contain any identifying column name,
    * its name contains neither ``'90'`` nor ``'%'`` (this also excludes
      ``90s_misc`` itself).

    Rows whose ``90s_misc`` is 0 are left unscaled to avoid division by zero.
    The input frame is not modified.

    Args:
        dataframe: Frame containing a numeric ``90s_misc`` column.

    Returns:
        A new DataFrame; scaled columns have float dtype.
    """
    dataframe = dataframe.replace('', np.nan)
    dataframe = dataframe.fillna(0)

    exclude_columns = ['Player_misc', 'Nation_misc', 'Pos_misc', 'Squad_misc',
                       'Age_misc', 'Born_misc', 'position_group']

    def _is_rate_candidate(col):
        dtype = dataframe[col].dtype
        # pandas' own predicate also copes with extension dtypes, which
        # np.issubdtype cannot interpret; bool is excluded to match
        # np.number semantics.
        if not pd.api.types.is_numeric_dtype(dtype) or pd.api.types.is_bool_dtype(dtype):
            return False
        if col == '90s_misc' or '90' in col or '%' in col:
            return False
        return not any(exc_col in col for exc_col in exclude_columns)

    numeric_columns = [col for col in dataframe.columns if _is_rate_candidate(col)]
    if not numeric_columns:
        return dataframe

    # Quotients are floats; cast up front so they are never written into
    # integer columns (an incompatible-dtype assignment in recent pandas).
    dataframe[numeric_columns] = dataframe[numeric_columns].astype(float)

    mask = (dataframe['90s_misc'] != 0)
    dataframe.loc[mask, numeric_columns] = (
        dataframe.loc[mask, numeric_columns].div(dataframe.loc[mask, '90s_misc'], axis=0)
    )
    return dataframe

def key_stats_db(df, position):
    """Select the core stats of one position group and scale them per 90.

    Args:
        df: Full stats frame containing ``position_group`` and the columns
            listed below.
        position: Position-group label to keep.

    Returns:
        The result of ``per_90fi`` applied to the rows of ``position``
        whose ``90s_misc`` is greater than 5, restricted to the descriptive
        and core-stat columns.
    """
    descriptive_cols = ['Player_misc', 'Nation_misc', 'Pos_misc', 'Squad_misc', 'Age_misc', 'position_group']
    metric_cols = ['90s_misc', 'Cmp%_passing', 'KP_passing', 'TB_passing_type', 'Sw_passing_type', 'PPA_passing', 'PrgP_passing',
                   'Tkl%_defense', 'Blocks_defense', 'Tkl+Int_defense', 'Clr_defense', 'PrgDist_possession', 'SCA90_gca', 'GCA90_gca',
                   'CrsPA_passing', 'xA_passing', 'Rec_possession', 'PrgR_possession', 'xG_shooting', 'Sh_shooting']

    in_group = df['position_group'] == position
    subset = df.loc[in_group, descriptive_cols + metric_cols]

    # Values that cannot be parsed as numbers become NaN and fail the test.
    nineties = pd.to_numeric(subset['90s_misc'], errors='coerce')
    return per_90fi(subset[nineties > 5])

def create_similarity_matrix(df, method='cosine'):
    """Build a player-by-player similarity matrix from a stats frame.

    The descriptive columns are dropped and every remaining column is used
    as a feature.

    Args:
        df: Frame with ``Player_misc`` and the other descriptive columns
            plus numeric feature columns.
        method: ``'cosine'`` for cosine similarity, or ``'euclidean'`` for
            the negated Euclidean distance (so larger still means more
            similar).

    Returns:
        A square DataFrame indexed and labelled by ``Player_misc``.

    Raises:
        ValueError: If ``method`` is neither ``'cosine'`` nor ``'euclidean'``.
    """
    player_names = df['Player_misc']
    features = df.drop(
        ['Player_misc', 'Nation_misc', 'Pos_misc', 'Squad_misc', 'Age_misc', 'position_group'],
        axis=1,
    )

    if method == 'cosine':
        similarity_matrix = cosine_similarity(features)
    elif method == 'euclidean':
        # A DataFrame cannot be indexed with [:, np.newaxis]; broadcast on
        # the underlying array instead.
        points = features.to_numpy(dtype=float)
        pairwise_diff = points[:, np.newaxis] - points[np.newaxis, :]
        similarity_matrix = -np.linalg.norm(pairwise_diff, axis=2)
    else:
        raise ValueError("Method must be 'cosine' or 'euclidean'")

    return pd.DataFrame(similarity_matrix, index=player_names, columns=player_names)

def create_network(player_name, similarity_matrix, threshold=0.99):
    """Build a Plotly figure linking a player to everyone above a similarity threshold.

    The selected player is drawn at the origin and every connected player
    is placed on a circle around it. Node hover text lists the player's
    stats, looked up in the module-level ``stats_df``.

    Args:
        player_name: Row label of the player in ``similarity_matrix``.
        similarity_matrix: Square DataFrame of pairwise similarities,
            labelled by player name on both axes.
        threshold: Only similarities strictly greater than this value
            produce an edge.

    Returns:
        A ``plotly.graph_objects.Figure`` with one edge trace and one node
        trace.

    Raises:
        KeyError: If ``player_name`` is not a row of ``similarity_matrix``.
    """
    G = nx.Graph()
    # Scan the player's row once instead of doing two .loc lookups per column.
    for similar_player, score in similarity_matrix.loc[player_name].items():
        if score > threshold:
            G.add_edge(player_name, similar_player, weight=score)

    pos = {player_name: (0, 0)}  # Center node at the origin
    # The row scanned above includes the player's own column, so the graph
    # can consist of the selected player alone; avoid dividing by zero then.
    ring_size = len(G.nodes) - 1
    angle_step = 2 * np.pi / ring_size if ring_size > 0 else 0.0
    radius = 3  # Radius of the circle

    for i, node in enumerate(G.nodes):
        if node == player_name:
            continue
        angle = i * angle_step
        pos[node] = (radius * np.cos(angle), radius * np.sin(angle))

    edge_x = []
    edge_y = []
    edge_text = []
    for source, target, attrs in G.edges(data=True):
        x0, y0 = pos[source]
        x1, y1 = pos[target]
        # A None entry breaks the line so each edge is drawn separately.
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])
        edge_text.append(f"Similarity: {attrs['weight']:.2f}")

    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=1, color='#FFA07A'),
        hoverinfo='text',
        text=edge_text,
        mode='lines')

    # Descriptive fields that are not shown in the hover box.
    hidden_fields = {'Player_misc', 'Nation_misc', 'Pos_misc', 'Squad_misc',
                     'Age_misc', 'Born_misc', 'position_group'}

    node_x = []
    node_y = []
    node_text = []
    node_hover_text = []
    for node in G.nodes:
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)
        node_text.append(node)
        stats = stats_df[stats_df['Player_misc'] == node].to_dict('records')[0]  # Get player stats
        hover_text = f"Player: {node}<br>"
        hover_text += "<br>".join([f"{k}: {v}" for k, v in stats.items() if k not in hidden_fields])
        node_hover_text.append(hover_text)

    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers+text',
        text=node_text,
        textposition="top center",
        textfont=dict(size=10),
        hoverinfo='text',
        hovertext=node_hover_text,
        marker=dict(
            showscale=True,
            colorscale='Viridis',
            size=15,
            colorbar=dict(
                thickness=15,
                # Nested title dict replaces the removed `titleside` shortcut.
                title=dict(text='Node Connections', side='right'),
                xanchor='left',
            ),
            line_width=2,
            color=[len(list(G.neighbors(node))) for node in G.nodes]
        )
    )

    fig = go.Figure(data=[edge_trace, node_trace],
                    layout=go.Layout(
                        # Nested font dict replaces the removed `titlefont_size` shortcut.
                        title=dict(
                            text=f'Similarity Network for {player_name}',
                            font=dict(size=24),
                        ),
                        showlegend=False,
                        hovermode='closest',
                        margin=dict(b=20, l=5, r=5, t=40),
                        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                        paper_bgcolor='#F5F5F5',  # Background color
                        plot_bgcolor='#F5F5F5'  # Plot background color
                    ))
    return fig

# Create the Dash application object used by the layout and callbacks below
app = Dash(__name__)

# Build the stats table once at import time: scrape, tag each player with a
# position group, make the 90s column numeric, then scale stats per 90.
# NOTE(review): key_stats_db() calls per_90fi() again on this already
# scaled frame, so numeric columns may be divided by 90s twice -- confirm
# which of the two normalisations is intended.
stats_df = create_full_stats_db()
stats_df = per_90fi(
    stats_df.assign(**{
        'position_group': stats_df['Pos_misc'].apply(position_grouping),
        '90s_misc': pd.to_numeric(stats_df['90s_misc'], errors='coerce'),
    })
)

# Distinct squad names, in order of first appearance
teams = stats_df['Squad_misc'].unique()

# Page layout: heading, team selector, player selector, network graph.
# The player selector starts without options; a callback fills it once a
# team has been chosen.
app.layout = html.Div(children=[
    html.H1(children="Player Similarity Network"),
    dcc.Dropdown(
        id='team-dropdown',
        placeholder="Select a team",
        options=[dict(label=team, value=team) for team in teams],
    ),
    dcc.Dropdown(
        id='player-dropdown',
        placeholder="Select a player",
    ),
    dcc.Graph(id='network-graph'),
])

# Refresh the player dropdown whenever a team is picked
@app.callback(
    Output('player-dropdown', 'options'),
    Input('team-dropdown', 'value')
)
def set_player_options(selected_team):
    """Return dropdown options for the players of ``selected_team``.

    Args:
        selected_team: Squad name chosen in the team dropdown, or ``None``
            when nothing is selected.

    Returns:
        A list of ``{'label', 'value'}`` dicts, one per distinct player of
        the squad; an empty list when no team is selected.
    """
    if selected_team is None:
        return []
    on_team = stats_df['Squad_misc'] == selected_team
    roster = stats_df.loc[on_team, 'Player_misc'].unique()
    return [dict(label=name, value=name) for name in roster]

# Update graph based on selected player
@app.callback(
    Output('network-graph', 'figure'),
    Input('player-dropdown', 'value')
)
def update_graph(selected_player):
    """Return the similarity-network figure for ``selected_player``.

    The player is compared only with players of the same position group.

    Args:
        selected_player: Player name chosen in the dropdown, or ``None``
            when nothing is selected.

    Returns:
        An empty dict when there is no selection or the player is unknown,
        a placeholder figure when the player is excluded from the
        comparison set, otherwise the figure built by ``create_network``.
    """
    if selected_player is None:
        return {}

    player_rows = stats_df[stats_df['Player_misc'] == selected_player]
    if player_rows.empty:
        return {}

    # Extract similarity matrix for the position of the selected player
    position = player_rows['position_group'].values[0]
    key_stats_df = key_stats_db(stats_df, position)

    # key_stats_db keeps only players with more than five 90s, while the
    # dropdown lists everyone; without this check the row lookup inside
    # create_network raises KeyError for the excluded players.
    if selected_player not in key_stats_df['Player_misc'].values:
        return go.Figure(layout=dict(
            title=dict(text=f'Not enough playing time to compare {selected_player}')
        ))

    similarity_matrix = create_similarity_matrix(key_stats_df, method='cosine')
    return create_network(selected_player, similarity_matrix, threshold=0.98)

# Run the app
if __name__ == '__main__':
    # Newer Dash releases removed `run_server` in favour of `run`; fall back
    # to the old name only when `run` is not available.
    start_server = app.run if hasattr(app, 'run') else app.run_server
    # NOTE(review): debug=True combined with host='0.0.0.0' exposes the
    # development tooling on every network interface -- confirm this entry
    # point is never used for a public deployment.
    start_server(debug=True, port=int(os.environ.get('PORT', 8080)), host='0.0.0.0')
