# Import Packages

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.express as px
import plotly.io as pio
from dash import Dash, html, dcc, Input, Output, callback

# Read Data, Clean Data, and Add Useful Columns

In [2]:
# Main dataset
# Declare path variable where dataset is saved
# NOTE(review): absolute local path - adjust it to wherever the CSV lives on your machine
input_path = r'/Users/robertbanks/Desktop/Projects/World_Cup/Fifa_world_cup_matches.csv'
# Read file into Jupyter
data = pd.read_csv(input_path)

# Data is structured by match, with both teams in the same row
# Restructure so each row represents a given team's stats, not both teams' from the game

# Create game_id column in dataset (the row position of the match in the file)
data['game_id'] = range(len(data))

# Create opponent columns in dataset: each side's opponent is the other team in the row
data['opponent team1'] = data['team2']
data['opponent team2'] = data['team1']

# Create list of headers in dataset
data_headers = list(data.columns)

# Split the headers into those containing 'team1', those containing 'team2', or neither.
# 'team1' is tested first, so a header containing both substrings counts as a team1 header.
team1_headers = [header for header in data_headers if 'team1' in header]
team2_headers = [header for header in data_headers
                 if 'team1' not in header and 'team2' in header]
neutral_headers = [header for header in data_headers
                   if 'team1' not in header and 'team2' not in header]

# Neutral (match-level) columns belong to both per-team views
team1_headers.extend(neutral_headers)
team2_headers.extend(neutral_headers)

# Keep real copies of the original header names.
# The working lists are rewritten in place later to standardize the column names,
# so a plain assignment (an alias of the same list) would not preserve the originals.
team1_headers_orig = list(team1_headers)
team2_headers_orig = list(team2_headers)

# Define function to remove 'team1' and 'team2' from header names
def format_headers(header_list, substring):
    """Standardize a list of column headers in place.

    The first entry is renamed to 'team'; then `substring` is removed from
    every entry and leading/trailing whitespace is stripped.

    Parameters
    ----------
    header_list : list of str
        Headers to rewrite. The list object itself is modified, so every
        reference to it sees the new names. An empty list is left untouched.
    substring : str
        Text to remove from each header (e.g. 'team1').

    Returns
    -------
    None
    """
    # Nothing to rename; also avoids an IndexError on header_list[0]
    if not header_list:
        return

    header_list[0] = 'team'
    # Slice assignment keeps the same list object, preserving the in-place contract
    header_list[:] = [header.replace(substring, '').strip() for header in header_list]
        
# Build one dataframe per side of the match, then stack them:
# 1. Filter original data to create dataframes with just team1 and team2 columns respectively
# 2. Run format_headers for team1 and team2 so both frames share the same column names
# 3. Concatenate into one df to use for visualizations

# Step 1 must come before step 2: format_headers rewrites the header lists in place,
# and the selection needs the header names exactly as they appear in `data`.
# .copy() gives each frame its own data so renaming columns never touches `data`.
team1_df = data[team1_headers_orig].copy()
team2_df = data[team2_headers_orig].copy()

# Step 2: strip the 'team1' / 'team2' markers from the working header lists
format_headers(team1_headers, 'team1')
format_headers(team2_headers, 'team2')

# Replace headers in both team1_df and team2_df with the new headers
team1_df.columns = team1_headers
team2_df.columns = team2_headers

# Step 3: stack both views; ignore_index avoids duplicate index labels in the result
team_data = pd.concat([team1_df, team2_df], ignore_index = True)

# Add team rankings per Fifa to dataframe
# The workbook has two sheets: 'Sheet1' is joined on 'team' and 'Sheet2' on 'opponent'
rankings_path = r'/Users/robertbanks/Desktop/Projects/World_Cup/FIFA_Rankings.xlsx'
rankings_data_team = pd.read_excel(rankings_path, sheet_name = 'Sheet1')
rankings_data_opponent = pd.read_excel(rankings_path, sheet_name = 'Sheet2')

# Merge rankings_data with team_data to include team ranking in main dataframe
# NOTE(review): both merges use the default inner join, so rows whose team or opponent
# name has no match in the rankings sheets are dropped - confirm the names line up
data_with_rankings = (
    team_data
    .merge(rankings_data_team, on = 'team')
    .merge(rankings_data_opponent, on = 'opponent')
)

# Possession and team columns are currently formatted as objects
# Convert team name to pandas' string dtype
data_with_rankings['team'] = data_with_rankings['team'].astype("string")

# Convert possession to float: drop the trailing "%" and scale to a fraction
# (assumes the raw values are percentage text such as "54%" - TODO confirm)
data_with_rankings['possession'] = (
    data_with_rankings['possession']
    .astype("string")
    .str.rstrip("%")
    .astype(float)
    / 100
)

# Create column for ranking differential between team and opponent
# (opponent's ranking minus the team's own ranking)
data_with_rankings['ranking_differential'] = (
    data_with_rankings['opponent_ranking'] - data_with_rankings['team_ranking']
)

# Replace spaces in column names with "_" in order to be able to change labels in plots,
# then capitalize each word of the column headers (e.g. 'on target attempts' -> 'On_Target_Attempts')
data_with_rankings.columns = (
    data_with_rankings.columns
    .str.replace(' ', '_')
    .str.title()
)

# Plot Standardized Data

In [3]:
# Creating dash app

# Initialize the Dash application that hosts the dropdown and the scatter plot
app = Dash(__name__)

# Set plotly theme as dark for every figure created afterwards
pio.templates.default = "plotly_dark"

# Initial figure: possession (x) against on-target attempts (y),
# colored by ranking differential, with an OLS trendline
fig = px.scatter(
    data_with_rankings,
    x = 'Possession',
    y = 'On_Target_Attempts',
    title = "Shots on Target By Possession, 2022 World Cup",
    labels = {
        'Possession': 'Possession',
        'On_Target_Attempts': 'Shots on Target',
        'Ranking_Differential': 'Ranking Differential',
    },
    hover_name = "Team",
    hover_data = ['Opponent', 'Possession', 'On_Target_Attempts'],
    color = "Ranking_Differential",
    color_continuous_scale = px.colors.sequential.Sunset,
    opacity = .75,
    trendline = "ols",
    trendline_color_override = 'silver',
)

# Format markers: larger points with a thin white outline
fig.update_traces(marker = dict(size = 15, line = dict(color = 'white', width = .5)))

# Format axes: hide grid lines, and the zero line on the y-axis
fig.update_xaxes(showgrid = False)
fig.update_yaxes(showgrid = False, zeroline = False)

# App layout
# A metric dropdown above the scatter plot. Each option's 'value' is a column name in
# data_with_rankings; the selected value becomes the y-axis of the plot.
app.layout = html.Div([
    dcc.Dropdown(
        options = [
            {'value': 'On_Target_Attempts', 'label': 'Shots On Target'},
            {'value': 'Off_Target_Attempts', 'label': 'Shots Off Target'},
            {'value': 'Total_Attempts', 'label': 'Total Shots'},
            {'value': 'Number_Of_Goals', 'label': 'Goals Scored'},
            {'value': 'Goal_Inside_The_Penalty_Area', 'label': 'Goals Inside Penalty Area'},
            {'value': 'Attempted_Defensive_Line_Breaks', 'label': 'Attempted Defensive Line Breaks'},
            {'value': 'Completed_Defensive_Line_Breaks', 'label': 'Completed Defensive Line Breaks'},
            {'value': 'Passes', 'label': 'Passes'},
            {'value': 'Passes_Completed', 'label': 'Passes Completed'},
            {'value': 'Crosses', 'label': 'Crosses'},
            {'value': 'Crosses_Completed', 'label': 'Crosses Completed'},
            {'value': 'Defensive_Pressures_Applied', 'label': 'Defensive Pressures Applied'},
        ],
        value = 'On_Target_Attempts',
        # A cleared dropdown reports None, which is not a plottable column,
        # so do not offer the clear ("x") button
        clearable = False,
        id = 'dropdown',
    ),
    dcc.Graph(figure = fig, id = 'plot'),
])

# Build interaction
@callback(
    Output(component_id='plot', component_property='figure'),
    Input(component_id='dropdown', component_property='value')
)
def update_plot(selection):
    """Redraw the scatter plot for the metric chosen in the dropdown.

    Parameters
    ----------
    selection : str or None
        Column of data_with_rankings to plot on the y-axis (the dropdown's value).
        An empty selection falls back to 'On_Target_Attempts', the dropdown's default.

    Returns
    -------
    plotly figure
        Possession (x) against the selected metric (y), colored by ranking
        differential, with an OLS trendline.
    """
    # A cleared dropdown sends None; fall back to the default metric instead of
    # passing a missing column name to plotly
    if not selection:
        selection = 'On_Target_Attempts'

    # Human-readable axis / legend / hover labels keyed by column name
    axis_labels = dict(
        Possession = 'Possession',
        On_Target_Attempts = 'Shots on Target',
        Off_Target_Attempts = 'Shots Off Target',
        Total_Attempts = 'Total Shots',
        Number_Of_Goals = 'Goals Scored',
        Goal_Inside_The_Penalty_Area = 'Goals Inside Penalty Area',
        Attempted_Defensive_Line_Breaks = 'Attempted Defensive Line Breaks',
        Completed_Defensive_Line_Breaks = 'Completed Defensive Line Breaks',
        Passes_Completed = 'Passes Completed',
        Crosses_Completed = 'Crosses Completed',
        Switches_Of_Play_Completed = 'Switches of Play',
        Defensive_Pressures_Applied = 'Defensive Pressures Applied',
        Ranking_Differential = 'Ranking Differential',
    )

    fig = px.scatter(data_with_rankings, x = 'Possession',
                                     y = selection,
                                     labels = axis_labels,
                                     hover_name = "Team",
                                     hover_data = ['Opponent', 'Possession', selection],
                                     title = "Actions By Possession Amount, 2022 World Cup",
                                     color = "Ranking_Differential",
                                     color_continuous_scale = px.colors.sequential.Sunset,
                                     trendline = "ols",
                                     trendline_color_override = 'silver',
                                     opacity = .75)
    # Format markers
    fig.update_traces(marker_line_color = 'white', marker_line_width = .8, marker = dict(size=15))

    # Format axes
    fig.update_xaxes(showgrid = False)
    fig.update_yaxes(showgrid = False, zeroline = False)

    return fig

# Run app
# Start the Dash server with debug mode enabled, but only when this code is
# executed directly (not when it is imported as a module)
if __name__ == '__main__':
    app.run(debug=True)

# Learning Resources

#### Remove leading and trailing spaces 
https://www.freecodecamp.org/news/python-strip-how-to-trim-a-string-or-line/

#### Change data type / strip specific text
https://levelup.gitconnected.com/convert-percentage-string-to-numeric-and-vice-versa-in-pandas-18a3d66e2853

#### Seaborn color palettes
https://seaborn.pydata.org/tutorial/color_palettes.html

#### Plotly express
https://plotly.com/python/hover-text-and-formatting/

#### Plotly color palettes
https://plotly.com/python/builtin-colorscales/

#### Replace spaces in column headers
https://www.geeksforgeeks.org/remove-spaces-from-column-names-in-pandas/

#### Capitalize first letter of column header
https://stackoverflow.com/questions/39141856/capitalize-first-letter-of-each-word-in-a-dataframe-column