In [17]:
import numpy as np
import pandas as pd
import plotly.express as px

In [18]:
# Load the 2021 season table and derive per-match scoring rates.
# Presumably one row per team (the plots below label points with 'Abbr') -- confirm against the CSV.
df = pd.read_csv("data/team/2021_season_ovr.csv")
df['goals_scored_per_match'] = (df['GF'] / df['MP']).round(2)
df['goals_allowed_per_match'] = (df['GA'] / df['MP']).round(2)

# League-wide goals per team per match, broadcast to every row.
# Total goals over total matches played replaces the earlier hard-coded
# divisor of 24: the result is identical when every team has played 24
# matches and stays correct when 'MP' differs between rows.
# The same for both GF and GA by definition as it's total goals scored/2.
df['mean'] = df['GF'].sum() / df['MP'].sum()


In [19]:
# Scatter of goals scored vs. goals allowed per match, each point labelled
# with the row's 'Abbr' value.
fig = px.scatter(df,
                 x = "goals_scored_per_match",
                 y = "goals_allowed_per_match",
                 labels = dict(goals_scored_per_match = "Scored", goals_allowed_per_match = "Allowed"),
                 template = 'simple_white',
                 title = "NWSL Goals Per Match, 2021",
                 text = 'Abbr',
                 width = 500,
                 height = 500
                 )

fig.update_traces(textposition = 'top center')

# Identical ranges on both axes, with y anchored to x at a 1:1 ratio, so the
# plot area stays square and distances are comparable in both directions.
fig.update_xaxes(range = [0.6, 1.8], nticks = 7)
fig.update_yaxes(range = [0.6, 1.8], nticks = 7,
                 scaleanchor = "x", # make the y axis tied to X
                 scaleratio = 1)

# Dashed reference lines at the league average. The value is read from the
# 'mean' column (the same number on every row) instead of a literal 1.15, so
# the lines follow the data if the source table changes.
league_avg = float(df['mean'].iloc[0])
fig.add_hline(y = league_avg, opacity = 1, line_width = 2, line_dash = 'dash', line_color = 'grey')
fig.add_vline(x = league_avg, opacity = 1, line_width = 2, line_dash = 'dash', line_color = 'grey')

# Data-source credit placed in the lower-right corner of the plot area.
fig.add_annotation(x = 1.6, y = 0.63, text = "Data from fbref.com", showarrow = False)

fig.show()

In [20]:
# Points against goal differential, with an OLS trendline drawn by plotly express.
gd_pts_options = {
    "x": "GD",
    "y": "Pts",
    "trendline": "ols",
    "labels": {"GD": "Goal Differential", "Pts": "Points"},
    "template": "simple_white",
    "title": "NWSL Goals and Results, 2021",
    "text": "Abbr",
}
fig = px.scatter(df, **gd_pts_options)

# Label placement is a trace-level property: px.scatter accepts `text` but
# (per the plotly express API reference) has no textposition argument, so it
# is applied to the traces after the figure has been built.
fig.update_traces(textposition="top center")

fig.show()

In [46]:
# Match log (presumably Racing Louisville's, going by the file name and the
# chart titles further down), restricted to rows whose competition is NWSL.
df2 = (
    pd.read_csv("data/team/lou_games.csv")
      .loc[lambda games: games['Comp'] == 'NWSL']
)

# Average goals for, goals against and possession, split by result (W/D/L).
df2.groupby(['Result'])[['GF', 'GA', 'Poss']].agg('mean')

Unnamed: 0_level_0,GF,GA,Poss
Result,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D,0.857143,0.857143,46.714286
L,0.25,2.666667,45.916667
W,2.4,0.4,46.8


In [35]:
# Reshape to long form: one row per (match, GF-or-GA) pair, keeping the venue.
goals_df = pd.melt(df2[['Venue', 'GF', 'GA']],
                   id_vars = ['Venue'],
                   value_vars = ['GF', 'GA'])

# Swap the raw column codes for reader-friendly labels.
old_list = ['GF', 'GA']
new_list = ['Scored', 'Allowed']
goals_df['variable'] = goals_df['variable'].replace(dict(zip(old_list, new_list)))

# Mean goals per match for every venue / label combination, as a flat frame
# with columns Venue, variable, Goals.
grouped_df = (
    goals_df
    .groupby(['Venue', 'variable'])['value']
    .mean()
    .reset_index(name = 'Goals')
)

grouped_df


Unnamed: 0,Venue,variable,Goals
0,Away,Allowed,2.083333
1,Away,Scored,0.75
2,Home,Allowed,1.25
3,Home,Scored,1.0


In [45]:
# Goals per match, allowed vs. scored, with one facet column per venue.
goals_bar_options = {
    "x": "variable",
    "y": "Goals",
    "color": "variable",
    "facet_col": "Venue",
    "labels": {"variable": "Allowed/Scored", "Goals": "Goals Per Match"},
    "template": "simple_white",
    "title": "Racing Louisville Struggles with Defense on the Road",
}
fig = px.bar(grouped_df, **goals_bar_options)

fig.show()

In [44]:
# Number of matches for each venue / result pair. Counting the 'Date' column
# means only rows with a non-null date contribute.
record_df = (
    df2.groupby(['Venue', 'Result'])['Date']
       .count()
       .rename('Matches')
       .reset_index()
)

record_df

Unnamed: 0,Venue,Result,Matches
0,Away,D,3
1,Away,L,8
2,Away,W,1
3,Home,D,4
4,Home,L,4
5,Home,W,4


In [49]:
# Match counts by result, coloured by result, with one facet column per venue.
record_bar_options = {
    "x": "Result",
    "y": "Matches",
    "color": "Result",
    "facet_col": "Venue",
    "template": "simple_white",
    "title": "Racing Louisville is Much Better at Home",
}
fig = px.bar(record_df, **record_bar_options)

fig.show()