In [6]:
import numpy as np
import pandas as pd
import altair as alt
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Load the 2021 season table and derive per-match scoring rates for each team.
df = pd.read_csv("data/team/2021_season_ovr.csv")
df['goals_scored_per_match'] = df['GF']/df['MP']
df['goals_allowed_per_match'] = df['GA']/df['MP']

# League-wide goals per team per match, stored as the same scalar on every row so
# chart cells can read it as a column. Dividing by the actual matches played (MP)
# instead of a hard-coded 24 keeps this correct if the schedule length differs.
# One value serves both axes: the original note says league GF and GA totals match.
df['mean'] = df['GF'].sum()/df['MP'].sum()


In [48]:
# Plotly version: goals scored vs. allowed per match, one labelled point per team.

# League-average goals per match. The load cell writes the same scalar to every row
# of df['mean'], so the first row is enough. Using it here (instead of a typed-in
# 1.15) keeps the reference lines in step with the data and with the Altair chart.
league_mean = df['mean'].iloc[0]

fig = px.scatter(df, 
                 x="goals_scored_per_match", 
                 y = "goals_allowed_per_match",
                 labels = dict(goals_scored_per_match = "Scored", goals_allowed_per_match = "Allowed"),
                 template = 'simple_white',
                 title = "NWSL Goals Per Match, 2021",
                 text = 'Abbr',
                 width = 500,
                 height = 500
                 )

fig.update_traces(textposition = 'top center')

# Same range on both axes and a 1:1 scale anchor so the plot area stays square.
fig.update_xaxes(range = [0.6, 1.8], nticks = 7)
fig.update_yaxes(range = [0.6, 1.8], nticks = 7,
                 scaleanchor = "x",
                 scaleratio = 1)

# Dashed reference lines at the league average split the plot into quadrants.
fig.add_hline(y = league_mean, opacity = 1, line_width = 2, line_dash = 'dash', line_color = 'grey')
fig.add_vline(x = league_mean, opacity = 1, line_width = 2, line_dash = 'dash', line_color = 'grey')

fig.add_annotation(x = 1.6, y = 0.63, text = "Data from fbref.com", showarrow = False)

fig.show()

In [87]:
# Graph in Altair

# Scatter of goals scored vs. allowed per match; both axes use the same fixed domain.
points = alt.Chart(df).mark_point().encode(
    alt.X('goals_scored_per_match',
        scale = alt.Scale(domain = (0.5, 1.8)),
        axis = alt.Axis(title = "Goals Scored")
        ),
    alt.Y('goals_allowed_per_match',
        scale = alt.Scale(domain = (0.5, 1.8)),
        axis = alt.Axis(title = "Goals Allowed")
        ),
    color = alt.value('blue')
).properties(
    width = 500,
    height = 500
)

# Team abbreviations, nudged to the right of each point.
text = points.mark_text(
    align = 'left',
    baseline = 'middle',
    dx = 7,
).encode(
    text = 'Abbr'
)

# df['mean'] holds one scalar repeated on every row. Feeding the whole frame to
# mark_rule draws one rule per team on top of each other, so use a one-row frame
# and draw each reference line exactly once.
league_mean = pd.DataFrame({'mean': [df['mean'].iloc[0]]})

# Both reference rules share the same styling; previously only the vertical rule
# carried an opacity override, so the two lines rendered differently.
x_line = alt.Chart(league_mean).mark_rule(strokeDash= [10, 10]).encode(
    x = 'mean',
    color = alt.value('grey')
)

y_line = alt.Chart(league_mean).mark_rule(strokeDash= [10, 10]).encode(
    y = 'mean',
    color = alt.value('grey')
)


alt.layer(
    points, text, x_line, y_line
).properties(
    title = {
        "text": "NWSL Goals Per Match, 2021",
        "subtitle" : "Data from fbref.com",
        "subtitleColor" : "grey"
    }
)

In [79]:
# Goal differential vs. points, with team labels and a fitted regression line.

gd_axis = alt.X('GD', axis = alt.Axis(title = "Goal Differential"))

# zero = False lets the y axis start away from zero, but it clamps to the exact data
# range, which often puts labels on the edge. A manual alternative that looks better
# but must be maintained by hand is alt.Scale(domain = (15, 45)).
pts_axis = alt.Y('Pts',
                 scale = alt.Scale(zero = False),
                 axis = alt.Axis(title = "Points"))

points = alt.Chart(df).mark_point().encode(gd_axis, pts_axis, color = alt.value('blue'))

# Labels reuse the point encodings and sit just right of each marker.
text = points.mark_text(align = 'left', baseline = 'middle', dx = 7).encode(text = 'Abbr')

# Thin grey regression line of Pts on GD, derived from the same chart.
lm_line = (
    points
    .transform_regression('GD', 'Pts')
    .mark_line(size = 1)
    .encode(color = alt.value("grey"))
)

alt.layer(points, text, lm_line).properties(title = "NWSL Goal Differential and Points")

In [83]:
# Horizontal bars of the Attendance column per team, sorted descending by bar length.
attendance_bars = alt.Chart(df).mark_bar().encode(
    y = alt.Y('Abbr', sort = '-x', axis = alt.Axis(title = "")),
    x = alt.X('Attendance', axis = alt.Axis(title = "Avg. Attendance")),
    color = alt.value('blue'),
)

attendance_bars.properties(title = "NWSL Home Attendance, 2021")



In [94]:
# Load the Louisville match log and keep only home NWSL matches.
df2 = pd.read_csv("data/team/lou_games.csv")
is_home_league_match = (df2['Venue'] == 'Home') & (df2['Comp'] == 'NWSL')
df_home = df2[is_home_league_match]

# Kept for comparison only: dividing the total by a fixed 12 is not the same as
# .mean(), which by default skips rows where Attendance is missing.
wrong_way = df_home['Attendance'].sum()/12
df_home['Attendance'].mean()

6609.545454545455

In [115]:
# Restrict to NWSL matches, then average goals for/against and possession by result (W/D/L).
league_only = df2['Comp'] == 'NWSL'
df2 = df2[league_only]

result_means = df2.groupby(['Result'])[['GF', 'GA', 'Poss']].mean()
result_means

Unnamed: 0_level_0,GF,GA,Poss
Result,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
D,0.857143,0.857143,46.714286
L,0.25,2.666667,45.916667
W,2.4,0.4,46.8


In [116]:
# Average goals for/against and possession, split by the Venue column.
venue_means = df2.groupby(['Venue'])[['GF', 'GA', 'Poss']].mean()
venue_means

Unnamed: 0_level_0,GF,GA,Poss
Venue,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Away,0.75,2.083333,45.666667
Home,1.0,1.25,47.0


In [141]:
# Reshape to long form: one row per (match, goal type), ready for a grouped bar chart.
old_list = ['GF', 'GA']
new_list = ['Scored', 'Allowed']

goals_df = pd.melt(df2[['Venue'] + old_list], id_vars = ['Venue'], value_vars = old_list)

# Swap the raw column codes for reader-friendly labels.
goals_df['variable'] = goals_df['variable'].replace(dict(zip(old_list, new_list)))

In [142]:
# Mean goals scored and allowed, faceted into one column per venue.
goals_by_venue = alt.Chart(goals_df).mark_bar().encode(
    alt.Y('variable', axis = alt.Axis(title = "")),
    alt.X('mean(value)', axis = alt.Axis(title = "Goals Per Game")),
    alt.Color('variable'),
    alt.Column('Venue'),
)

goals_by_venue.properties(title = "Racing Louisville Goals Scored and Allowed by Home/Away")

In [117]:
# Number of matches for each venue/result combination (counts non-null Date values).
venue_result_groups = df2.groupby(['Venue', 'Result'])
venue_result_groups['Date'].count()

Venue  Result
Away   D         3
       L         8
       W         1
Home   D         4
       L         4
       W         4
Name: Date, dtype: int64

In [108]:
# Goals scored per match (points) with a centered rolling mean overlaid (line).

# transform_window adds a 'rolling_mean' field: the mean of GF over a window from
# 15 rows before to 15 rows after the current row. The line must encode that field.
# Plotting 'GF' here would just connect the raw points and discard the transform.
# The ':Q' type is required because 'rolling_mean' is not a column of df2, so Altair
# cannot infer its type. The explicit title matches the points layer's axis label.
line = alt.Chart(df2).mark_line().transform_window(
    rolling_mean = 'mean(GF)',
    frame = [-15, 15]
    ).encode(
    x = 'Date',
    y = alt.Y('rolling_mean:Q', title = 'GF')
)

points = alt.Chart(df2).mark_point().encode(
    x = 'Date',
    y = 'GF'
)

points + line

In [103]:
# Stand-alone LOESS smoothing demo on a synthetic random walk.

# Fixed seed so the random walk (and the figure) is reproducible.
np.random.seed(42)

# Use a dedicated name for the synthetic frame. Rebinding `df` here would silently
# replace the season table that the earlier chart cells read, so re-running any of
# them after this cell would fail or plot the wrong data.
loess_demo_df = pd.DataFrame({
    'x': range(100),
    'y': np.random.randn(100).cumsum()
})

chart = alt.Chart(loess_demo_df).mark_point().encode(
    x='x',
    y='y'
)

# Overlay the LOESS-smoothed curve of y on x as a line on top of the raw points.
chart + chart.transform_loess('x', 'y').mark_line()