In [None]:
import pandas as pd
import numpy as np
import scipy.stats as stats

#import plotly.express as px
import plotly.graph_objects as go
import chart_studio

from credentials import credentials

In [None]:
# Chart Studio credential setup, currently disabled.
# NOTE(review): presumably this was run once to store the account details locally so the
# chart_studio.plotly.plot(...) upload calls further down can authenticate -- confirm
# before running the upload cells in a fresh environment.
#chart_studio.tools.set_credentials_file(username=credentials['username'], api_key=credentials['api_key'])

<h5>Load match data into dataFrame from CSV</h5>

In [None]:
# Read the raw per-match records; later cells join nationality columns onto this frame.
match_df = pd.read_csv(filepath_or_buffer = 'data.csv')
match_df.head(n = 5)

<h5>Add ref nationality to match_df</h5>

In [None]:
# The file is read as headerless: the first row is data and the column names are supplied here.
ref_columns = ['refName', 'refNationality']
refNationality_df = pd.read_csv('refNames.csv', header = None, names = ref_columns)
refNationality_df.head()

In [None]:
# Left join keeps every match; a referee with no entry in the lookup gets NaN nationality.
match_df = match_df.merge(refNationality_df, how = 'left', on = 'refName')
match_df.head()

<h5>Add team nationality to match_df</h5>

In [None]:
# The file is read as headerless: the first row is data and the column names are supplied here.
team_columns = ['teamName', 'teamNationality']
teamNationality_df = pd.read_csv('TeamCountries.csv', header = None, names = team_columns)
teamNationality_df.head()

In [None]:
# Plain {team name: nationality} lookup built from the two-column frame.
teamNationality_dict = teamNationality_df.set_index('teamName')['teamNationality'].to_dict()

In [None]:
# Join the team lookup twice: once for the left (home) team and once for the right (away)
# team. The second join collides on the lookup's column names, so pandas suffixes the
# first join's columns with _x and the second join's with _y.
match_df = (
    match_df
    .merge(teamNationality_df, how = 'left', left_on = 'leftTeam', right_on = 'teamName')
    .merge(teamNationality_df, how = 'left', left_on = 'rightTeam', right_on = 'teamName')
)
match_df.head()

In [None]:
# Discard the duplicated join keys and give the suffixed nationality columns meaningful names.
match_df = (
    match_df
    .drop(columns = ['teamName_x', 'teamName_y'])
    .rename(columns = {
        'teamNationality_x' : 'leftNationality',
        'teamNationality_y' : 'rightNationality',
    })
)
match_df.head()

<h5>Check team names</h5>

In [None]:
# List every team in the nationality lookup that never appears as a left (home) team.
left_team_values = match_df['leftTeam'].values
for team in teamNationality_df['teamName']:
    is_left_team = team in left_team_values
    if not is_left_team:
        print(team, is_left_team)

In [None]:
# List every left (home) team in the match data that has no entry in the nationality lookup.
lookup_team_values = teamNationality_df['teamName'].values
for team in match_df['leftTeam'].unique():
    has_nationality = team in lookup_team_values
    if not has_nationality:
        print(team, has_nationality)

We can remove "Southern Kings", "Cats", and "undefined" from teamNationality_df.

In [None]:
# Show the lookup rows for the team names that are about to be removed.
teamNationality_df[teamNationality_df['teamName'].isin(['Southern Kings', 'Cats', 'undefined'])]

In [None]:
# Remove the unwanted lookup rows by team name rather than by hard-coded row labels, so the
# cell does not depend on the CSV's row order and can be re-run without a KeyError
# (a second run simply finds nothing left to drop).
teams_to_remove = ['Southern Kings', 'Cats', 'undefined']
rows_to_remove = teamNationality_df.index[teamNationality_df['teamName'].isin(teams_to_remove)]
teamNationality_df.drop(rows_to_remove, inplace = True)

# `value in series` tests the index labels, not the data, so membership has to be checked
# against the underlying values for these sanity checks to mean anything.
print('Southern Kings' in teamNationality_df['teamName'].values)
print('Cats' in teamNationality_df['teamName'].values)

<h5>Create penaltyDiff(l-r) column<br>leftTeam is home team, so negative value means home team has fewer penalties against</h5>


In [None]:
# Left (home) penalties minus right (away) penalties, one value per match.
match_df['penaltyDiff(l-r)'] = match_df['leftPenalties'].sub(match_df['rightPenalties'])
match_df.head()

<h5>Create scoreDiff(l-r) column<br>leftTeam is home team, so negative value means home team scored fewer points</h5>


In [None]:
# Left (home) score minus right (away) score, one value per match.
match_df['scoreDiff(l-r)'] = match_df['leftScore'].sub(match_df['rightScore'])
match_df.head()

<h3>Charts</h3>

In [None]:
# Box plots of the per-match penalty difference for all matches, grouped by referee
# nationality; a one-way ANOVA across the three groups is reported in the title.
penalty_diff = match_df['penaltyDiff(l-r)']
ref_nationality = match_df['refNationality']

yAUS = penalty_diff[ref_nationality == 'AUS']
yNZL = penalty_diff[ref_nationality == 'NZL']
yZAR = penalty_diff[ref_nationality == 'ZAR']

f_val, p_val = stats.f_oneway(yAUS, yNZL, yZAR)

fig = go.Figure()
for trace_y, trace_name, trace_color in (
    (yAUS, 'Australian Refs', '#FFBD00'),
    (yNZL, 'New Zealand Refs', '#000000'),
    (yZAR, 'South African Refs', '#007A4D'),
):
    fig.add_trace(go.Box(y = trace_y, name = trace_name, marker_color = trace_color,
                         boxmean = True, boxpoints = 'suspectedoutliers'))

f_text = str(round(f_val, 3))
p_text = str(round(p_val, 3))
fig.update_layout(title_text = "<b>All Matches, By Ref Nationality<br>2009 - 2019</b><br>ANOVA Results: F-ratio: " + f_text + ", P-value: " + p_text)
fig.show()

In [None]:
# 2017 onward: per-match penalty difference when the referee and the home (left) team share
# a nationality and the away (right) team is from a different country.
same_country_groups = {
    nat: match_df.loc[
        match_df['matchYear'].ge(2017)
        & match_df['refNationality'].eq(nat)
        & match_df['leftNationality'].eq(nat)
        & match_df['rightNationality'].ne(nat),
        'penaltyDiff(l-r)',
    ]
    for nat in ('AUS', 'NZL', 'ZAR')
}
yAUS = same_country_groups['AUS']
yNZL = same_country_groups['NZL']
yZAR = same_country_groups['ZAR']

# One-way ANOVA across the three nationalities; only the F-ratio is shown in the title.
f_val, p_val = stats.f_oneway(yAUS, yNZL, yZAR)

fig = go.Figure()
box_style = dict(boxmean = True, boxpoints = 'outliers')
fig.add_trace(go.Box(y = yAUS, name = 'Australian Ref and Home Team', marker_color = '#FFBD00', **box_style))
fig.add_trace(go.Box(y = yNZL, name = 'New Zealand Ref and Home Team', marker_color = '#000000', **box_style))
fig.add_trace(go.Box(y = yZAR, name = 'South African Ref and Home Team', marker_color = '#007A4D', **box_style))

fig.update_layout(
    title_text = "<b>Per-match Penalty Difference, by Ref and Home Team Nationality: 2017 - 2019</b><br>Ref and Home Team From Same Country. Away Team From Different Country.<br>ANOVA Results: F-ratio: " + str(round(f_val, 3)),
    yaxis_title = 'Penalty Difference (Home Team - Away Team)',
    showlegend = False,
)

fig.show()

In [None]:
# Horizontal bar chart (2017 onward) of the mean penalty difference when the referee and the
# home (left) team share a nationality and the away (right) team is from another country.
# The figure is created once, directly from the bar trace.
recent = match_df['matchYear'] >= 2017

yAUS = match_df[recent & (match_df['refNationality'] == 'AUS') & (match_df['leftNationality'] == 'AUS') & (match_df['rightNationality'] != 'AUS')]['penaltyDiff(l-r)']
yNZL = match_df[recent & (match_df['refNationality'] == 'NZL') & (match_df['leftNationality'] == 'NZL') & (match_df['rightNationality'] != 'NZL')]['penaltyDiff(l-r)']
yZAR = match_df[recent & (match_df['refNationality'] == 'ZAR') & (match_df['leftNationality'] == 'ZAR') & (match_df['rightNationality'] != 'ZAR')]['penaltyDiff(l-r)']

# penaltyDiff(l-r) is home minus away, so the sign is flipped to express the difference
# in the home team's favour (positive = home team penalised less).
AUS_mean = yAUS.mean() * -1
NZL_mean = yNZL.mean() * -1
ZAR_mean = yZAR.mean() * -1

fig = go.Figure(go.Bar(
    x=[ZAR_mean, NZL_mean, AUS_mean],
    y=['South African Ref and Home Team ', 'New Zealand Ref and Home Team ', 'Australian Ref and Home Team '],
    orientation='h',
    marker_color = ['#007A4D', '#000000', '#FFBD00']))

fig.update_layout(title_text = "<b>Average Penalty Difference, by Ref and Home Team Nationality: 2017 - 2019</b><br>Ref and Home Team From Same Country. Away Team From Different Country.",
                 xaxis_title = 'Average Penalty Difference In Favor of Home Team',
                 plot_bgcolor = '#EFFFFF')

fig.update_xaxes(gridcolor = '#DDDDDD')
fig.update_yaxes(gridcolor = '#EFFFFF')

fig.update_layout(showlegend = False)

# Uploads the figure to Chart Studio under this filename without opening a browser tab.
chart_studio.plotly.plot(fig, filename = 'homeRef_2017-2019', auto_open=False)

fig.show()

In [None]:
# All years: per-match penalty difference when the referee and the home (left) team share a
# nationality and the away (right) team is from a different country.
same_country_groups = {
    nat: match_df.loc[
        match_df['refNationality'].eq(nat)
        & match_df['leftNationality'].eq(nat)
        & match_df['rightNationality'].ne(nat),
        'penaltyDiff(l-r)',
    ]
    for nat in ('AUS', 'NZL', 'ZAR')
}
yAUS = same_country_groups['AUS']
yNZL = same_country_groups['NZL']
yZAR = same_country_groups['ZAR']

# One-way ANOVA across the three nationalities; only the F-ratio is shown in the title.
f_val, p_val = stats.f_oneway(yAUS, yNZL, yZAR)

fig = go.Figure()
box_style = dict(boxmean = True, boxpoints = 'suspectedoutliers')
fig.add_trace(go.Box(y = yAUS, name = 'Australian Ref and Home Team', marker_color = '#FFBD00', **box_style))
fig.add_trace(go.Box(y = yNZL, name = 'New Zealand Ref and Home Team', marker_color = '#000000', **box_style))
fig.add_trace(go.Box(y = yZAR, name = 'South African Ref and Home Team', marker_color = '#007A4D', **box_style))

fig.update_layout(
    title_text = "<b>Per-match Penalty Difference, by Ref and Home Team Nationality: 2009 - 2019</b><br>Ref and Home Team From Same Country. Away Team From Different Country.<br>ANOVA Results: F-ratio: " + str(round(f_val, 3)),
    yaxis_title = 'Penalty Difference (Home Team - Away Team)',
    showlegend = False,
)

fig.show()

In [None]:
# Horizontal bar chart (all years) of the mean penalty difference when the referee and the
# home (left) team share a nationality and the away (right) team is from another country.
penalty_diff = match_df['penaltyDiff(l-r)']
ref_nat = match_df['refNationality']
home_nat = match_df['leftNationality']
away_nat = match_df['rightNationality']

yAUS = penalty_diff[(ref_nat == 'AUS') & (home_nat == 'AUS') & (away_nat != 'AUS')]
yNZL = penalty_diff[(ref_nat == 'NZL') & (home_nat == 'NZL') & (away_nat != 'NZL')]
yZAR = penalty_diff[(ref_nat == 'ZAR') & (home_nat == 'ZAR') & (away_nat != 'ZAR')]

# penaltyDiff(l-r) is home minus away; negating expresses it in the home team's favour.
AUS_mean = -yAUS.mean()
NZL_mean = -yNZL.mean()
ZAR_mean = -yZAR.mean()

bar_trace = go.Bar(
    x = [ZAR_mean, NZL_mean, AUS_mean],
    y = ['South African Ref and Home Team ', 'New Zealand Ref and Home Team ', 'Australian Ref and Home Team '],
    orientation = 'h',
    marker_color = ['#007A4D', '#000000', '#FFBD00'],
)
fig = go.Figure(bar_trace)

fig.update_layout(
    title_text = "<b>Average Penalty Difference, by Ref and Home Team Nationality: 2009 - 2019</b><br>Ref and Home Team From Same Country. Away Team From Different Country.",
    xaxis_title = 'Average Penalty Difference In Favor of Home Team',
    plot_bgcolor = '#EFFFFF',
    showlegend = False,
)

fig.update_xaxes(gridcolor = '#DDDDDD')
fig.update_yaxes(gridcolor = '#EFFFFF')

# Uploads the figure to Chart Studio under this filename without opening a browser tab.
chart_studio.plotly.plot(fig, filename = 'homeRef', auto_open=False)

fig.show()

In [None]:
# All years: per-match penalty difference when the referee and the home (left) team share a
# nationality and the away (right) team is from a different country. Same data as the
# 'suspectedoutliers' version of this chart, drawn with boxpoints = 'outliers'.
fig = go.Figure()

yAUS = match_df[(match_df['refNationality'] == 'AUS') & (match_df['leftNationality'] == 'AUS') & (match_df['rightNationality'] != 'AUS')]['penaltyDiff(l-r)']
yNZL = match_df[(match_df['refNationality'] == 'NZL') & (match_df['leftNationality'] == 'NZL') & (match_df['rightNationality'] != 'NZL')]['penaltyDiff(l-r)']
yZAR = match_df[(match_df['refNationality'] == 'ZAR') & (match_df['leftNationality'] == 'ZAR') & (match_df['rightNationality'] != 'ZAR')]['penaltyDiff(l-r)']

# One-way ANOVA across the three nationalities; only the F-ratio is shown in the title.
f_val, p_val = stats.f_oneway(yAUS, yNZL, yZAR)

fig.add_trace(go.Box(y = yAUS, name = 'Australian Ref and Home Team', marker_color = '#FFBD00', boxmean = True, boxpoints = 'outliers'))
fig.add_trace(go.Box(y = yNZL, name = 'New Zealand Ref and Home Team', marker_color = '#000000', boxmean = True, boxpoints = 'outliers'))
# Trace name is the category label on the chart; spelled to match the other two traces.
fig.add_trace(go.Box(y = yZAR, name = 'South African Ref and Home Team', marker_color = '#007A4D', boxmean = True, boxpoints = 'outliers'))

fig.update_layout(title_text = "<b>Per-match Penalty Difference, by Ref and Home Team Nationality: 2009 - 2019</b><br>Ref and Home Team From Same Country. Away Team From Different Country.<br>ANOVA Results: F-ratio: " + str(round(f_val, 3)),
                 yaxis_title = 'Penalty Difference (Home Team - Away Team)')
fig.update_layout(showlegend = False)

fig.show()

In [None]:
# Mean penalty difference for South African home teams hosting a foreign side, split by
# referee nationality and by whether the referee shares a nationality with either team.
penalty_diff = match_df['penaltyDiff(l-r)']
ref_nat = match_df['refNationality']
away_nat = match_df['rightNationality']

# Common filter for every group below: home team from ZAR, away team not from ZAR.
zar_hosts_foreign = (match_df['leftNationality'] == 'ZAR') & (away_nat != 'ZAR')

# "neu" = referee from neither team's country; "non" = referee shares a team's nationality.
yAUS_neu = penalty_diff[zar_hosts_foreign & (ref_nat == 'AUS') & (away_nat != 'AUS')]
yAUS_non = penalty_diff[zar_hosts_foreign & (ref_nat == 'AUS') & (away_nat == 'AUS')]

yNZL_neu = penalty_diff[zar_hosts_foreign & (ref_nat == 'NZL') & (away_nat != 'NZL')]
yNZL_non = penalty_diff[zar_hosts_foreign & (ref_nat == 'NZL') & (away_nat == 'NZL')]

yZAR_non = penalty_diff[zar_hosts_foreign & (ref_nat == 'ZAR')]

# Pooled mean over every match refereed by an AUS or NZL official whose nationality differs
# from the away team's (a single mean over all such matches, not a mean of the two group means).
neutral_matches = zar_hosts_foreign & ref_nat.isin(['AUS', 'NZL']) & (away_nat != ref_nat)
neutralAverage = penalty_diff[neutral_matches].mean()

# penaltyDiff(l-r) is home minus away; negating expresses it in the home team's favour.
neutralAverage = -neutralAverage

neuAUS_mean = -yAUS_neu.mean()
nonAUS_mean = -yAUS_non.mean()
neuNZL_mean = -yNZL_neu.mean()
nonNZL_mean = -yNZL_non.mean()
nonZAR_mean = -yZAR_non.mean()

bar_trace = go.Bar(
    x = [nonZAR_mean, nonNZL_mean, nonAUS_mean, neuNZL_mean, neuAUS_mean],
    y = ['Non-neutral South African Ref ', 'Non-neutral New Zealand Ref ', 'Non-neutral Australian Ref ', 'Neutral New Zealand Ref ', 'Neutral Australian Ref '],
    orientation = 'h',
    marker_color = ['#007A4D', '#000000', '#FFBD00', '#DEDEDE', '#DEDEDE'],
    showlegend = False,
)
fig = go.Figure(bar_trace)

# Legend-only placeholder trace: it draws nothing on the plot and exists so the legend
# can carry the neutral-average label.
legend_entry = go.Scatter(
    x = [1],
    y = [5],
    marker = dict(color = "#FF0000"),
    mode = 'lines',
    visible = 'legendonly',
    name = "Average of Neutral Means: " + str(round(neutralAverage, 2)),
)
fig.add_trace(legend_entry)

# Vertical reference line at the neutral average, spanning the full plot height.
fig.add_shape(
    type = "line",
    x0 = neutralAverage, x1 = neutralAverage,
    yref = 'paper',
    y0 = 0, y1 = 1,
    line = dict(color = "#FF9999"),
)

fig.update_layout(
    title_text = "<b>Average Penalty Difference by Ref Nationality and Neutrality, 2009 - 2019</b><br>Home team from South Africa. Away team not from South Africa.",
    xaxis_title = 'Average Penalty Difference in Favor of Home Team',
    plot_bgcolor = '#EFFFFF',
)

fig.update_xaxes(gridcolor = '#DDDDDD')
fig.update_yaxes(gridcolor = '#EFFFFF')

# Pin the legend inside the top-right corner of the plot area.
fig.update_layout(legend = {'yanchor' : "top", 'y' : 0.99, 'xanchor' : "right", 'x' : 0.99})

# Uploads the figure to Chart Studio under this filename without opening a browser tab.
chart_studio.plotly.plot(fig, filename = 'homeSA_awayNonSA', auto_open=False)

fig.show()

In [None]:
# Absolute gap between each non-neutral referee group's mean penalty difference and the
# pooled neutral-referee mean (South African home team, foreign away team).
penalty_diff = match_df['penaltyDiff(l-r)']
ref_nat = match_df['refNationality']
away_nat = match_df['rightNationality']
zar_home = match_df['leftNationality'] == 'ZAR'

# Note: in this cell the y*_non names hold scalar means, not per-match Series.
yAUS_non = penalty_diff[zar_home & (ref_nat == 'AUS') & (away_nat == 'AUS')].mean()
yNZL_non = penalty_diff[zar_home & (ref_nat == 'NZL') & (away_nat == 'NZL')].mean()
yZAR_non = penalty_diff[zar_home & (ref_nat == 'ZAR') & (away_nat != 'ZAR')].mean()

# Pooled mean over matches refereed by an AUS or NZL official whose nationality differs
# from the away team's.
neutral_matches = ref_nat.isin(['AUS', 'NZL']) & zar_home & (away_nat != 'ZAR') & (away_nat != ref_nat)
neutralAverage = penalty_diff[neutral_matches].mean()

x = [abs(group_mean - neutralAverage) for group_mean in (yZAR_non, yNZL_non, yAUS_non)]
y = ['Non-neutral South African Refs ', 'Non-neutral New Zealand Refs ', 'Non-neutral Australian Refs ']

fig = go.Figure(go.Bar(
    x = x,
    y = y,
    orientation = 'h',
    marker_color = ['#007A4D', '#000000', '#FFBD00'],
    showlegend = False,
))

fig.update_layout(
    title_text = "<b>Dissimilarity Between Neutral Average and Non-neutral Penalty Difference, 2009 - 2019</b><br>Home team from South Africa. Away team not from South Africa.",
    xaxis_title = 'Dissimilarity Between Neutral and Non-neutral Penalty Difference',
    plot_bgcolor = '#EFFFFF',
)

fig.update_xaxes(gridcolor = '#DDDDDD')
fig.update_yaxes(gridcolor = '#EFFFFF')

# Uploads the figure to Chart Studio under this filename without opening a browser tab.
chart_studio.plotly.plot(fig, filename = 'diff_non-neu', auto_open = False)

fig.show()

In [None]:
# 2017 onward: per-match penalty difference for South African home teams hosting a foreign
# side, split by referee nationality and by whether the referee is neutral.
penalty_diff = match_df['penaltyDiff(l-r)']
ref_nat = match_df['refNationality']
away_nat = match_df['rightNationality']

# Common filter for every group below: 2017 or later, home team from ZAR, away team not from ZAR.
recent_zar_host = (match_df['matchYear'] >= 2017) & (match_df['leftNationality'] == 'ZAR') & (away_nat != 'ZAR')

# "neu" = referee from neither team's country; "non" = referee shares a team's nationality.
yAUS_neu = penalty_diff[recent_zar_host & (ref_nat == 'AUS') & (away_nat != 'AUS')]
yAUS_non = penalty_diff[recent_zar_host & (ref_nat == 'AUS') & (away_nat == 'AUS')]

yNZL_neu = penalty_diff[recent_zar_host & (ref_nat == 'NZL') & (away_nat != 'NZL')]
yNZL_non = penalty_diff[recent_zar_host & (ref_nat == 'NZL') & (away_nat == 'NZL')]

yZAR_non = penalty_diff[recent_zar_host & (ref_nat == 'ZAR')]

# One-way ANOVA across all five groups; only the F-ratio is shown in the title.
f_val, p_val = stats.f_oneway(yAUS_neu, yAUS_non, yNZL_neu, yNZL_non, yZAR_non)

fig = go.Figure()
for trace_y, trace_name, trace_color in (
    (yAUS_neu, 'Australian Refs, Neutral', '#DD9B00'),
    (yAUS_non, 'Australian Refs, Not Neutral', '#FFBD00'),
    (yNZL_neu, 'New Zealand Refs, Neutral', '#666666'),
    (yNZL_non, 'New Zealand Refs, Not Neutral', '#000000'),
    (yZAR_non, 'South African Refs, not Neutral', '#007A4D'),
):
    fig.add_trace(go.Box(y = trace_y, name = trace_name, marker_color = trace_color,
                         boxmean = True, boxpoints = 'outliers'))

fig.update_layout(
    title_text = "<b>Per-match Penalty Difference by Ref Nationality and Neutrality, 2017 - 2019</b><br>Home team from South Africa. Away team not from South Africa.<br>ANOVA Results: F-ratio: " + str(round(f_val, 3)),
    yaxis_title = 'Penalty Difference (Home Team - Away Team)',
    showlegend = False,
)

fig.show()