# Top three vs bottom three in Allsvenskan 2021

In [3]:
import pandas as pd
import glob, json

import plotly.express as px
import plotly.graph_objects as go

In [4]:
# Read the events of every Allsvenskan 2021 game and store them in one combined dataframe.

# glob returns files in arbitrary order; sorting keeps the row order identical on every run.
files = sorted(glob.glob('../../playmakeropendata/allsvenskan, 2021/[0-9]*.json'))
if not files:
    # Without this guard pd.concat([]) fails with an opaque "No objects to concatenate".
    raise FileNotFoundError(
        "No event files matched '../../playmakeropendata/allsvenskan, 2021/[0-9]*.json'"
    )

dfs = []
for file in files:
    # The events contain non-ASCII team names (e.g. 'Malmö FF'), so read the JSON as UTF-8
    # explicitly instead of relying on the platform's default encoding.
    with open(file, encoding='utf-8') as f:
        dfs.append(pd.json_normalize(json.load(f)['events']))

# ignore_index gives every event a unique row label instead of repeating 0..n for each game.
df = pd.concat(dfs, ignore_index=True)

In [6]:
# Add a new column, time_section, which makes it possible to group events by game time.
# Each 10-minute interval gets its own section: minutes 0-9 -> "0", 10-19 -> "1", etc.
# NOTE(review): assumes game_time holds game minutes as numbers - confirm against the data.
#
# The section is computed numerically (minute // 10) rather than taken from the first
# character of the value, so a minute of 100 or more ends up in section "10" instead of being
# merged into section "1". Labels stay strings, exactly as before for minutes 0-99.
# Rows whose game_time is missing or not a number get no section (NaN) and are therefore
# left out of any groupby on time_section.
df['time_section'] = (
    pd.to_numeric(df['game_time'], errors='coerce')
    .floordiv(10)
    .map(lambda section: str(int(section)), na_action='ignore')
)

In [8]:
# Split the events into two dataframes, selected by the value of the 'team' column:
#   df_top_teams  - events of the top three teams (Malmö FF, AIK and Djurgården)
#   df_last_teams - events of the last three teams (Östersund, Örebro and Halmstad)

TOP_TEAMS = ['Malmö FF', 'AIK', 'Djurgården']
LAST_TEAMS = ['Östersund', 'Örebro', 'Halmstad']

df_top_teams = df[df['team'].isin(TOP_TEAMS)]
df_last_teams = df[df['team'].isin(LAST_TEAMS)]

In [13]:
# Plot the number of occurrences per action for the top three vs the bottom three teams.
# One figure per action.

# Shared x-axis: every time section present in the data, ordered numerically (labels that are
# not numbers sort last). Both traces are aligned to this list, so plotly's categorical axis
# stays in game-time order and a section without events is drawn as 0 instead of being skipped.
time_sections = (
    df['time_section'].dropna().drop_duplicates()
    .sort_values(key=lambda labels: pd.to_numeric(labels, errors='coerce'))
    .tolist()
)

# NaN is skipped on purpose: "== NaN" never matches a row, so it would only yield an empty figure.
for action in df['action'].dropna().unique():

    fig = go.Figure()

    for trace_name, events in (("Top", df_top_teams), ("Bottom", df_last_teams)):
        # Number of events of this action per time section for the given group of teams.
        counts = (
            events.loc[events['action'] == action, 'time_section']
            .value_counts()
            .reindex(time_sections, fill_value=0)
        )
        fig.add_trace(go.Scatter(x=time_sections, y=counts, mode="lines+markers", name=trace_name))

    fig.update_layout(title=f'Top three vs bottom three: {action}',
                      xaxis_title='Game time (grouped by 10m)',
                      yaxis_title='Number of occurrences')

    fig.show()

In [10]:
# Plot the number of occurrences per attack type for the top three vs the last three teams.
# One figure per attack type.

# Shared x-axis: every time section present in the data, ordered numerically (labels that are
# not numbers sort last). Both traces are aligned to this list, so plotly's categorical axis
# stays in game-time order and a section without events is drawn as 0 instead of being skipped.
attack_time_sections = (
    df['time_section'].dropna().drop_duplicates()
    .sort_values(key=lambda labels: pd.to_numeric(labels, errors='coerce'))
    .tolist()
)

# Events without an attack type (NaN) are skipped: "== NaN" never matches a row, so they
# would only yield an empty figure titled "nan".
for attack_type in df['attack_type'].dropna().unique():

    fig = go.Figure()

    for trace_name, events in (("Top", df_top_teams), ("Last", df_last_teams)):
        # Number of events of this attack type per time section for the given group of teams.
        counts = (
            events.loc[events['attack_type'] == attack_type, 'time_section']
            .value_counts()
            .reindex(attack_time_sections, fill_value=0)
        )
        fig.add_trace(go.Scatter(x=attack_time_sections, y=counts, mode="lines+markers", name=trace_name))

    fig.update_layout(title=f'Top 3 vs last 3 in: {attack_type}',
                      xaxis_title='Game time (grouped 10m)',
                      yaxis_title='Count')

    fig.show()