In [None]:
from kloppy import statsbomb
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Arc
import json

In [None]:
import sys

sys.path.append("/home/morten/Develop/Live-Win-Prob/utils")
from lwp_utils import get_mat_pos
from lwp_drawing_utils import draw_field

In [None]:
# Load the StatsBomb open-data competition index. Each entry is read below for
# its "competition_id" and "season_id" keys to locate the per-season match file.
with open(
    "/home/morten/Develop/Open-Data/statsbomb/open-data/data/competitions.json", "r"
) as f:
    comp = json.load(f)

In [None]:
# Read one match table per (competition, season) entry of the competition index.
all_comps = [
    pd.read_json(
        f"/home/morten/Develop/Open-Data/statsbomb/open-data/data/matches/{x['competition_id']}/{x['season_id']}.json"
    )
    for x in comp
]

# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it
# every per-season frame contributes its own labels starting at 0 again.
# The old index is deliberately dropped instead of being kept as a column:
# later cells read the match id from the first column by position.
all_games = pd.concat(all_comps, ignore_index=True)
all_games.head()

## Helper Functions

In [None]:
def calc_xG(match_id):
    """Add the shots and goals of one match to the notebook-level counters.

    Loads the shot events of ``match_id`` through kloppy's StatsBomb loader and,
    for every shot, increments ``total_shots`` at the grid cell of the shot
    location; shots whose outcome name is "Goal" also increment ``goals``.

    Both counters are globals that must exist before the first call. A
    ``KeyError`` or ``ValueError`` raised anywhere while processing the match is
    reported with a short message and stops the processing of that match;
    counts added before the error are kept.

    Parameters
    ----------
    match_id
        Identifier interpolated into the event and lineup file names.
    """
    try:
        events_path = f"/home/morten/Develop/Open-Data/statsbomb/open-data/data/events/{match_id}.json"
        lineups_path = f"/home/morten/Develop/Open-Data/statsbomb/open-data/data/lineups/{match_id}.json"
        shots = statsbomb.load(
            event_data=events_path,
            lineup_data=lineups_path,
            # Optional arguments
            coordinates="statsbomb",
            event_types=["shot"],
        )

        for shot in shots.events:
            raw = shot.raw_event
            # get_mat_pos yields the grid position as (width index, height index);
            # the grids themselves are indexed [height][width].
            col, row = get_mat_pos(raw["location"][0], raw["location"][1])
            total_shots[row][col] += 1
            scored = raw["shot"]["outcome"]["name"] == "Goal"
            if scored:
                goals[row][col] += 1
    except (KeyError, ValueError):
        print("match error")

# Simple xG

In [None]:
# Two independent 16 x 24 counters (rows x columns of the pitch grid),
# filled in place by calc_xG.
total_shots, goals = (np.zeros((16, 24)) for _ in range(2))

In [None]:
# Accumulate shot and goal counts over every match.
# The match id is the first column of the match table. It is read with .iloc
# because an integer key on a label-indexed row (row[0]) is only a deprecated
# positional fallback in pandas. A plain loop is used because calc_xG is
# called purely for its side effects on total_shots / goals.
# NOTE: calc_xG adds to the counters, so re-running this cell without
# re-creating total_shots and goals first counts every match again.
for match_id in all_games.iloc[:, 0]:
    calc_xG(match_id)

In [None]:
# Empirical xG per grid cell: goals / shots.
# Only cells with at least 10 recorded shots get a value; every other cell
# keeps the initial 0.
xG = np.zeros((16, 24))
enough_shots = total_shots >= 10
xG[enough_shots] = goals[enough_shots] / total_shots[enough_shots]

In [None]:
# Draw the empirical xG grid on the pitch.
fig, ax = plt.subplots()
draw_field(ax, xG, "white")
ax.set_title("simple xG")
plt.show()

In [None]:
import pickle

# Persist the empirical xG grid (the 16 x 24 array built above) to disk.
with open("/home/morten/Develop/Live-Win-Prob/models/simple_xG", "wb") as fp:
    pickle.dump(xG, fp)

In [None]:
# List every cell value of the empirical xG grid that is at least 0.15.
for grid_row in xG:
    for cell_xg in grid_row:
        if cell_xg >= 0.15:
            print(cell_xg)

In [None]:
# Draw the number of recorded shots per grid cell on the pitch.
fig, ax = plt.subplots()
draw_field(ax, total_shots, "white")
ax.set_title("shot locations")

## Statsbomb xG

In [None]:
# One list per grid cell (16 rows x 24 columns); calc_statsbomb_xG appends the
# StatsBomb xG value of every shot taken from that cell.
statsbombXG = [[[] for _ in range(24)] for _ in range(16)]


def calc_statsbomb_xG(match_id):
    """Collect the StatsBomb xG value of every shot of one match per grid cell.

    Loads the shot events of ``match_id`` through kloppy's StatsBomb loader and
    appends each shot's ``statsbomb_xg`` value to the list stored in the
    notebook-level ``statsbombXG`` at the grid cell of the shot location.

    A ``KeyError`` or ``ValueError`` raised anywhere while processing the match
    is reported with a short message and stops the processing of that match;
    values appended before the error are kept.

    Parameters
    ----------
    match_id
        Identifier interpolated into the event and lineup file names.
    """
    try:
        events_path = f"/home/morten/Develop/Open-Data/statsbomb/open-data/data/events/{match_id}.json"
        lineups_path = f"/home/morten/Develop/Open-Data/statsbomb/open-data/data/lineups/{match_id}.json"
        shots = statsbomb.load(
            event_data=events_path,
            lineup_data=lineups_path,
            # Optional arguments
            coordinates="statsbomb",
            event_types=["shot"],
        )

        for shot in shots.events:
            raw = shot.raw_event
            # get_mat_pos yields (width index, height index); the grid is
            # indexed [height][width].
            col, row = get_mat_pos(raw["location"][0], raw["location"][1])
            cell_values = statsbombXG[row][col]
            cell_values.append(raw["shot"]["statsbomb_xg"])
    except (KeyError, ValueError):
        print("match error")

In [None]:
# Collect the StatsBomb xG values of every match.
# The match id is the first column of the match table. It is read with .iloc
# because an integer key on a label-indexed row (row[0]) is only a deprecated
# positional fallback in pandas. A plain loop is used because
# calc_statsbomb_xG is called purely for its side effects on statsbombXG.
# NOTE: calc_statsbomb_xG appends to statsbombXG, so re-running this cell
# without re-creating statsbombXG first records every shot again.
for match_id in all_games.iloc[:, 0]:
    calc_statsbomb_xG(match_id)

In [None]:
# Collapse each cell's list of StatsBomb xG values into its mean and record
# how many shots contributed to it.
# NOTE: this replaces the lists inside statsbombXG with numbers, so the cell
# can only run once per fill of statsbombXG (a second run would call len() on
# a number).
total_shots_statsbomb = np.zeros((16, 24))
for row in range(len(statsbombXG)):
    for col in range(len(statsbombXG[row])):
        cell_values = statsbombXG[row][col]
        shot_count = len(cell_values)
        # Write into the grid cell; assigning to the bare name would replace
        # the whole array with a single integer.
        total_shots_statsbomb[row][col] = shot_count
        # Cells without any shot get 0 instead of a division by zero.
        statsbombXG[row][col] = (
            np.sum(cell_values) / shot_count if shot_count != 0 else 0
        )

In [None]:
# Draw the mean StatsBomb xG per grid cell on the pitch.
fig, ax = plt.subplots()
ax = draw_field(ax, statsbombXG, "white")
ax.set_title("statsbomb xG")

In [None]:
import pickle

# Persist the per-cell StatsBomb xG grid (a 16 x 24 nested list) to disk.
with open("/home/morten/Develop/Live-Win-Prob/models/statsbomb_xG", "wb") as fp:
    pickle.dump(statsbombXG, fp)

# xT - expected Threat

### create transition Matrix T

In [None]:
# One list per grid cell (16 rows x 24 columns). pass_t and carry_t append the
# [row, column] end cell of every move that starts in the cell.
pass_carry_loc = [[[] for _ in range(24)] for _ in range(16)]
print(len(pass_carry_loc))
print(len(pass_carry_loc[0]))

In [None]:
def pass_t(match_id):
    """Record the start and end grid cell of every completed pass of one match.

    Loads the pass events of ``match_id`` through kloppy's StatsBomb loader. A
    pass whose raw "pass" entry has no "outcome" key is treated as completed;
    for each such pass the end cell ``[end_height, end_width]`` is appended to
    the notebook-level ``pass_carry_loc`` at the start cell of the pass.

    A ``KeyError`` or ``ValueError`` raised anywhere while processing the match
    is reported with a short message and stops the processing of that match;
    moves recorded before the error are kept.

    Parameters
    ----------
    match_id
        Identifier interpolated into the event and lineup file names.
    """
    try:
        events_path = f"/home/morten/Develop/Open-Data/statsbomb/open-data/data/events/{match_id}.json"
        lineups_path = f"/home/morten/Develop/Open-Data/statsbomb/open-data/data/lineups/{match_id}.json"
        passes = statsbomb.load(
            event_data=events_path,
            lineup_data=lineups_path,
            # Optional arguments
            coordinates="statsbomb",
            event_types=["pass"],
        )

        for pass_event in passes.events:
            raw = pass_event.raw_event
            # skip passes that carry an outcome, i.e. that were not completed
            if "outcome" in raw["pass"]:
                continue
            width, height = get_mat_pos(raw["location"][0], raw["location"][1])
            end_width, end_height = get_mat_pos(
                raw["pass"]["end_location"][0],
                raw["pass"]["end_location"][1],
            )
            pass_carry_loc[height][width].append([end_height, end_width])
    except (KeyError, ValueError):
        print("match error")

In [None]:
# Record the completed passes of every match.
# The match id is the first column of the match table. It is read with .iloc
# because an integer key on a label-indexed row (row[0]) is only a deprecated
# positional fallback in pandas. A plain loop is used because pass_t is called
# purely for its side effects on pass_carry_loc.
# NOTE: pass_t appends to pass_carry_loc, so re-running this cell without
# re-creating pass_carry_loc first records every pass again.
for match_id in all_games.iloc[:, 0]:
    pass_t(match_id)

In [None]:
def get_t_for_pos(x, y):
    """Count where the moves starting in one grid cell ended.

    Parameters
    ----------
    x, y
        Indices into the notebook-level ``pass_carry_loc`` as
        ``pass_carry_loc[x][y]``, i.e. ``x`` selects the row (0..15) and ``y``
        the column (0..23) of the start cell.

    Returns
    -------
    numpy.ndarray
        A 16 x 24 grid whose entry ``[r][c]`` is the number of recorded moves
        from the start cell that ended in cell ``[r][c]``.
    """
    destination_counts = np.zeros((16, 24))
    for end_row, end_col in pass_carry_loc[x][y]:
        destination_counts[end_row][end_col] += 1
    return destination_counts

In [None]:
# Destination counts of the moves recorded so far that start in cell [15][5].
txy = get_t_for_pos(15, 5)
fig, ax = plt.subplots()
ax = draw_field(ax, txy, "white")
ax.set_title("passes from, to location")

In [None]:
def carry_t(match_id):
    """Record the start and end grid cell of the longer carries of one match.

    Loads the carry events of ``match_id`` through kloppy's StatsBomb loader.
    For every carry whose raw "duration" is greater than 1.5 the end cell
    ``[end_height, end_width]`` is appended to the notebook-level
    ``pass_carry_loc`` at the start cell of the carry.

    A ``KeyError`` or ``ValueError`` raised anywhere while processing the match
    is reported with a short message and stops the processing of that match;
    moves recorded before the error are kept.

    Parameters
    ----------
    match_id
        Identifier interpolated into the event and lineup file names.
    """
    try:
        events_path = f"/home/morten/Develop/Open-Data/statsbomb/open-data/data/events/{match_id}.json"
        lineups_path = f"/home/morten/Develop/Open-Data/statsbomb/open-data/data/lineups/{match_id}.json"
        carries = statsbomb.load(
            event_data=events_path,
            lineup_data=lineups_path,
            # Optional arguments
            coordinates="statsbomb",
            event_types=["carry"],
        )

        for carry in carries.events:
            raw = carry.raw_event
            # only use carries with a duration above 1.5
            if not (raw["duration"] > 1.5):
                continue
            width, height = get_mat_pos(raw["location"][0], raw["location"][1])
            end_width, end_height = get_mat_pos(
                raw["carry"]["end_location"][0],
                raw["carry"]["end_location"][1],
            )
            pass_carry_loc[height][width].append([end_height, end_width])
    except (KeyError, ValueError):
        print("match error")

In [None]:
# Record the carries of every match.
# The match id is the first column of the match table. It is read with .iloc
# because an integer key on a label-indexed row (row[0]) is only a deprecated
# positional fallback in pandas. A plain loop is used because carry_t is
# called purely for its side effects on pass_carry_loc.
# NOTE: carry_t appends to pass_carry_loc, so re-running this cell without
# re-creating pass_carry_loc first records every carry again.
for match_id in all_games.iloc[:, 0]:
    carry_t(match_id)

In [None]:
# Destination counts of the passes and carries recorded so far that start in
# cell [15][5].
txy = get_t_for_pos(15, 5)
fig, ax = plt.subplots()
ax = draw_field(ax, txy, "white")
ax.set_title("passes and carry from, to location")

In [None]:
# Shot probability per cell: the share of recorded actions starting in the
# cell (shots plus the moves stored in pass_carry_loc) that were shots.
S = np.zeros((16, 24))
for y in range(16):
    for x in range(24):
        shot_count = total_shots[y][x]
        total = shot_count + len(pass_carry_loc[y][x])
        # A cell without any recorded action keeps 0; dividing 0 by 0 here
        # would store NaN, which then spreads into every xT value built on S.
        if total > 0:
            S[y][x] = shot_count / total

In [None]:
# Move probability per cell: the share of recorded actions starting in the
# cell (shots plus the moves stored in pass_carry_loc) that were moves.
C = np.zeros((16, 24))
for y in range(16):
    for x in range(24):
        move_count = len(pass_carry_loc[y][x])
        total = total_shots[y][x] + move_count
        # A cell without any recorded action keeps 0; dividing 0 by 0 here
        # would store NaN, which then spreads into every xT value built on C.
        if total > 0:
            C[y][x] = move_count / total

In [None]:
# Draw the per-cell shot probability S on the pitch.
# The title names what is plotted; it previously repeated the title of the
# pass/carry destination figure.
fig = plt.figure()
ax = fig.subplots()
ax = draw_field(ax, S, "white")
ax.set_title("shot probability (S)")

In [None]:
# Draw the per-cell move probability C on the pitch.
# The title names what is plotted; it previously repeated the title of the
# pass/carry destination figure.
fig = plt.figure()
ax = fig.subplots()
ax = draw_field(ax, C, "black")
ax.set_title("move probability (C)")

In [None]:
def get_t_for_pos_prob(x, y):
    """Return the move-destination probabilities for one start cell.

    Parameters
    ----------
    x, y
        Indices into the notebook-level ``pass_carry_loc`` as
        ``pass_carry_loc[x][y]``, i.e. ``x`` selects the row (0..15) and ``y``
        the column (0..23) of the start cell.

    Returns
    -------
    numpy.ndarray
        A 16 x 24 grid whose entry ``[r][c]`` is the fraction of the moves
        recorded for the start cell that ended in cell ``[r][c]``. The grid
        sums to 1 when at least one move is recorded and is all zeros when the
        start cell has no recorded move (instead of dividing by zero, which
        would fill the grid with NaN).
    """
    Txy = np.zeros((16, 24))
    moves = pass_carry_loc[x][y]
    for end_row, end_col in moves:
        Txy[end_row][end_col] += 1

    # Normalise counts to probabilities; skipped for an empty cell so the
    # result stays a grid of zeros.
    if moves:
        Txy /= len(moves)

    return Txy

In [None]:
# Destination probabilities of the moves that start in cell [15][5].
txy = get_t_for_pos_prob(15, 5)
fig, ax = plt.subplots()
ax = draw_field(ax, txy, "white")
ax.set_title("passes and carry from, to location")

In [None]:
# Destination probabilities of the moves that start in cell [0][23]
# (first row, last column of the grid).
corner_move_prob = get_t_for_pos_prob(0, 23)
fig, ax = plt.subplots()
ax = draw_field(ax, corner_move_prob, "white")
ax.set_title("corner_move_prob")

In [None]:
# Five independent, zero-filled 16 x 24 grids, one per xT iteration step.
XT0, XT1, XT2, XT3, XT4 = (np.zeros((16, 24)) for _ in range(5))

In [None]:
# Zero-filled 16 x 24 grid that receives the xT values.
XT5 = np.zeros(shape=(16, 24), dtype=float)

In [None]:
def get_move_reward(t, xt_prev=None):
    """Return the expected xT gained by moving the ball from one cell.

    Parameters
    ----------
    t : array-like
        Transition grid of the start cell; ``t[y][x]`` is the probability of
        moving the ball to cell ``[y][x]``.
    xt_prev : array-like, optional
        xT grid the destinations are valued with, same shape as ``t``. When
        omitted, the notebook-level ``XT4`` is used, which is the grid this
        function was hard-wired to before the parameter existed. Passing the
        grid explicitly lets the same function serve every iteration step.

    Returns
    -------
    numpy.float64
        The sum over all cells of ``t[y][x] * xt_prev[y][x]``.
    """
    if xt_prev is None:
        # Looked up at call time so the current value of the global is used.
        xt_prev = XT4
    return np.sum(np.asarray(t) * np.asarray(xt_prev))

In [None]:
# xT is defined recursively, so it is computed by fixed-point iteration:
#
#   xT_{k+1}[y][x] = S[y][x] * xG[y][x] + C[y][x] * sum(T_{y,x} * xT_k)
#
# starting from xT_0 = 0, where T_{y,x} is the move-destination probability
# grid of cell [y][x]. Evaluating the formula a single time against an
# all-zero previous grid makes the move term vanish and leaves only
# S * xG, so the steps XT0 -> XT5 are actually iterated here.
N_XT_ITERATIONS = 5

# The transition grids do not depend on the iteration step; build them once.
transition_probs = [
    [get_t_for_pos_prob(y, x) for x in range(24)] for y in range(16)
]

xt_prev = np.zeros((16, 24))
for _ in range(N_XT_ITERATIONS):
    xt_next = np.zeros((16, 24))
    for y in range(16):
        for x in range(24):
            # expected xT of the cell the ball is moved to
            move_reward = np.sum(transition_probs[y][x] * xt_prev)
            # xT formula
            xt_next[y][x] = (S[y][x] * statsbombXG[y][x]) + (C[y][x] * move_reward)
    xt_prev = xt_next

XT5 = xt_prev

In [None]:
# Show the xT grid as a colour-mapped matrix with a colour bar.
fig, ax = plt.subplots()
ax.set_title("xT")
cax = ax.matshow(XT5, interpolation="nearest")
fig.colorbar(cax)

In [None]:
import pickle

# Persist the xT grid (the 16 x 24 array XT5) to disk.
with open("/home/morten/Develop/Live-Win-Prob/models/statsbomb_xT", "wb") as fp:
    pickle.dump(XT5, fp)

In [None]:
# Manual pitch drawing: the empirical xG grid as background with the pitch
# markings plotted on top, in grid units (24 wide, 16 high).

## sizes
STATSBOMB_WIDTH = 24
STATSBOMB_HEIGHT = 16
STANDARD_PITCH_SIZE_WIDTH = 120
# Distinct name for the height; assigning 80 to ..._WIDTH again would
# overwrite the width.
STANDARD_PITCH_SIZE_HEIGHT = 80

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.matshow(
    xG, extent=[0, STATSBOMB_WIDTH, 0, STATSBOMB_HEIGHT], interpolation="nearest"
)

# outline and middle line
ax.plot([0, 0], [0, STATSBOMB_HEIGHT], color="black")
ax.plot([0, STATSBOMB_WIDTH], [STATSBOMB_HEIGHT, STATSBOMB_HEIGHT], color="black")
ax.plot([STATSBOMB_WIDTH, STATSBOMB_WIDTH], [STATSBOMB_HEIGHT, 0], color="black")
ax.plot([STATSBOMB_WIDTH, 0], [0, 0], color="black")
ax.plot(
    [STATSBOMB_WIDTH / 2, STATSBOMB_WIDTH / 2], [0, STATSBOMB_HEIGHT], color="black"
)

# middle circle
centreCircle = plt.Circle(
    (STATSBOMB_WIDTH / 2, STATSBOMB_HEIGHT / 2), 1.85, color="black", fill=False
)
centreSpot = plt.Circle((STATSBOMB_WIDTH / 2, STATSBOMB_HEIGHT / 2), 0.1, color="black")

ax.add_patch(centreCircle)
ax.add_patch(centreSpot)

# left penalty area
ax.plot([3.3, 3.3], [12, 4], color="black")
ax.plot([0, 3.3], [12, 12], color="black")
ax.plot([3.3, 0], [4, 4], color="black")

# left penalty arc and penalty spot
leftArc = Arc(
    (2.2, STATSBOMB_HEIGHT / 2),
    height=3.66,
    width=3.66,
    angle=0,
    theta1=310,
    theta2=50,
    color="black",
)
leftPenSpot = plt.Circle((2.2, STATSBOMB_HEIGHT / 2), 0.1, color="black")

ax.add_patch(leftArc)
ax.add_patch(leftPenSpot)

# left 6 yard box
ax.plot([1.1, 1.1], [9.8, 6.2], color="black")
ax.plot([0, 1.1], [9.8, 9.8], color="black")
ax.plot([1.1, 0], [6.2, 6.2], color="black")

# right penalty area (left-hand markings mirrored through the pitch centre)
ax.plot(
    [STATSBOMB_WIDTH - 3.3, STATSBOMB_WIDTH - 3.3],
    [STATSBOMB_HEIGHT - 12, STATSBOMB_HEIGHT - 4],
    color="black",
)
ax.plot(
    [STATSBOMB_WIDTH - 0, STATSBOMB_WIDTH - 3.3],
    [STATSBOMB_HEIGHT - 12, STATSBOMB_HEIGHT - 12],
    color="black",
)
ax.plot(
    [STATSBOMB_WIDTH - 3.3, STATSBOMB_WIDTH - 0],
    [STATSBOMB_HEIGHT - 4, STATSBOMB_HEIGHT - 4],
    color="black",
)

# right 6 yard box
ax.plot(
    [STATSBOMB_WIDTH - 1.1, STATSBOMB_WIDTH - 1.1],
    [STATSBOMB_HEIGHT - 9.8, STATSBOMB_HEIGHT - 6.2],
    color="black",
)
ax.plot(
    [STATSBOMB_WIDTH - 0, STATSBOMB_WIDTH - 1.1],
    [STATSBOMB_HEIGHT - 9.8, STATSBOMB_HEIGHT - 9.8],
    color="black",
)
ax.plot(
    [STATSBOMB_WIDTH - 1.1, STATSBOMB_WIDTH - 0],
    [STATSBOMB_HEIGHT - 6.2, STATSBOMB_HEIGHT - 6.2],
    color="black",
)

# right penalty arc and penalty spot
rightArc = Arc(
    (STATSBOMB_WIDTH - 2.2, STATSBOMB_HEIGHT / 2),
    height=3.66,
    width=3.66,
    angle=0,
    theta1=130,
    theta2=230,
    color="black",
)
rightPenSpot = plt.Circle(
    (STATSBOMB_WIDTH - 2.2, STATSBOMB_HEIGHT / 2), 0.1, color="black"
)

ax.add_patch(rightArc)
ax.add_patch(rightPenSpot)

# Axes objects have no show() method; the figure is displayed through pyplot.
plt.show()