In [32]:
import pandas as pd
import plotly.graph_objects as go
from haversine import haversine_vector
from scipy.spatial.distance import pdist
from scipy.spatial.distance import squareform

In [3]:
ACCESS_TOKEN = "pk.eyJ1IjoiZ29yY2hha292dnYiLCJhIjoiY2xrd2prMzkzMDN0ajNkbnp0bjBheXcxZiJ9.UyiKwYPxhFvUWdUgRUbI6w"

In [4]:
last_season_results_df = pd.read_csv("D1_21-22.csv")
stadium_coords_df = pd.read_csv("stadiums-with-GPS-coordinates.csv")
last_season_results_df.head()

Unnamed: 0,Div,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,...,AvgC<2.5,AHCh,B365CAHH,B365CAHA,PCAHH,PCAHA,MaxCAHH,MaxCAHA,AvgCAHH,AvgCAHA
0,D1,13/08/2021,19:30,M'gladbach,Bayern Munich,1,1,D,1,1,...,3.01,0.75,1.99,1.94,1.99,1.93,2.1,1.96,1.98,1.88
1,D1,14/08/2021,14:30,Augsburg,Hoffenheim,0,4,A,0,1,...,2.3,0.25,1.97,1.96,1.98,1.94,2.02,1.99,1.96,1.91
2,D1,14/08/2021,14:30,Bielefeld,Freiburg,0,0,D,0,0,...,1.64,0.25,1.75,2.05,1.79,2.16,1.8,2.2,1.76,2.12
3,D1,14/08/2021,14:30,Stuttgart,Greuther Furth,5,1,H,2,0,...,2.16,-0.75,2.02,1.77,2.09,1.83,2.12,1.86,2.07,1.8
4,D1,14/08/2021,14:30,Union Berlin,Leverkusen,1,1,D,1,1,...,1.76,0.0,1.92,2.01,1.92,2.01,1.95,2.02,1.91,1.96


In [5]:
teams_coords_df = stadium_coords_df.loc[stadium_coords_df["FDCOUK"].isin(last_season_results_df["HomeTeam"])]

In [6]:
last_season_results_df["HomeTeam"].unique()

array(["M'gladbach", 'Augsburg', 'Bielefeld', 'Stuttgart', 'Union Berlin',
       'Wolfsburg', 'Dortmund', 'Mainz', 'FC Koln', 'RB Leipzig',
       'Bochum', 'Ein Frankfurt', 'Freiburg', 'Greuther Furth', 'Hertha',
       'Leverkusen', 'Hoffenheim', 'Bayern Munich'], dtype=object)

In [7]:
set(last_season_results_df["HomeTeam"]) - set(teams_coords_df["FDCOUK"])

{'Bielefeld', 'Bochum', 'RB Leipzig', 'Union Berlin'}

In [8]:
missing_data = {
    'Team': ['Arminia Bielefeld', 'VfL Bochum', 'RB Leipzig', 'Union Berlin'],
    'FDCOUK': ['Bielefeld', 'Bochum', 'RB Leipzig', 'Union Berlin'],
    'City': ['Bielefeld', 'Bochum', 'Leipzig', 'Berlin'],
    'Stadium': ['SchücoArena', 'Vonovia Ruhrstadion', 'Red Bull Arena', 'Stadion An der Alten Försterei'],
    'Capacity': [27240, 30272, 42500, 22706],
    'Latitude': [52.021168, 51.481663, 51.345261, 52.453489],
    'Longitude': [8.541486, 7.222778, 12.361012, 13.288811],
    'Country': ['Germany', 'Germany', 'Germany', 'Germany']
}
missing_df = pd.DataFrame(missing_data)
missing_df

Unnamed: 0,Team,FDCOUK,City,Stadium,Capacity,Latitude,Longitude,Country
0,Arminia Bielefeld,Bielefeld,Bielefeld,SchücoArena,27240,52.021168,8.541486,Germany
1,VfL Bochum,Bochum,Bochum,Vonovia Ruhrstadion,30272,51.481663,7.222778,Germany
2,RB Leipzig,RB Leipzig,Leipzig,Red Bull Arena,42500,51.345261,12.361012,Germany
3,Union Berlin,Union Berlin,Berlin,Stadion An der Alten Försterei,22706,52.453489,13.288811,Germany


In [9]:
teams_coords_df

Unnamed: 0,Team,FDCOUK,City,Stadium,Capacity,Latitude,Longitude,Country
62,FC Augsburg,Augsburg,Augsburg,SGL arena,30660,48.3225,10.882222,Germany
63,Bayer Leverkusen,Leverkusen,Leverkusen,BayArena,30210,51.038256,7.002206,Germany
64,Bayern Munich,Bayern Munich,Munich,Allianz Arena,71000,48.218775,11.624753,Germany
65,Borussia Dortmund,Dortmund,Dortmund,Signal Iduna Park,80645,51.492569,7.451842,Germany
66,Borussia Mönchengladbach,M'gladbach,Mönchengladbach,Borussia-Park,54010,51.174583,6.385464,Germany
68,Eintracht Frankfurt,Ein Frankfurt,Frankfurt,Commerzbank-Arena,51500,50.068572,8.645458,Germany
69,SC Freiburg,Freiburg,Freiburg,MAGE SOLAR Stadion,24000,47.988889,7.893056,Germany
72,Hertha BSC,Hertha,Berlin,Olympiastadion,74244,52.514722,13.239444,Germany
73,TSG 1899 Hoffenheim,Hoffenheim,Sinsheim,Rhein-Neckar Arena,30150,49.239008,8.888281,Germany
74,1. FSV Mainz 05,Mainz,Mainz,Coface Arena,34000,49.984167,8.224167,Germany


In [10]:
TABLE_COORDS_COLUMNS = {
    "FDCOUK": "team",
    "Latitude": "lat",
    "Longitude": "lon"
}
all_teams_coords_df = pd.concat([teams_coords_df, missing_df], axis=0)
all_teams_coords_df = all_teams_coords_df[["FDCOUK", "Latitude", "Longitude"]].rename(columns=TABLE_COORDS_COLUMNS)[
        TABLE_COORDS_COLUMNS.values()]
all_teams_coords_df

Unnamed: 0,team,lat,lon
62,Augsburg,48.3225,10.882222
63,Leverkusen,51.038256,7.002206
64,Bayern Munich,48.218775,11.624753
65,Dortmund,51.492569,7.451842
66,M'gladbach,51.174583,6.385464
68,Ein Frankfurt,50.068572,8.645458
69,Freiburg,47.988889,7.893056
72,Hertha,52.514722,13.239444
73,Hoffenheim,49.239008,8.888281
74,Mainz,49.984167,8.224167


In [27]:
# quick plot of teams location on map
fig = go.Figure()
fig.add_trace(
        go.Scattermapbox(
            mode="markers+text",
            lat=all_teams_coords_df.lat.tolist(),
            lon=all_teams_coords_df.lon.tolist(),
            marker={"size": 10, "color": "black"},
            hovertext=all_teams_coords_df.team,
        )
    )
fig.update_layout(mapbox={
        "accesstoken": ACCESS_TOKEN,
        "zoom": 5,
        "center": {
            "lat": pd.concat([all_teams_coords_df.lat, all_teams_coords_df.lat], axis=0).mean(),
            "lon": pd.concat([all_teams_coords_df.lon, all_teams_coords_df.lon], axis=0).mean(),
        },
    },
    margin=dict(l=0, r=0, t=0, b=0),
    showlegend=False,
)

fig.show()
fig.write_image("../bundesliga_teams_coords.png")

In [45]:
# compute matrix of haversine distances among teams
#teams_distance_matrix_df = haversine_vector(all_teams_coords_df[["lat", "lon"]], all_teams_coords_df[["lat", "lon"]])
teams_distance_matrix_df = pd.DataFrame([haversine_vector([[row.lat, row.lon]] * len(all_teams_coords_df), all_teams_coords_df[["lat", "lon"]]) for index, row in all_teams_coords_df.iterrows()], columns = all_teams_coords_df.team, index = all_teams_coords_df.team
)
teams_distance_matrix_df

team,Augsburg,Leverkusen,Bayern Munich,Dortmund,M'gladbach,Ein Frankfurt,Freiburg,Hertha,Hoffenheim,Mainz,Stuttgart,Wolfsburg,Greuther Furth,FC Koln,Bielefeld,Bochum,RB Leipzig,Union Berlin
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Augsburg,0.0,411.132977,56.153853,429.54597,452.565843,253.180436,224.800381,495.09906,178.121166,267.378038,132.202001,456.983639,129.762217,409.270596,443.715022,438.162863,352.433034,489.937409
Leverkusen,411.132977,0.0,457.154331,59.419368,45.649148,158.434476,345.113023,459.267548,241.011021,145.612423,296.382905,304.166905,332.315154,14.662233,152.58257,51.638336,374.915214,460.3456
Bayern Munich,56.153853,457.154331,0.0,471.019883,499.8087,298.740606,278.242693,491.191285,230.532655,315.896523,187.446756,472.085225,148.255148,456.351803,476.433124,480.70422,351.640743,485.429118
Dortmund,429.54597,59.419368,471.019883,0.0,82.088744,179.199654,390.877881,412.029207,270.4922,176.309967,325.934588,252.27455,335.657972,74.020313,95.286179,15.906709,340.749501,413.731485
M'gladbach,452.565843,45.649148,499.8087,82.088744,0.0,201.340475,370.511453,493.633057,279.337266,185.400096,334.006189,334.359019,377.344114,43.467083,176.167344,67.457342,416.123525,495.17812
Ein Frankfurt,253.180436,158.434476,298.740606,179.199654,201.340475,0.0,237.664393,419.417455,93.884971,31.524636,148.12139,302.670196,180.949192,157.883968,217.240638,186.262649,297.624819,417.849896
Freiburg,224.800381,345.113023,278.242693,390.877881,370.511453,237.664393,0.0,630.312886,157.081912,223.17615,133.24476,535.600608,282.153959,335.581992,450.752868,391.350484,492.442971,627.183357
Hertha,495.09906,459.267548,491.191285,412.029207,493.633057,419.417455,630.312886,0.0,475.087098,448.179706,500.970658,165.214511,371.336945,472.122117,324.302117,427.481173,143.306835,7.585183
Hoffenheim,178.121166,241.011021,230.532655,270.4922,279.337266,93.884971,157.081912,475.087098,0.0,95.681113,55.642204,379.634481,155.32562,236.907439,310.326998,275.925795,340.096799,472.278592
Mainz,267.378038,145.612423,315.896523,176.309967,185.400096,31.524636,223.17615,448.179706,95.681113,0.0,151.279924,326.109652,206.939464,142.350069,227.589554,180.810331,328.445407,446.829982
