In [2]:
import pandas as pd
import json
import os

In [8]:
#read all the data for analysis and store in dataframes
defence_df = pd.read_csv("./data/defense.csv")
keepers_df = pd.read_csv("./data/keepers.csv")
misc_df = pd.read_csv("./data/misc.csv")
passing_df = pd.read_csv("./data/passing.csv")
playingtime_df = pd.read_csv("./data/playingtime.csv")
possession_df = pd.read_csv("./data/possession.csv")
shooting_df = pd.read_csv("./data/shooting.csv")
stats_df = pd.read_csv("./data/stats.csv")
tm_df = pd.read_csv("./data/tm.csv")

In [37]:
#save basic cumulative stats, like matches played, goals scored etc.
tot_matches = stats_df.at[0, "matches_played"]
tot_goals = stats_df.at[0, "ttl_gls"]
tot_xg = stats_df.at[0, "ttl_xg"]
xg_perf = tot_goals - tot_xg
tot_cs = keepers_df.at[0, "clean_sheets"]

print(str(tot_goals) +" Goals Scored in " +str(tot_matches) +" Games, from an xG of " +str(tot_xg))

print(str(tot_goals / tot_matches) +" Average goals per game")

if xg_perf >= 0:
    print("xG over-performance of: " +str(xg_perf))
else:
    print("xG under-performance of: " +str(xg_perf))

print(str(tot_cs) +" clean sheets.")

#cards numbers
tot_yel = stats_df.at[0, "ttl_yellow_cards"]
tot_red = stats_df.at[0, "ttl_red_cards"]
tot_cards = tot_yel + tot_red

print("Total Cards: " +str(tot_cards))
print(str(tot_yel) +" Yellow & " +str(tot_red) +" Red")
print(str(tot_yel / tot_matches) +" Average bookings per game")

#penalty stats
pens_scored = stats_df.at[0, "ttl_pk_made"]
pens_att = stats_df.at[0, "ttl_pk_att"]
pen_pct = str((pens_scored / pens_att) * 100) +"%"

69 Goals Scored in 38 Games, from an xG of 60.1
1.8157894736842106 Average goals per game
xG over-performance of: 8.899999999999999
8 clean sheets.
Total Cards: 76
74 Yellow & 2 Red
1.9473684210526316 Average bookings per game


In [5]:
#sort the Transfermarkt data, by transfer value, then return the top 5 players
top_players = tm_df.sort_values(by=["Value"], ascending=False).head(5)
print(top_players)

    Unnamed: 0             Name    Value           DOB   Age  Height (m)  \
2          127    Todd Cantwell  3500000  Feb 27, 1998  27.0        1.80   
31        1754    Todd Cantwell  3500000  Feb 27, 1998  27.0        1.80   
19        1021    Makhtar Gueye  3000000   Dec 4, 1997  27.0        1.95   
10         652    Makhtar Gueye  3000000   Dec 4, 1997  27.0        1.95   
7          580  Harry Pickering  2600000  Dec 29, 1998  26.0        1.73   

   Nationality            Position       Team  
2      England  Attacking Midfield  Blackburn  
31     England  Attacking Midfield  Blackburn  
19     Senegal      Centre-Forward  Blackburn  
10     Senegal      Centre-Forward  Blackburn  
7      England           Left-Back  Blackburn  


In [25]:
#get some more information about team averages
#height
avg_height = tm_df.loc[:, "Height (m)"].mean()
print("Average player height: " +str(avg_height))

#age
avg_age = tm_df.loc[:, "Age"].mean()
print("Average player age: " +str(avg_age))

Average player height: 1.8250000000000002
Average player age: 27.375


In [45]:
#get data around team attacking
#shooting
tot_shots = shooting_df.at[0, "ttl_sh"]
avg_shots = tot_shots / tot_matches
tot_sot = shooting_df.at[0, "ttl_sot"]
avg_sot = tot_sot / tot_matches
pct_sot = (tot_sot / tot_shots) * 100
print("The team has taken " +str(tot_shots) +" shots, with " +str(tot_sot) +" on target. An accuracy of " +str(pct_sot) +"%.")
print("The team takes " +str(avg_shots) +" shots per match, with " +str(avg_sot) +" shots on target per match.")


The team has taken 462 shots, with 159 on target. An accuracy of 34.41558441558442%.
The team takes 12.157894736842104 shots per match, with 4.184210526315789 shots on target per match.


In [41]:
#get data around team defending
#tackles, and positions of tackles
tot_tackles_def = defence_df.at[0, "ttl_tkl_def_third"]
tot_tackles_mid = defence_df.at[0, "ttl_tkl_mid_third"]
tot_tackles_att = defence_df.at[0, "ttl_tkl_att_third"]
tot_tackles = tot_tackles_def + tot_tackles_mid + tot_tackles_att

pct_def_tackles = (tot_tackles_def / tot_tackles) * 100
pct_mid_tackles = (tot_tackles_mid / tot_tackles) * 100
pct_att_tackles = (tot_tackles_att / tot_tackles) * 100

print("A total of " +str(tot_tackles) +" tackles have been attempted. This can be broken down into: ")
print(str(pct_att_tackles) +"% in the attacking third.")
print(str(pct_mid_tackles) +"% in the midfield third.")
print(str(pct_def_tackles) +"% in the defensive third.")
print()

#other defensive metrics
pct_cs = (tot_cs / tot_matches) * 100
print("The team keep a clean sheet in " +str(pct_cs) +"% of matches.")

tot_shots_blocked = defence_df.at[0, "ttl_sh_blocked"]
avg_blocks = tot_shots_blocked / tot_matches
print("The team blocks an average of " +str(avg_blocks) +" shots per game.")

tot_int = defence_df.at[0, "ttl_int"]
avg_int = tot_int / tot_matches
print("The team intercept an average of " +str(avg_int) +" passes per game.")

tot_conc = keepers_df.at[0, "ttl_gls_ag"]
avg_conc = tot_conc / tot_matches
print("The team have conceded " +str(tot_conc) +" league goals this season. This equates to " +str(avg_conc) +" goals against per game.")

tot_sot_ag = keepers_df.at[0, "sot_ag"]
avg_sot_ag = tot_sot_ag / tot_matches
tot_saved = keepers_df.at[0, "ttl_saves"]
avg_saves = tot_saved / tot_matches
print("The team's goalkeepers have saved " +str(tot_saved) +" of " +str(tot_sot_ag) +" shots on target.")
print(str(avg_sot_ag) +" shots faced on target per game.")
print(str(avg_saves) +" saves per match.")

#defensive penalty stats
tot_pens_ag = keepers_df.at[0, "pk_att_ag"]
tot_pens_conc = keepers_df.at[0, "pk_made_ag"]
print("The team has given away " +str(tot_pens_ag) +" penalties, of which " +str(tot_pens_conc) +" have been scored.")

A total of 609 tackles have been attempted. This can be broken down into: 
12.97208538587849% in the attacking third.
38.752052545156% in the midfield third.
48.275862068965516% in the defensive third.

The team keep a clean sheet in 21.052631578947366% of matches.
The team blocks an average of 3.1052631578947367 shots per game.
The team intercept an average of 10.842105263157896 passes per game.
The team have conceded 51 league goals this season. This equates to 1.3421052631578947 goals against per game.
The team's goalkeepers have saved 132 of 182 shots on target.
4.7894736842105265 shots faced on target per game.
3.473684210526316 saves per match.
The team has given away 7 penalties, of which 7 have been scored.


In [49]:
#get data around team passing and set pieces
tot_poss = possession_df.at[0, "avg_poss"]
print("The team has an average of " +str(tot_poss) +"% possession per match.")
print()

#touch position breakdown
tot_touch_def = possession_df.at[0, "ttl_touch_def_box"] + possession_df.at[0, "ttl_touch_def_third"]
tot_touch_mid = possession_df.at[0, "ttl_touch_mid_third"]
tot_touch_att = possession_df.at[0, "ttl_touch_fthird"]
tot_touches = tot_touch_def + tot_touch_mid + tot_touch_att
pct_touch_def = (tot_touch_def / tot_touches) * 100
pct_touch_mid = (tot_touch_mid / tot_touches) * 100
pct_touch_att = (tot_touch_att / tot_touches) * 100

print(str(pct_touch_def) +"% of touches in the defensive third.")
print(str(pct_touch_mid) +"% of touches in the midfield third.")
print(str(pct_touch_att) +"% of touches in the attacking third.")

#progressive carries
tot_carries = possession_df.at[0, "ttl_carries"]
tot_carry_dist = possession_df.at[0, "ttl_carries_prog_dist"]
avg_carries = tot_carries / tot_matches
avg_prog = tot_carry_dist / tot_matches
avg_dist_per_carry = tot_carry_dist / tot_carries
print("The team makes an average of " +str(avg_carries) +" progressive carries per game. With an average distance of " +str(avg_dist_per_carry) +" per carry.")

The team has an average of 58.1% possession per match.

35.6441383073041% of touches in the defensive third.
42.21850917274773% of touches in the midfield third.
22.13735251994817% of touches in the attacking third.
The team makes an average of 391.8421052631579 progressive carries per game. With an average distance of 2.9852921423774346 per carry.
