In [1]:
# Install necessary libraries
!pip install pandas matplotlib seaborn



### LIBRARIES

In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

# SUMMARY OF THIS DOCUMENT
#### In this document, we load shot-on-goal data, split between the women's and men's leagues. We seek to assess how strongly the goalkeeper's height is related to the value of xGOT. To do so, we compute the Pearson and Spearman correlation coefficients.

### LOADING DATA

In [6]:
# Columns kept from each league's shots file. The two files name the
# shot-zone column differently: "X" (women) versus "zona" (men).
women_columns = ["Arriba/abajo", "X", "xGOT", "Gol (0/1)", "Altura", "Resultante\nde cada tiro"]
men_columns = ["Arriba/abajo", "zona", "xGOT", "Gol (0/1)", "Altura", "Resultante\nde cada tiro"]

# Women's leagues: read only the needed columns, then index with the same
# list so the frame's columns follow the order given above.
xgot = pd.read_csv("Data/xgot.csv", sep=";", usecols=women_columns)[women_columns]

# Men's leagues: same procedure on the men's file.
xgot_m = pd.read_csv("Data/xgot_m.csv", sep=";", usecols=men_columns)[men_columns]


# GENERAL TESTS (Pearson and Spearman)
### Overall Pearson and Spearman coefficients for each league, computed over all shots before splitting by zone.

In [7]:
# League-wide correlation between xGOT and 'Altura' (height), computed over
# every shot in each frame, with no zone filter applied.

# Women's league
women_xgot_all = xgot['xGOT']
women_height_all = xgot['Altura']
pearson_women = women_xgot_all.corr(women_height_all, method='pearson')
spearman_women = women_xgot_all.corr(women_height_all, method='spearman')

# Men's league
men_xgot_all = xgot_m['xGOT']
men_height_all = xgot_m['Altura']
pearson_men = men_xgot_all.corr(men_height_all, method='pearson')
spearman_men = men_xgot_all.corr(men_height_all, method='spearman')

print(
    f"Pearson (women): {pearson_women}, "
    f"Spearman (women): {spearman_women}"
)
print(
    f"Pearson (men): {pearson_men}, "
    f"Spearman (men): {spearman_men}"
)


Pearson (women): 0.02069486923605981, Spearman (women): 0.023786705883772043
Pearson (men): 0.03766921728073218, Spearman (men): 0.06102745155802024


### PEARSON AND SPEARMAN TEST FOR EACH ZONE

### High centre

In [8]:
# Keep only the shots labelled "High centre" in each league. The zone label
# is stored in column 'zona' for the men's frame and 'X' for the women's.
resultante_filtrada_1 = xgot_m.loc[xgot_m['zona'] == "High centre"]
resultante2_filtrada_1 = xgot.loc[xgot['X'] == "High centre"]

# Pull out the two series being correlated for each league.
men_xgot, men_height = resultante_filtrada_1['xGOT'], resultante_filtrada_1['Altura']
women_xgot, women_height = resultante2_filtrada_1['xGOT'], resultante2_filtrada_1['Altura']

# Men's league
pearson_high_centre_men = men_xgot.corr(men_height, method='pearson')
spearman_high_centre_men = men_xgot.corr(men_height, method='spearman')

# Women's league
pearson_high_centre_women = women_xgot.corr(women_height, method='pearson')
spearman_high_centre_women = women_xgot.corr(women_height, method='spearman')

print(
    f"High Centre Pearson (men): {pearson_high_centre_men}, "
    f"Spearman (men): {spearman_high_centre_men}"
)
print(
    f"High Centre Pearson (women): {pearson_high_centre_women}, "
    f"Spearman (women): {spearman_high_centre_women}"
)


High Centre Pearson (men): -0.02658198585055975, Spearman (men): 0.05537729365449394
High Centre Pearson (women): 0.28309642447793265, Spearman (women): 0.25000076715269487


### High left

In [9]:
# Keep only the shots labelled "High left" in each league. The zone label
# is stored in column 'zona' for the men's frame and 'X' for the women's.
resultante_filtrada_1 = xgot_m.loc[xgot_m['zona'] == "High left"]
resultante2_filtrada_1 = xgot.loc[xgot['X'] == "High left"]

# Pull out the two series being correlated for each league.
men_xgot, men_height = resultante_filtrada_1['xGOT'], resultante_filtrada_1['Altura']
women_xgot, women_height = resultante2_filtrada_1['xGOT'], resultante2_filtrada_1['Altura']

# Men's league
pearson_high_left_men = men_xgot.corr(men_height, method='pearson')
spearman_high_left_men = men_xgot.corr(men_height, method='spearman')

# Women's league
pearson_high_left_women = women_xgot.corr(women_height, method='pearson')
spearman_high_left_women = women_xgot.corr(women_height, method='spearman')

print(
    f"High Left Pearson (men): {pearson_high_left_men}, "
    f"Spearman (men): {spearman_high_left_men}"
)
print(
    f"High Left Pearson (women): {pearson_high_left_women}, "
    f"Spearman (women): {spearman_high_left_women}"
)


High Left Pearson (men): 0.13023377098905028, Spearman (men): 0.17362825527583012
High Left Pearson (women): -0.2470112787134169, Spearman (women): -0.1688285983209536


### High right

In [10]:
# Keep only the shots labelled "High right" in each league. The zone label
# is stored in column 'zona' for the men's frame and 'X' for the women's.
resultante_filtrada_1 = xgot_m.loc[xgot_m['zona'] == "High right"]
resultante2_filtrada_1 = xgot.loc[xgot['X'] == "High right"]

# Pull out the two series being correlated for each league.
men_xgot, men_height = resultante_filtrada_1['xGOT'], resultante_filtrada_1['Altura']
women_xgot, women_height = resultante2_filtrada_1['xGOT'], resultante2_filtrada_1['Altura']

# Men's league
pearson_high_right_men = men_xgot.corr(men_height, method='pearson')
spearman_high_right_men = men_xgot.corr(men_height, method='spearman')

# Women's league
pearson_high_right_women = women_xgot.corr(women_height, method='pearson')
spearman_high_right_women = women_xgot.corr(women_height, method='spearman')

print(
    f"High Right Pearson (men): {pearson_high_right_men}, "
    f"Spearman (men): {spearman_high_right_men}"
)
print(
    f"High Right Pearson (women): {pearson_high_right_women}, "
    f"Spearman (women): {spearman_high_right_women}"
)


High Right Pearson (men): 0.16566293721524916, Spearman (men): 0.1155719602449546
High Right Pearson (women): -0.1658519585126925, Spearman (women): -0.06437957167994164


### Low centre

In [11]:
# Keep only the shots labelled "Low centre" in each league. The zone label
# is stored in column 'zona' for the men's frame and 'X' for the women's.
resultante_filtrada_1 = xgot_m.loc[xgot_m['zona'] == "Low centre"]
resultante2_filtrada_1 = xgot.loc[xgot['X'] == "Low centre"]

# Pull out the two series being correlated for each league.
men_xgot, men_height = resultante_filtrada_1['xGOT'], resultante_filtrada_1['Altura']
women_xgot, women_height = resultante2_filtrada_1['xGOT'], resultante2_filtrada_1['Altura']

# Men's league
pearson_low_centre_men = men_xgot.corr(men_height, method='pearson')
spearman_low_centre_men = men_xgot.corr(men_height, method='spearman')

# Women's league
pearson_low_centre_women = women_xgot.corr(women_height, method='pearson')
spearman_low_centre_women = women_xgot.corr(women_height, method='spearman')

print(
    f"Low Centre Pearson (men): {pearson_low_centre_men}, "
    f"Spearman (men): {spearman_low_centre_men}"
)
print(
    f"Low Centre Pearson (women): {pearson_low_centre_women}, "
    f"Spearman (women): {spearman_low_centre_women}"
)


Low Centre Pearson (men): 0.06518274658381393, Spearman (men): 0.03778038452307337
Low Centre Pearson (women): 0.04682638527164579, Spearman (women): -0.0006146908057101451


### Low left

In [12]:
# Keep only the shots labelled "Low left" in each league. The zone label
# is stored in column 'zona' for the men's frame and 'X' for the women's.
resultante_filtrada_1 = xgot_m.loc[xgot_m['zona'] == "Low left"]
resultante2_filtrada_1 = xgot.loc[xgot['X'] == "Low left"]

# Pull out the two series being correlated for each league.
men_xgot, men_height = resultante_filtrada_1['xGOT'], resultante_filtrada_1['Altura']
women_xgot, women_height = resultante2_filtrada_1['xGOT'], resultante2_filtrada_1['Altura']

# Men's league
pearson_low_left_men = men_xgot.corr(men_height, method='pearson')
spearman_low_left_men = men_xgot.corr(men_height, method='spearman')

# Women's league
pearson_low_left_women = women_xgot.corr(women_height, method='pearson')
spearman_low_left_women = women_xgot.corr(women_height, method='spearman')

print(
    f"Low Left Pearson (men): {pearson_low_left_men}, "
    f"Spearman (men): {spearman_low_left_men}"
)
print(
    f"Low Left Pearson (women): {pearson_low_left_women}, "
    f"Spearman (women): {spearman_low_left_women}"
)


Low Left Pearson (men): 0.0029396441481030736, Spearman (men): -0.016559623602087674
Low Left Pearson (women): 0.026227538865748915, Spearman (women): 0.01894354985129023


### Low right

In [13]:
# Keep only the shots labelled "Low right" in each league. The zone label
# is stored in column 'zona' for the men's frame and 'X' for the women's.
resultante_filtrada_1 = xgot_m.loc[xgot_m['zona'] == "Low right"]
resultante2_filtrada_1 = xgot.loc[xgot['X'] == "Low right"]

# Pull out the two series being correlated for each league.
men_xgot, men_height = resultante_filtrada_1['xGOT'], resultante_filtrada_1['Altura']
women_xgot, women_height = resultante2_filtrada_1['xGOT'], resultante2_filtrada_1['Altura']

# Men's league
pearson_low_right_men = men_xgot.corr(men_height, method='pearson')
spearman_low_right_men = men_xgot.corr(men_height, method='spearman')

# Women's league
pearson_low_right_women = women_xgot.corr(women_height, method='pearson')
spearman_low_right_women = women_xgot.corr(women_height, method='spearman')

print(
    f"Low Right Pearson (men): {pearson_low_right_men}, "
    f"Spearman (men): {spearman_low_right_men}"
)
print(
    f"Low Right Pearson (women): {pearson_low_right_women}, "
    f"Spearman (women): {spearman_low_right_women}"
)


Low Right Pearson (men): 0.13787257400216832, Spearman (men): 0.09626494529590258
Low Right Pearson (women): -0.006552526320874321, Spearman (women): 0.0231467622354955
