In [1]:
!pip install pandas scipy



### LIBRARIES

In [2]:
import pandas as pd
from scipy import stats

# SUMMARY OF THIS DOCUMENT
#### In this document, we load data on shots on goal, split between the women's and men's leagues. For each goal zone, and separately for shots that were goals and shots that were not, we test the null hypothesis that the mean xGOT value is the same in the men's and the women's leagues. We use an independent two-sample t-test and look at the p-value to decide whether or not to reject this null hypothesis.

### LOADING DATA

In [3]:
# Women's leagues: read the semicolon-separated file, keeping every column
# except the listed identifier columns (match, teams, goalkeeper, going by
# their Spanish names).
xgot = pd.read_csv(
    "Data/xgot.csv",
    sep=";",
    usecols=lambda name: name not in ('Partido', 'Equipo tiro', 'Equipo portera', 'Portera'),
)

# Men's leagues: same layout, but the goalkeeper columns use the masculine
# spelling ('portero'/'Portero').
xgot_m = pd.read_csv(
    "Data/xgot_m.csv",
    sep=";",
    usecols=lambda name: name not in ('Partido', 'Equipo tiro', 'Equipo portero', 'Portero'),
)


### FUNCTION TO CALCULATE MEAN, VARIANCE, AND PERFORM T-TEST

In [4]:
def perform_t_test(data_men, data_women, zona, goal_value, *,
                   equal_var=True, zone_col_men='zona', zone_col_women='X'):
    """Compare men's and women's xGOT for one goal zone and one goal outcome.

    Both frames are filtered to the rows whose zone column equals ``zona`` and
    whose ``'Gol (0/1)'`` column equals ``goal_value``. The mean and sample
    variance of the ``'xGOT'`` column are printed for each group, followed by
    the result of an independent two-sample t-test between the two groups.

    Parameters
    ----------
    data_men, data_women : pandas.DataFrame
        Shot data. Each must contain ``'Gol (0/1)'``, ``'xGOT'`` and its zone
        column (see ``zone_col_men`` / ``zone_col_women``).
    zona : str
        Zone label to select, e.g. ``"High centre"``.
    goal_value : int
        Value of ``'Gol (0/1)'`` to select.
    equal_var : bool, keyword-only, default True
        Forwarded to ``scipy.stats.ttest_ind``. ``True`` runs the standard
        (pooled-variance) test, which is what this function always did;
        pass ``False`` for Welch's test when the printed variances or the
        group sizes differ noticeably.
    zone_col_men, zone_col_women : str, keyword-only
        Name of the zone column in each frame. The defaults (``'zona'`` and
        ``'X'``) are the column names this notebook has always used.
        NOTE(review): the two files apparently name the zone column
        differently; confirm against the CSV headers.

    Returns
    -------
    None
        Results are printed only, so calling this as the last line of a
        cell produces no extra ``Out[]`` display.
    """
    def _select(frame, zone_col):
        # One boolean mask for both conditions; keeps only the xGOT values.
        mask = (frame[zone_col] == zona) & (frame['Gol (0/1)'] == goal_value)
        return frame.loc[mask, 'xGOT']

    grupo1 = _select(data_men, zone_col_men)
    grupo2 = _select(data_women, zone_col_women)

    print(f"Zone: {zona}, Goal Value: {goal_value}")
    print("Mean of group 1 (men):", grupo1.mean())
    print("Variance of group 1 (men):", grupo1.var())
    print("Mean of group 2 (women):", grupo2.mean())
    print("Variance of group 2 (women):", grupo2.var())

    # nan_policy='omit' matches pandas' mean()/var(), which skip NaN by default.
    t_test_result = stats.ttest_ind(grupo1, grupo2, equal_var=equal_var, nan_policy='omit')
    print("T-test result:", t_test_result, "\n")

### CALCULATING MEANS, VARIANCES AND PERFORMING T-TEST FOR DIFFERENT ZONES


### High centre (Goal = 0)

In [5]:
# Men's vs. women's xGOT for shots in the "High centre" zone with Gol (0/1) == 0.
perform_t_test(xgot_m, xgot, "High centre", 0)

Zone: High centre, Goal Value: 0
Mean of group 1 (men): 0.11755102040816327
Variance of group 1 (men): 0.02042486850410267
Mean of group 2 (women): 0.15478260869565216
Variance of group 2 (women): 0.037949951690821264
T-test result: TtestResult(statistic=-1.2924521283728092, pvalue=0.19829979474916795, df=142.0) 



### High centre (Goal = 1)


In [6]:
# Men's vs. women's xGOT for shots in the "High centre" zone with Gol (0/1) == 1.
perform_t_test(xgot_m, xgot, "High centre", 1)

Zone: High centre, Goal Value: 1
Mean of group 1 (men): 0.5786666666666667
Variance of group 1 (men): 0.11461238095238094
Mean of group 2 (women): 0.5466666666666666
Variance of group 2 (women): 0.09326060606060604
T-test result: TtestResult(statistic=0.2547183032475615, pvalue=0.8010243305479151, df=25.0) 



### High left (Goal = 0)

In [7]:
# Men's vs. women's xGOT for shots in the "High left" zone with Gol (0/1) == 0.
perform_t_test(xgot_m, xgot, "High left", 0)

Zone: High left, Goal Value: 0
Mean of group 1 (men): 0.28595238095238096
Variance of group 1 (men): 0.03544419279907085
Mean of group 2 (women): 0.2123809523809524
Variance of group 2 (women): 0.026619047619047608
T-test result: TtestResult(statistic=1.5257838842504168, pvalue=0.13223223184439087, df=61.0) 



### High left (Goal = 1)


In [8]:
# Men's vs. women's xGOT for shots in the "High left" zone with Gol (0/1) == 1.
perform_t_test(xgot_m, xgot, "High left", 1)

Zone: High left, Goal Value: 1
Mean of group 1 (men): 0.6354054054054054
Variance of group 1 (men): 0.10044219219219218
Mean of group 2 (women): 0.6052941176470588
Variance of group 2 (women): 0.05262647058823529
T-test result: TtestResult(statistic=0.3509879568237149, pvalue=0.7270154498216397, df=52.0) 



### High right (Goal = 0)

In [9]:
# Men's vs. women's xGOT for shots in the "High right" zone with Gol (0/1) == 0.
perform_t_test(xgot_m, xgot, "High right", 0)

Zone: High right, Goal Value: 0
Mean of group 1 (men): 0.2618421052631579
Variance of group 1 (men): 0.036047866287339955
Mean of group 2 (women): 0.2619230769230769
Variance of group 2 (women): 0.021496153846153845
T-test result: TtestResult(statistic=-0.001831302890947036, pvalue=0.998544712214336, df=62.0) 



### High right (Goal = 1)

In [10]:
# Men's vs. women's xGOT for shots in the "High right" zone with Gol (0/1) == 1.
perform_t_test(xgot_m, xgot, "High right", 1)

Zone: High right, Goal Value: 1
Mean of group 1 (men): 0.4716129032258064
Variance of group 1 (men): 0.05148731182795699
Mean of group 2 (women): 0.5349999999999999
Variance of group 2 (women): 0.08053888888888888
T-test result: TtestResult(statistic=-0.7225367172232847, pvalue=0.47427527566301886, df=39.0) 



### Low centre (Goal = 0)

In [11]:
# Men's vs. women's xGOT for shots in the "Low centre" zone with Gol (0/1) == 0.
perform_t_test(xgot_m, xgot, "Low centre", 0)

Zone: Low centre, Goal Value: 0
Mean of group 1 (men): 0.14315068493150687
Variance of group 1 (men): 0.039699317422209675
Mean of group 2 (women): 0.12861878453038675
Variance of group 2 (women): 0.0364741927562922
T-test result: TtestResult(statistic=0.783212493240241, pvalue=0.43389597396215085, df=471.0) 



### Low centre (Goal = 1)

In [12]:
# Men's vs. women's xGOT for shots in the "Low centre" zone with Gol (0/1) == 1.
perform_t_test(xgot_m, xgot, "Low centre", 1)

Zone: Low centre, Goal Value: 1
Mean of group 1 (men): 0.5533962264150943
Variance of group 1 (men): 0.0975767053701016
Mean of group 2 (women): 0.5372727272727273
Variance of group 2 (women): 0.11802670454545454
T-test result: TtestResult(statistic=0.22400220681467567, pvalue=0.8232995118639064, df=84.0) 



### Low left (Goal = 0)

In [13]:
# Men's vs. women's xGOT for shots in the "Low left" zone with Gol (0/1) == 0.
perform_t_test(xgot_m, xgot, "Low left", 0)

Zone: Low left, Goal Value: 0
Mean of group 1 (men): 0.27375
Variance of group 1 (men): 0.04866429640718563
Mean of group 2 (women): 0.2717567567567567
Variance of group 2 (women): 0.05548043317289893
T-test result: TtestResult(statistic=0.06342470026566342, pvalue=0.9494811110112585, df=240.0) 



### Low left (Goal = 1)

In [14]:
# Men's vs. women's xGOT for shots in the "Low left" zone with Gol (0/1) == 1.
perform_t_test(xgot_m, xgot, "Low left", 1)

Zone: Low left, Goal Value: 1
Mean of group 1 (men): 0.655241935483871
Variance of group 1 (men): 0.07794709546289011
Mean of group 2 (women): 0.6696226415094342
Variance of group 2 (women): 0.07886523947750362
T-test result: TtestResult(statistic=-0.3133167995287543, pvalue=0.7544131011688744, df=175.0) 



### Low right (Goal = 0)

In [15]:
# Men's vs. women's xGOT for shots in the "Low right" zone with Gol (0/1) == 0.
perform_t_test(xgot_m, xgot, "Low right", 0)

Zone: Low right, Goal Value: 0
Mean of group 1 (men): 0.2619736842105263
Variance of group 1 (men): 0.0398397873823632
Mean of group 2 (women): 0.25840579710144923
Variance of group 2 (women): 0.04732242114236999
T-test result: TtestResult(statistic=0.11970015676131941, pvalue=0.9048304113918137, df=219.0) 



### Low right (Goal = 1)

In [16]:
# Men's vs. women's xGOT for shots in the "Low right" zone with Gol (0/1) == 1.
perform_t_test(xgot_m, xgot, "Low right", 1)

Zone: Low right, Goal Value: 1
Mean of group 1 (men): 0.6
Variance of group 1 (men): 0.0832891089108911
Mean of group 2 (women): 0.6192982456140351
Variance of group 2 (women): 0.08497449874686717
T-test result: TtestResult(statistic=-0.4029034558846663, pvalue=0.6875673541447294, df=157.0) 

