## Import necessary libraries

In [1]:
import pandas as pd
import scipy.stats as stats
from datetime import datetime

### Load data from CSV files

In [2]:
men_data = pd.read_csv('men_results.csv', parse_dates=['date'])
women_data = pd.read_csv('women_results.csv', parse_dates=['date'])

### 1. Filter data for official FIFA World Cup matches post 2002-01-01

In [3]:
tournament_filter = "FIFA World Cup"
date_cutoff = datetime(2002, 1, 1)

### Filter for World Cup matches after 2002

In [4]:
men_wc = men_data[(men_data['tournament'] == tournament_filter) & (men_data['date'] > date_cutoff)]
women_wc = women_data[(women_data['tournament'] == tournament_filter) & (women_data['date'] > date_cutoff)]

### 2. Add a total goals column

In [5]:
men_wc['total_goals'] = men_wc['home_score'] + men_wc['away_score']
women_wc['total_goals'] = women_wc['home_score'] + women_wc['away_score']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  men_wc['total_goals'] = men_wc['home_score'] + men_wc['away_score']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  women_wc['total_goals'] = women_wc['home_score'] + women_wc['away_score']


### 3. Check normality of data distributions for total goals

In [6]:
men_goals = men_wc['total_goals']
women_goals = women_wc['total_goals']

### Perform Shapiro-Wilk tests for normality

In [7]:
men_normality = stats.shapiro(men_goals)
women_normality = stats.shapiro(women_goals)

print("Men's goals normality test:", men_normality)
print("Women's goals normality test:", women_normality)

Men's goals normality test: ShapiroResult(statistic=0.9266493059134338, pvalue=8.894989197030093e-13)
Women's goals normality test: ShapiroResult(statistic=0.8491019332746058, pvalue=3.8908293844628985e-13)


### 4. Perform the Mann-Whitney U test (non-parametric) due to non-normal distribution
#### Using a one-sided test, assuming women's goals > men's goals

In [8]:
mwu_test = stats.mannwhitneyu(women_goals, men_goals, alternative='greater')

### 5. Interpret the result

In [9]:
significance_level = 0.10
p_val = mwu_test.pvalue
result = "reject" if p_val <= significance_level else "fail to reject"

### Store results in the specified dictionary format

In [10]:
result_dict = {"p_val": p_val, "result": result}

## Results

In [11]:
print("Mann-Whitney U test result:", result_dict)

Mann-Whitney U test result: {'p_val': 0.005106609825443641, 'result': 'reject'}
