In [1]:
import pandas as pd
import regex as re
import numpy as np

In [147]:
def win_conditions(row):
    """Collapse any penalty-shootout description into the label 'penalties'.

    Parameters
    ----------
    row : mapping
        A match record exposing a 'Win Conditions' string.

    Returns
    -------
    str
        'penalties' when the text mentions penalties, otherwise the
        original 'Win Conditions' value unchanged.
    """
    outcome = row['Win Conditions']
    return 'penalties' if 'penalties' in outcome else outcome

In [148]:
def home_winner(row):
    """Classify a match from the home team's point of view.

    Parameters
    ----------
    row : mapping
        A match record with 'Home Goals', 'Away Goals', 'Home Team' and
        'Win Conditions'.

    Returns
    -------
    str
        'Win' if the home side scored more, or the score is level and the
        home team's name appears in 'Win Conditions'; 'Draw' if the score is
        level and the match had a 'normal ending'; 'Loss' otherwise.
    """
    home_goals, away_goals = row['Home Goals'], row['Away Goals']
    if home_goals > away_goals:
        return 'Win'
    if not (home_goals == away_goals):
        return 'Loss'

    # Level score: the tie-break text decides the outcome.
    conditions = row['Win Conditions']
    if conditions == 'normal ending':
        return 'Draw'
    return 'Win' if row['Home Team'] in conditions else 'Loss'

In [149]:
def away_winner(row):
    """Classify a match from the away team's point of view.

    Parameters
    ----------
    row : mapping
        A match record with 'Home Goals', 'Away Goals', 'Away Team' and
        'Win Conditions'.

    Returns
    -------
    str
        'Win' if the away side scored more, or the score is level and the
        away team's name appears in 'Win Conditions'; 'Draw' if the score is
        level and the match had a 'normal ending'; 'Loss' otherwise.
    """
    away_goals = row['Away Goals']
    home_goals = row['Home Goals']
    if away_goals > home_goals:
        return 'Win'

    level = home_goals == away_goals
    if level and row['Win Conditions'] == 'normal ending':
        return 'Draw'
    if level and row['Away Team'] in row['Win Conditions']:
        return 'Win'
    return 'Loss'

In [150]:
def setting_history_matches(df):
    """Return a copy of ``df`` enriched with per-side result columns.

    Three columns are derived row by row:

    * ``win_conditions`` - normalised win condition (see ``win_conditions``)
    * ``home_winner``    - result from the home team's perspective
    * ``away_winner``    - result from the away team's perspective

    The raw 'Win Conditions' column is dropped from the returned frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Match data containing 'Home Team', 'Away Team', 'Home Goals',
        'Away Goals' and 'Win Conditions'.

    Returns
    -------
    pandas.DataFrame
        A new frame; the frame passed in is left untouched.
    """
    # Work on a copy so the caller's frame does not silently gain columns,
    # which would make re-running notebook cells order-dependent.
    enriched = df.copy()

    enriched['win_conditions'] = enriched.apply(win_conditions, axis=1)
    enriched['home_winner'] = enriched.apply(home_winner, axis=1)
    enriched['away_winner'] = enriched.apply(away_winner, axis=1)

    return enriched.drop(columns=['Win Conditions'])

In [40]:
# Load the 2022 World Cup groups file from the working directory.
groups_22 = pd.read_csv('2022_world_cup_groups.csv')

In [41]:
# Load the 2022 World Cup matches file from the working directory.
matches_22 = pd.read_csv('2022_world_cup_matches.csv')

In [42]:
# Data dictionary: descriptions of the dataset fields.
data_dict = pd.read_csv('data_dictionary.csv')

In [43]:
# Load the international match history and discard the ID column.
international_matches = pd.read_csv('international_matches.csv').drop(columns=['ID'])
# Null-count check kept from exploration; it is not the cell's last
# expression, so its value is not displayed.
international_matches.isnull().sum()
# 'Win Conditions' is null when a match ended normally, so label those rows explicitly.
international_matches['Win Conditions'] = international_matches['Win Conditions'].fillna('normal ending')

In [44]:
# Rename 'Tournament' -> 'Stage' and 'Home Stadium' -> 'Host Team' (in place).
# NOTE(review): presumably done to match the World Cup frame's column names before concatenation - confirm.
international_matches.rename(columns={"Tournament":"Stage","Home Stadium":"Host Team"},inplace = True)

In [45]:
# Column names of international_matches as a plain list.
# NOTE(review): col_list is not referenced again in the visible part of the notebook.
col_list = list(international_matches)

In [46]:
# Put the columns in a fixed order; reindex would create any column named
# here that is missing from the frame (filled with NaN) and drop any not listed.
international_matches = international_matches.reindex(columns=['Date','Stage','Home Team','Home Goals',
                                                               'Away Goals','Away Team','Win Conditions','Host Team'])

In [47]:
# Load the World Cup match history and discard the ID column.
world_cup_matches = pd.read_csv('world_cup_matches.csv').drop(columns=['ID'])
# Null-count check kept from exploration; it is not the cell's last
# expression, so its value is not displayed.
world_cup_matches.isnull().sum()
# 'Win Conditions' is null when a match ended normally, so label those rows explicitly.
world_cup_matches['Win Conditions'] = world_cup_matches['Win Conditions'].fillna('normal ending')

In [48]:
# Remove the 'Year' column in place.
# NOTE(review): not idempotent - re-running this cell raises KeyError once 'Year' is gone.
world_cup_matches.drop(columns={'Year'},inplace=True)

In [151]:
# Derive the result columns for the World Cup matches.
# NOTE(review): new_wm is not referenced again in the visible part of the notebook.
new_wm = setting_history_matches(world_cup_matches)

In [49]:
# Stack international and World Cup matches into one frame with a fresh 0..n-1 index.
history_matches = pd.concat([international_matches, world_cup_matches], ignore_index=True)

In [60]:
def country_vs(team_A, team_B, matches=None):
    """Return every match played between two teams, regardless of venue.

    Parameters
    ----------
    team_A, team_B : str
        Team names as they appear in 'Home Team' / 'Away Team'.
    matches : pandas.DataFrame, optional
        Frame to search; must contain 'Home Team' and 'Away Team'.
        Defaults to the notebook-level ``history_matches`` so existing
        two-argument calls behave as before.

    Returns
    -------
    pandas.DataFrame
        The rows of ``matches`` where the two teams faced each other.
    """
    if matches is None:
        # Fall back to the notebook global, looked up at call time.
        matches = history_matches

    a_at_home = (matches['Home Team'] == team_A) & (matches['Away Team'] == team_B)
    b_at_home = (matches['Away Team'] == team_A) & (matches['Home Team'] == team_B)
    return matches[a_at_home | b_at_home]
    

## Football History of Spain

In [28]:
# Add win_conditions / home_winner / away_winner to the combined history
# (the returned frame no longer has the raw 'Win Conditions' column).
final_df = setting_history_matches(history_matches)

In [269]:
# Build a "<home> vs <away>" label; together with 'Date' it is used later as the merge key.
final_df['match'] = final_df['Home Team']+ ' vs ' + final_df['Away Team']

In [277]:
# Home-side view: every column except the away-specific ones.
# Index.difference returns the remaining column labels sorted, so column order changes here.
home = final_df[final_df.columns.difference(['Away Team', 'Away Goals', 'away_winner'])]

In [278]:
# Away-side view: every column except the home-specific ones.
# Index.difference returns the remaining column labels sorted, so column order changes here.
away = final_df[final_df.columns.difference(['Home Team', 'Home Goals', 'home_winner'])]

In [279]:
# Fix the column order, then give the home-side columns side-neutral names.
home = (
    home[['Date', 'Stage', 'Home Team', 'Home Goals', 'Host Team', 'home_winner', 'win_conditions', 'match']]
    .rename(columns={'Home Team':'Team', 'Home Goals':'Goals', 'home_winner':'Result'})
)

In [280]:
# Fix the column order, then give the away-side columns side-neutral names.
away = (
    away[['Date', 'Stage', 'Away Team', 'Away Goals', 'Host Team', 'away_winner', 'win_conditions', 'match']]
    .rename(columns={'Away Team':'Team', 'Away Goals':'Goals', 'away_winner':'Result'})
)

In [282]:
# One row per (match, team): home-side rows followed by away-side rows.
# The original index labels are kept, so each label appears twice.
df = pd.concat([home, away])

In [334]:
# Self-join on (Date, match) so every team row is paired with every row of the
# same match, including itself; non-key columns get the _x / _y suffixes.
merged_df = pd.merge(df, df, left_on=['Date','match'], right_on=['Date','match'])

In [335]:
# De-duplicate the self-merged frame on (Date, match), keeping the last row of each pair.
# `merged_df` replaces `test`, a name that is never defined in this notebook
# (it only worked through leftover kernel state).
# NOTE(review): the following cell reassigns `all_results`, so this value is not used downstream.
all_results = merged_df.drop_duplicates(subset=['Date','match'],keep='last')

In [336]:
# Drop the self-pairs produced by the self-merge, keeping only rows where a
# team is matched with its opponent.
# `merged_df` replaces `test`, a name that is never defined in this notebook
# (it only worked through leftover kernel state).
all_results = merged_df[~(merged_df['Team_x'] == merged_df['Team_y'])]

In [342]:
# Keep the team-side (_x) columns plus the opponent's name and goals (_y).
final_result = all_results.loc[:,['Date', 'Stage_x', 'Team_x', 'Goals_x', 
                                  'Team_y', 'Goals_y','Result_x','win_conditions_x','Host Team_x']]

In [343]:
# Strip the merge suffixes and give the columns their final names.
# NOTE(review): 'Goals_x' becomes 'Home Goals', but Team_x comes from the
# concatenated home+away rows, so this column holds the goals of 'Team'
# whether it played at home or away.
final_result.rename(columns={'Stage_x':'Stage','Team_x':'Team','Goals_x':'Home Goals',
                             'Team_y':'Team Against','Goals_y':'Against Goals', 'Result_x':'Result', 
                             'win_conditions_x':'Win Conditions','Host Team_x':'Host Team'},inplace=True)

In [None]:
# Persist the per-team match history.
# NOTE(review): absolute, machine-specific Windows path - the notebook will not run elsewhere as written.
# The row index is written as an extra first column because index=False is not passed.
final_result.to_csv('C:\\Users\\juanp\\proyectos_varios\\world_cup\\history_matches.csv')

In [163]:
# Load the per-team match table used for the rest of the analysis.
# NOTE(review): the cells below expect 'Team', 'Team Against', 'Result', 'Stage',
# 'Home Goals' and 'Against Goals' columns; how this file is produced is not shown here.
t = pd.read_csv('teams_world_cup_22.csv')

In [47]:
# 32 team names, four per line, used below to filter `t` on both sides of a match.
# NOTE(review): presumably the 2022 World Cup participants grouped by draw group - confirm.
teams_22 = ['Qatar', 'Ecuador', 'Senegal', 'Netherlands',
            'England', 'Iran','United States','Wales',
           'Argentina','Saudi Arabia','Mexico','Poland',
           'France','Australia','Denmark','Tunisia',
           'Spain', 'Costa Rica','Germany','Japan',
            'Belgium','Canada', 'Morocco','Croatia',
            'Brazil', 'Serbia', 'Switzerland', 'Cameroon',
            'Portugal', 'Ghana', 'Uruguay', 'South Korea']

In [56]:
# Keep only matches where both the team and its opponent are in teams_22.
teams_world_cup_22 = t[(t['Team'].isin(teams_22)) & (t['Team Against'].isin(teams_22))]

# Analyzing Spain's results

In [164]:
# All rows of `t` where the 'Team' column is Spain.
spain = t[t.Team == 'Spain']

In [188]:
def results(df):
    """Tabulate how many matches ended in each result against each opponent.

    Parameters
    ----------
    df : pandas.DataFrame
        Per-team match rows with 'Team Against' and 'Result' columns.

    Returns
    -------
    pandas.DataFrame
        One row per opponent ('Team Against' index), one column per distinct
        'Result' value, cells holding the match count (0 where a
        combination never occurred).
    """
    # Long form: one row per (opponent, result) with its match count.
    match_counts = (
        df.groupby(['Team Against', 'Result'])['Result']
        .count()
        .rename('Matches')
        .reset_index()
    )
    # Wide form: opponents down the side, results across the top.
    return match_counts.pivot_table(
        values='Matches',
        index='Team Against',
        columns='Result',
        fill_value=0,
    )

In [166]:
# Long-form count of Spain's matches per (opponent, result) pair.
spain_results = (
    spain.groupby(['Team Against', 'Result'])['Result']
    .count()
    .rename('Matches')
    .reset_index()
)

In [167]:
# Reshape to one row per opponent and one column per result, filling missing combinations with 0.
# NOTE(review): this cell overwrites its own input, so re-running it alone fails;
# this cell and the one before it repeat the steps of results().
spain_results = spain_results.pivot_table('Matches','Team Against','Result', fill_value=0)

In [168]:
# Total goals scored ('Home Goals') and conceded ('Against Goals') per opponent.
goal_totals = {'Home Goals': 'sum', 'Against Goals': 'sum'}
spain_goals = spain.groupby(['Team Against']).agg(goal_totals)

In [169]:
# Export the opponent-by-result table.
# NOTE(review): absolute, machine-specific Windows path - the notebook will not run elsewhere as written.
spain_results.to_csv('C:\\Users\\juanp\\proyectos_varios\\world_cup\\spain_results.csv')

In [170]:
# Sanity check: (rows, columns) of the Argentina-Spain head-to-head history.
country_vs('Argentina', 'Spain').shape

(14, 8)

In [128]:
# s = spain_results.div(spain_results.sum(axis=1), axis=0)  --> row-wise shares; maybe the same result can be obtained directly in Tableau while keeping the totals

# How was Spain's performance in previous World Cups?

In [173]:
# Distinct 'Stage' labels that occur in the World Cup match data.
a = list(world_cup_matches['Stage'].unique())

In [179]:
# Spain's matches whose 'Stage' is one of the World Cup stage labels.
# NOTE(review): this matches on the label only; if non-World-Cup rows reuse any
# of these labels they are included too - confirm.
spain_wc =spain[spain['Stage'].isin(a)]

In [190]:
# Opponent-by-result table for Spain's World Cup-stage matches.
# Built from `spain` and `a` rather than from `spain_wc` itself, so the cell
# can be re-run: feeding the pivoted table back into results() fails because
# it no longer has a 'Result' column.
spain_wc = results(spain[spain['Stage'].isin(a)])

In [192]:
# Export Spain's World Cup-stage results table.
# NOTE(review): absolute, machine-specific Windows path - the notebook will not run elsewhere as written.
spain_wc.to_csv('C:\\Users\\juanp\\proyectos_varios\\world_cup\\spain_wc.csv')