# Task for Today  

***

## International Football Win Prediction  

Given *data about women's football games*, let's try to predict whether the home team of a given game will **win** or not.  
  
We will use a TensorFlow neural network to make our predictions.

# Getting Started

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import tensorflow as tf

In [None]:
# Load the match results from the Kaggle input directory into a DataFrame.
results_csv_path = '../input/womens-international-football-results/results.csv'
data = pd.read_csv(results_csv_path)

In [None]:
# Display the freshly loaded DataFrame as the cell output.
data

In [None]:
# Print a per-column summary (dtypes and non-null counts) to spot missing values.
data.info()

# Feature Engineering + Target Creation

In [None]:
# Split the date string into separate year and month features by slicing
# characters 0-3 and 5-6 (assumes 'YYYY-MM-DD' text -- confirm against the CSV).
# The pieces are converted to integers so the columns are numeric from the
# start, rather than object-dtype strings that later steps must coerce.
data['year'] = data['date'].apply(lambda x: int(x[0:4]))
data['month'] = data['date'].apply(lambda x: int(x[5:7]))

# The raw date string is no longer needed once its parts are extracted.
data = data.drop('date', axis=1)

In [None]:
# Inspect the frame now that `date` has been replaced by `year` and `month`.
data

In [None]:
# Binary target: 1 when the home side scored strictly more than the away side,
# 0 otherwise (so draws count as "not a home win").
# The builtin `int` is used because the `np.int` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
data['home_victory'] = (data['home_score'] > data['away_score']).astype(int)

# Remove the raw scores: they determine the target and must not be features.
data = data.drop(['home_score', 'away_score'], axis=1)

In [None]:
# Cast the `neutral` flag to integers so it is numeric like the other features.
# The builtin `int` replaces `np.int`, which was removed in NumPy 1.24.
data['neutral'] = data['neutral'].astype(int)

In [None]:
# Inspect the frame after adding `home_victory` and converting `neutral`.
data

# Encoding

In [None]:
def onehot_encode(df, columns, prefixes):
    """Return a copy of *df* with the given columns replaced by one-hot dummies.

    Args:
        df: Source DataFrame; it is not modified.
        columns: Names of the columns to encode.
        prefixes: Prefix for the dummy columns of each entry in ``columns``
            (same order, same length).

    Returns:
        A new DataFrame holding the untouched columns in their original order,
        followed by the dummy columns of each encoded column in the order the
        columns were listed.

    Raises:
        ValueError: If ``columns`` and ``prefixes`` differ in length.
    """
    columns = list(columns)
    prefixes = list(prefixes)
    # zip() would silently skip the unmatched tail, leaving columns unencoded.
    if len(columns) != len(prefixes):
        raise ValueError(
            f"columns and prefixes must have the same length "
            f"(got {len(columns)} and {len(prefixes)})"
        )

    dummy_frames = [
        pd.get_dummies(df[column], prefix=prefix)
        for column, prefix in zip(columns, prefixes)
    ]
    # Concatenate once instead of rebuilding the (potentially very wide) frame
    # on every iteration.
    return pd.concat([df.drop(columns, axis=1), *dummy_frames], axis=1)

In [None]:
# Map each categorical column to the prefix its dummy columns will carry.
categorical_prefixes = {
    'home_team': 'home',
    'away_team': 'away',
    'tournament': 'tourn',
    'city': 'city',
    'country': 'country',
}

# Replace every categorical column with its one-hot encoded dummies.
data = onehot_encode(
    data,
    list(categorical_prefixes.keys()),
    list(categorical_prefixes.values())
)

In [None]:
# Inspect the frame after one-hot encoding the categorical columns.
data

# Splitting and Scaling

In [None]:
# Separate the prediction target from the feature columns.
target_column = 'home_victory'
X = data.drop(columns=[target_column])
y = data[target_column]

In [None]:
# Split before scaling so the held-out rows never influence the scaling
# statistics; fitting the scaler on the full dataset leaks test-set
# information into training.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=67)

# Learn mean and variance from the training rows only, then apply the same
# transform to both splits. The original row index is kept so each feature
# frame stays aligned with its target Series.
scaler = StandardScaler()

X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X.columns, index=X_train.index)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X.columns, index=X_test.index)

# Training

In [None]:
# Check the feature-matrix dimensions; the column count is the input width the model needs.
X.shape

In [None]:
# Mean of the 0/1 target, i.e. the fraction of games that are home victories (class balance).
y.mean()

In [None]:
# Build a small fully connected binary classifier. The input width is read
# from the training data rather than hard-coded, so the cell keeps working
# when the dataset (and therefore the number of one-hot columns) changes.
inputs = tf.keras.Input(shape=(X_train.shape[1],))
x = tf.keras.layers.Dense(64, activation='relu')(inputs)
x = tf.keras.layers.Dense(64, activation='relu')(x)
# Single sigmoid unit: the output is the predicted probability of a home win.
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.Model(inputs, outputs)

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[
        'accuracy',
        # Named explicitly so the history keys are 'auc' / 'val_auc'.
        tf.keras.metrics.AUC(name='auc')
    ]
)

batch_size = 32
epochs = 20

# Hold out 20% of the training data for validation, and let the callback
# lower the learning rate when progress stalls (default callback settings).
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=batch_size,
    epochs=epochs,
    callbacks=[tf.keras.callbacks.ReduceLROnPlateau()]
)

In [None]:
# Zero-based index of the epoch with the highest validation AUC.
np.argmax(history.history['val_auc'])

In [None]:
# Plot training vs. validation loss per epoch.
# Passing several `y` columns without `x` makes Plotly Express treat the data
# as wide-form: the x axis is named 'index' and the y axis 'value'. `labels`
# must use those keys; 'x' / 'y' keys match nothing and are silently ignored.
fig = px.line(
    history.history,
    y=['loss', 'val_loss'],
    labels={'index': "Epoch", 'value': "Loss"},
    title="Loss Over Time"
)

fig.show()

In [None]:
# Plot training vs. validation AUC per epoch.
# Passing several `y` columns without `x` makes Plotly Express treat the data
# as wide-form: the x axis is named 'index' and the y axis 'value'. `labels`
# must use those keys; 'x' / 'y' keys match nothing and are silently ignored.
fig = px.line(
    history.history,
    y=['auc', 'val_auc'],
    labels={'index': "Epoch", 'value': "AUC"},
    title="AUC Over Time"
)

fig.show()

In [None]:
# Score the trained model on the held-out test split (loss, then the compiled accuracy and AUC metrics).
model.evaluate(X_test, y_test)

# Data Every Day  

This notebook is featured on Data Every Day, a YouTube series where I train models on a new dataset each day.  

***

Check it out!  
https://youtu.be/b7uc9XDyz3A