# Task for Today  

***

## Greyhound Race Result Prediction  

Given *data about greyhound racing in the UK*, let's try to predict the **winner** and **placements** of greyhounds in a given race.

We will use various classification models to make our predictions. 

# Getting Started

In [None]:
import numpy as np
import pandas as pd
# Show every column when displaying wide DataFrames. The fully-qualified key is
# used because the bare 'max_columns' pattern matches more than one option on
# newer pandas releases, which makes set_option raise OptionError.
pd.set_option('display.max_columns', None)

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('darkgrid')

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier

import warnings
# Suppress every warning for the rest of the session (no category or module
# filter is given). NOTE(review): this also hides library deprecation and
# solver-convergence warnings — consider narrowing the filter.
warnings.filterwarnings(action='ignore')

In [None]:
# Load the race dataset into a DataFrame. The relative '../input/...' path looks
# like a Kaggle dataset mount — NOTE(review): adjust it when running elsewhere.
data = pd.read_csv('../input/greyhound-racing-uk-predict-finish-position/data_final.csv')

In [None]:
# Preview the loaded dataset (rendered as this cell's output).
data

In [None]:
# Print a per-column summary of the frame (useful for spotting non-numeric or
# missing values before modelling).
data.info()

# Basic EDA

In [None]:
# Summary statistics for the dataset's columns.
data.describe()

In [None]:
# Plot the distribution of every column that takes more than two distinct values.
non_binary_columns = [column for column in data.columns if data[column].nunique(dropna=False) > 2]

# Six panels per row. Keep at least the original five rows so the layout is
# unchanged for up to 30 columns, but grow the grid (and the figure height)
# beyond that so plt.subplot is never asked for a panel outside the grid.
n_cols = 6
n_rows = max(5, (len(non_binary_columns) + n_cols - 1) // n_cols)

plt.figure(figsize=(24, 3 * n_rows))
for i, column in enumerate(non_binary_columns):
    plt.subplot(n_rows, n_cols, i + 1)
    # NOTE(review): distplot is deprecated in recent seaborn releases;
    # histplot(..., kde=True) is the documented replacement once the
    # environment's seaborn version is confirmed to provide it.
    sns.distplot(data[column])
plt.show()

In [None]:
# Pairwise correlations between the dataset's columns, drawn as an annotated heatmap.
corr = data.corr()

plt.figure(figsize=(21, 17))
sns.heatmap(
    data=corr,
    annot=True,    # write each coefficient inside its cell
    vmin=-1.0,     # anchor the low end of the colour scale at -1
    cmap='mako',
)
plt.show()

# Preprocessing

In [None]:
# Look at the raw frame once more before writing the preprocessing function.
data

In [None]:
def preprocess_inputs(df, target='Winner'):
    """Build standardized train/test splits for one of the two prediction targets.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw data containing the 'Race_ID', 'Winner' and 'Finished' columns.
        Every remaining column is used as a feature and is passed through
        StandardScaler, so it must be numeric.
    target : {'Winner', 'Finished'}, default 'Winner'
        Column to predict. The other outcome column is removed from the
        features so it cannot leak the answer.

    Returns
    -------
    X_train, X_test : pandas.DataFrame
        Standardized features (scaler fitted on the training rows only).
        They keep the original row labels, so they stay aligned with
        y_train / y_test.
    y_train, y_test : pandas.Series
        Target values for the 70% / 30% split.

    Raises
    ------
    ValueError
        If `target` is not 'Winner' or 'Finished'.
    """
    outcome_columns = ('Winner', 'Finished')

    # Fail early with a clear message instead of hitting unbound X / y later
    if target not in outcome_columns:
        raise ValueError(
            "target must be one of {}, got {!r}".format(outcome_columns, target)
        )

    df = df.copy()

    # Split df into X and y: drop the race identifier and both outcome columns
    # from the features, keep only the requested outcome as the label
    y = df[target]
    X = df.drop(['Race_ID', 'Winner', 'Finished'], axis=1)

    # Train-test split
    # NOTE(review): rows are shuffled individually, so runners from the same
    # race can end up on both sides of the split — confirm this is intended.
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, shuffle=True, random_state=1)

    # Scale X using statistics from the training rows only; keep each split's
    # index so the features remain label-aligned with their targets
    scaler = StandardScaler()
    X_train = pd.DataFrame(scaler.fit_transform(X_train), index=X_train.index, columns=X.columns)
    X_test = pd.DataFrame(scaler.transform(X_test), index=X_test.index, columns=X.columns)

    return X_train, X_test, y_train, y_test

# Training (Winner Prediction)

In [None]:
# Build the scaled train/test splits with the 'Winner' column as the label.
X_train, X_test, y_train, y_test = preprocess_inputs(data, target='Winner')

In [None]:
# Inspect the standardized training features for the 'Winner' task.
X_train

In [None]:
# Inspect the training labels (the 'Winner' column).
y_train

In [None]:
# Candidate classifiers, keyed by a left-padded display name so the printed
# results line up. Estimators with random components are seeded with the same
# seed as the train/test split so repeated runs give the same fitted models.
models = {
    "   K-Nearest Neighbors": KNeighborsClassifier(),
    "   Logistic Regression": LogisticRegression(),
    "Support Vector Machine": SVC(),
    "         Decision Tree": DecisionTreeClassifier(random_state=1),
    "        Neural Network": MLPClassifier(random_state=1)
}

# Fit each model on the current training split, reporting progress as we go.
for name, model in models.items():
    model.fit(X_train, y_train)
    print(name + " trained.")

In [None]:
# Report each fitted model's accuracy on the held-out split, as a percentage.
# NOTE(review): if winners are a small minority of rows, plain accuracy can look
# high even for a model that never predicts a win — compare against a
# majority-class baseline before drawing conclusions.
for name, model in models.items():
    accuracy_pct = model.score(X_test, y_test) * 100
    print(f"{name} Accuracy: {accuracy_pct:.2f}%")

# Training (Place Prediction)

In [None]:
# Rebuild the scaled train/test splits, this time with 'Finished' as the label.
# This rebinds the X_train / X_test / y_train / y_test names used above.
X_train, X_test, y_train, y_test = preprocess_inputs(data, target='Finished')

In [None]:
# Inspect the standardized training features for the 'Finished' task.
X_train

In [None]:
# Inspect the training labels (the 'Finished' column).
y_train

In [None]:
# Fresh, unfitted instances of the candidate classifiers, keyed by a left-padded
# display name so the printed results line up. Estimators with random
# components are seeded with the same seed as the train/test split so repeated
# runs give the same fitted models.
models = {
    "   K-Nearest Neighbors": KNeighborsClassifier(),
    "   Logistic Regression": LogisticRegression(),
    "Support Vector Machine": SVC(),
    "         Decision Tree": DecisionTreeClassifier(random_state=1),
    "        Neural Network": MLPClassifier(random_state=1)
}

# Fit each model on the current training split, reporting progress as we go.
for name, model in models.items():
    model.fit(X_train, y_train)
    print(name + " trained.")

In [None]:
# Report each fitted model's accuracy on the held-out split, as a percentage.
for name, model in models.items():
    accuracy_pct = model.score(X_test, y_test) * 100
    print(f"{name} Accuracy: {accuracy_pct:.2f}%")

# Data Every Day  

This notebook is featured on Data Every Day, a YouTube series where I train models on a new dataset each day.  

***

Check it out!  
https://youtu.be/P7IRMxLJbhE