In [None]:
import random
import numpy as np
import matplotlib as plt
import pandas as pd

In [None]:
def rand_w(min_val, max_val, num_ints):
    """
    Build a weight vector out of randomly drawn integers.

    Every entry is produced by ``random.randint``, so both bounds are
    inclusive and the draws come from the module-level ``random`` generator.
    :param min_val: (int) lower bound for each drawn integer
    :param max_val: (int) upper bound for each drawn integer
    :param num_ints: (int) how many integers to draw
    :return: a list holding ``num_ints`` random integers
    """
    # one draw per requested slot; the loop variable itself is never needed
    return [random.randint(min_val, max_val) for _ in range(num_ints)]

In [None]:
def predictdot(X, w):
    """
    Turn the weighted sum of every row of X into a binary prediction.

    :param X: (dataframe or array) attribute values including the bias term,
        one row per example; anything exposing a ``.dot`` method works
    :param w: (list or array) weight vector, one weight per column of X
    :return: 1 for each row whose dot product with w is >= 0, otherwise 0,
        in whatever container ``X.dot(w)`` yields
    """
    activations = X.dot(w)
    # a row "fires" when its activation is on or above the zero threshold
    fired = activations >= 0
    return fired.astype(int)

In [None]:
def perceptron(data, alpha=0.0001, epochs=1000, seed=None):
    """
    Train a linear perceptron with full-batch updates and track its progress.

    The last column of ``data`` is used as the target and every other column
    as an attribute. Non-numeric columns are replaced by integer category
    codes, and a constant ``bias`` column is inserted right before the target.
    The frame passed in is copied, so the caller's data is left untouched.

    :param data: (dataframe) attributes followed by the target in the last column
    :param alpha: (float) the learning rate
    :param epochs: (int) number of epochs
    :param seed: (int or None) seed for the random initial weights; ``None``
        keeps drawing them from numpy's global random state
    :return: tuple ``(acc_list, mpe_list, w)`` -- the accuracy and the mean
        absolute prediction error of each epoch (both measured before that
        epoch's weight update) and the final weight array, with one weight
        per attribute column plus the bias
    """

    data = data.copy()

    # Replace every non-numeric column (the target included) with integer
    # category codes. This is label encoding, not one-hot encoding: each
    # distinct value simply gets its own integer.
    for col in data.columns:
        if not pd.api.types.is_numeric_dtype(data[col]):
            data[col] = data[col].astype('category').cat.codes

    # Add the bias column immediately before the target. Deriving the position
    # from the frame's width keeps the target last for any number of columns.
    data.insert(data.shape[1] - 1, 'bias', 1)

    # X is all attributes plus the bias column, as one float matrix
    X = data.iloc[:, :-1].to_numpy(dtype=float)

    # y is the actual values column
    y = data.iloc[:, -1].to_numpy()

    # total number of attributes including bias column
    num_attrib = X.shape[1]

    # initial w, uniform in [0, 1); a seed gives a private, repeatable generator
    if seed is None:
        w = np.random.rand(num_attrib)
    else:
        w = np.random.default_rng(seed).random(num_attrib)

    # lists to store MPE and model accuracy per epoch
    mpe_list = []
    acc_list = []

    for _ in range(epochs):

        # predict using the current weights
        ypred = predictdot(X, w)
        errors = y - ypred

        # fraction of rows predicted correctly in this epoch
        acc_list.append(np.mean(ypred == y))

        # mean absolute error in this epoch
        mpe_list.append(np.mean(np.abs(errors)))

        # Batch update of all weights at once: entry j of X.T.dot(errors) is
        # sum(errors * X[:, j]), so correctly predicted rows contribute zero.
        w += alpha * X.T.dot(errors)

    # show the column order so the entries of w can be matched to attributes
    print(data.columns)
    return acc_list, mpe_list, w

In [None]:
# perceptron() draws its initial weights from numpy's global random state, so
# seed it here to make the printed weights and recorded metrics repeatable.
SEED = 42
np.random.seed(SEED)

# read_csv already returns a DataFrame; no further conversion is needed
df = pd.read_csv('../data/team_data_v4.csv')
acc_list, mpe_list, w = perceptron(df)
print(w)

In [None]:
# The import cell binds `plt` to the top-level matplotlib package, which has
# no subplots()/show(); the plotting interface lives in matplotlib.pyplot.
import matplotlib.pyplot as plt

# one x value per recorded epoch, so the axis always matches the metric lists
num_epochs = np.arange(len(acc_list))

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))

# plot MPE
ax1.plot(num_epochs, mpe_list)
ax1.set_xlabel('Epochs')
ax1.set_ylabel('MPE')
ax1.set_title('Mean Perceptron Error')

# plot accuracy
ax2.plot(num_epochs, acc_list)
ax2.set_xlabel('Epochs')
ax2.set_ylabel('Accuracy')
ax2.set_title('Model Accuracy')

# keep the right panel's y-label from running into the left panel
fig.tight_layout()

# display the plot
plt.show()