In [3]:
import pandas as pd
import numpy as np
import random
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import load_model
import keras


In [4]:
def eval_keras(df_name: str, model_name: str, threshold:float) -> list:
    # read data
    df = pd.read_csv(df_name)
    X = df.drop('Outcome', axis=1)
    y = df['Outcome']

    # convert data to arrays
    X = np.array(X)
    y = np.array(y)

    # split data into the appropriate groups
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # scale X data
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # load the saved model
    model = load_model(model_name)

    # read the data for the predictions to be compared to
    real_X = pd.read_csv(df_name).drop('Outcome', axis=1)
    real_y = pd.read_csv(df_name)['Outcome']

    # loop through the test data and get recommended bets
    winning_bets = []
    losing_bets = []
    for i in range(len(real_X)):
        try:
            prediction = model.predict(scaler.transform(real_X.iloc[i].values.reshape(1, -1)), verbose=0)
            actual = real_y.iloc[i]
            if prediction > threshold and actual == 1:
                winning_bets.append([real_X.iloc[i].loc['Column501'], prediction.item()])
            elif prediction > threshold and actual == 0:
                losing_bets.append([real_X.iloc[i].loc['Column501'], prediction.item()])
        except:
            pass
    return [winning_bets, losing_bets]

In [5]:
import torch
import torch.nn as nn


def eval_pytorch(df_name: str, model_name: str, threshold:float) -> list:
    # read data
    df = pd.read_csv(df_name)
    X = df.drop('Outcome', axis=1)
    y = df['Outcome']

    # convert data to arrays
    X = np.array(X)
    y = np.array(y)

    # split data into the appropriate groups
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # scale X data
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    class ANN_Model(nn.Module):
        def __init__(self, input_size):
            super(ANN_Model, self).__init__()
            self.fc1 = nn.Linear(input_size, 256)
            self.fc2 = nn.Linear(256, 128)
            self.fc3 = nn.Linear(128, 64)
            self.fc4 = nn.Linear(64, 32)
            self.fc5 = nn.Linear(32, 1)
            self.relu = nn.ReLU()
            self.dropout = nn.Dropout(0.1)

        def forward(self, x):
            x = self.relu(self.fc1(x))
            x = self.dropout(x)
            x = self.relu(self.fc2(x))
            x = self.dropout(x)
            x = self.relu(self.fc3(x))
            x = self.dropout(x)
            x = self.relu(self.fc4(x))
            x = self.dropout(x)
            x = self.fc5(x)
            return x

    # load the saved model
    model = ANN_Model(input_size=X_train.shape[1])
    model.load_state_dict(torch.load(model_name))
    device = torch.device("cuda")
    model.to(device)
    model.eval()

    # loop through the test data and get recommended bets
    real_X = pd.read_csv(df_name).drop('Outcome', axis=1)
    real_y = pd.read_csv(df_name)['Outcome']

    # loop through the test data and get recommended bets
    winning_bets = []
    losing_bets = []
    for i in range(len(real_X)):
        X_tensor = torch.tensor(scaler.transform(np.array(real_X.iloc[i].values.reshape(1, -1))), dtype=torch.float32).cuda().to(device)
        y_tensor = torch.tensor([real_y.iloc[i]], dtype=torch.float32).cuda().to(device)
        outputs = model(X_tensor)
        prediction = torch.sigmoid(outputs).item()
        actual = real_y.iloc[i]

        if prediction > threshold and actual == 1:
            winning_bets.append([real_X.iloc[i].loc['Column501'], prediction])
        elif prediction > threshold and actual == 0:
            losing_bets.append([real_X.iloc[i].loc['Column501'], prediction])

    return [winning_bets, losing_bets]


In [6]:
def calculate_winnings(bet_amount, odds):
    if odds > 0:
        return (bet_amount * odds / 100)
    return bet_amount / (odds / -100)


def american_to_decimal(american_odds):
    if american_odds >= 100:
        decimal_odds = (american_odds / 100)
    else:
        decimal_odds = (100 / abs(american_odds))
    return round(decimal_odds, 2)


def calculate_kelly_criterion(american_odds, model_prob):
    decimal_odds = american_to_decimal(american_odds)
    bankroll_fraction = round((100 * (decimal_odds * model_prob - (1 - model_prob))) / decimal_odds, 2)
    return bankroll_fraction if bankroll_fraction > 0 else 0


def calculate_profit(bets:list):
    winnings = 0
    losings = 0
    for bet in bets[0]:
        odds = bet[0]
        winnings += calculate_winnings(100, odds)
    for bet in bets[1]:
        losings += calculate_winnings(100, odds)
    return winnings - losings

In [5]:
a = eval_keras("csvs/new_ml_X.csv", "built_models/keras_ml.h5")
print(len(a[0]), len(a[1]))
print(a)
# make sure to change the column numbers for the other types of data

36 19
[[[-350.0, 0.7659221887588501], [-350.0, 0.7659255266189575], [165.0, 0.9309464693069458], [155.0, 0.9327155351638794], [-205.0, 0.7077704668045044], [-110.0, 0.7290245294570923], [-300.0, 1.133025050163269], [-330.0, 1.1432695388793945], [-125.0, 0.7011667490005493], [-170.0, 0.7026174068450928], [-185.0, 0.7580926418304443], [-164.0, 0.7535481452941895], [-180.0, 0.723017156124115], [-180.0, 0.7230209112167358], [-315.0, 0.7503457069396973], [-350.0, 0.7570929527282715], [-160.0, 0.7690244913101196], [-165.0, 0.770759105682373], [-120.0, 0.7218199968338013], [-110.0, 0.7195265293121338], [-170.0, 0.800107479095459], [-200.0, 0.8010233640670776], [-110.0, 0.7178899049758911], [-113.0, 0.7180109024047852], [-315.0, 0.7404066920280457], [-310.0, 0.7391205430030823], [-130.0, 0.7174508571624756], [-110.0, 0.7045949101448059], [-165.0, 0.7427354454994202], [-170.0, 0.7446587085723877], [-425.0, 0.7082930207252502], [-450.0, 0.7146939039230347], [-145.0, 0.7371264696121216], [-155.0,

In [11]:
b = eval_pytorch("csvs/new_ml_X.csv", "built_models/torch_ml.pth", .01)
print(len(b[0]), len(b[1]))
print(b)

596 581
[[[-170.0, 0.9986165761947632], [-205.0, 0.9992614388465881], [-350.0, 0.9996615648269653], [-350.0, 0.9996616840362549], [165.0, 0.9997710585594177], [155.0, 0.9997981190681458], [-170.0, 0.9881607294082642], [-205.0, 0.9951032400131226], [-185.0, 0.9183582067489624], [-188.0, 0.9207966923713684], [-185.0, 0.419424444437027], [-185.0, 0.4193575978279114], [-110.0, 0.831697404384613], [150.0, 0.6924290657043457], [-300.0, 0.033501092344522476], [-330.0, 0.03469209372997284], [130.0, 0.048143498599529266], [155.0, 0.04723284766077995], [-250.0, 0.9003719687461853], [-290.0, 0.9026108384132385], [-190.0, 0.994504988193512], [-240.0, 0.9968709349632263], [-125.0, 0.7990659475326538], [-170.0, 0.7923315763473511], [-185.0, 0.9955452680587769], [-164.0, 0.9949719905853271], [140.0, 0.7016572952270508], [155.0, 0.6656636595726013], [-115.0, 0.7029241323471069], [-120.0, 0.7055303454399109], [125.0, 0.21267589926719666], [121.0, 0.2186564952135086], [-145.0, 0.10195992141962051], [-12

In [15]:
threshold_list = {}
for i in range(9900, 9999):
    bets = eval_pytorch("csvs/new_ml_X.csv", "built_models/torch_ml.pth", (i / 10000))
    investment = (len(bets[0]) + len(bets[1])) * 100
    profit = calculate_profit(bets) / investment
    threshold_list[i] = profit
    print(i, investment, profit)

9900 9900 0.16851789369420903
9901 9700 0.2991151663824782
9902 9900 0.1415013514602755
9903 10000 0.17863066929300525
9904 9600 0.1538177006207695
9905 10100 0.1696874182984268
9906 10100 0.1696874182984268
9907 9900 0.11459669412083562
9908 9700 0.13704074474494857
9909 9300 0.22800088358203288
9910 10100 0.1375753190546432
9911 9800 0.08265668218692888
9912 8700 0.23720214641812634
9913 9700 0.1651546946428518
9914 9000 0.2252493453345965
9915 9000 0.2252493453345965
9916 10300 0.13810281033724373
9917 9300 0.24308632561244647
9918 9000 0.23654635663160778
9919 9100 0.228714083743453
9920 9200 0.22086006979257247
9921 8900 0.233130691643867
9922 9100 0.21754121542772756
9923 9000 0.23654635663160778
9924 8800 0.2290954120703013
9925 8500 0.2539878551853876
9926 8800 0.21480631574995496
9927 8900 0.24435615561222584
9928 8700 0.24943692538718526
9929 9100 0.228714083743453
9930 8600 0.24549740947171475
9931 8600 0.2578744533125069
9932 8500 0.2391944378184408
9933 8700 0.234983586580

In [16]:
threshold_list = {}
for i in range(50, 100):
    bets = eval_keras("csvs/new_ml_X.csv", "built_models/keras_ml.h5", (i / 100))
    investment = (len(bets[0]) + len(bets[1])) * 100
    profit = calculate_profit(bets) / investment
    threshold_list[i] = profit
    print(i, investment, profit)

50 61100 0.15505435838466708
51 57300 0.15190260311109421
52 52400 0.1718171337510168
53 46400 0.17528736301027947
54 41500 0.1678274904140346
55 36700 0.16813151002353813
56 34000 0.1561394701367648
57 31000 0.14643287857278
58 27700 0.1629642113319983
59 24000 0.16367195521769665
60 21700 0.15664300072771092
61 19200 0.16089115918599683
62 17000 0.1696685892293021
63 15600 0.16700872595500332
64 13800 0.15964852911872607
65 12300 0.129830511602786
66 10200 0.23163307843966982
67 8900 0.20935821186942832
68 7100 0.2086419007191932
69 6500 0.22396100401245853
70 5500 0.29673061106147064
71 4500 0.28990365949159747
72 4300 0.28033050431788126
73 3100 0.14686898326562675
74 3000 0.1287761102940212
75 2500 0.11922237396986325
76 2100 0.05843276704621243
77 2100 0.05843276704621243
78 1700 -0.033847121736395105
79 1600 -0.004712566844919799
80 1400 0.06604278074866309
81 1100 0.15592286501377406
82 800 0.32803030303030295
83 600 0.5383838383838383
84 600 0.5383838383838383
85 600 0.5383838