# What we are going to do here is get model scores for Method of Victory models (mov)

In [1]:
import pandas as pd
import numpy as np
import random
import csv
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import ExtraTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import RadiusNeighborsClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.calibration import CalibratedClassifierCV
from sklearn.svm import LinearSVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegressionCV
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
#from sklearn.mixture import DPGMM

In [2]:
import sys
sys.path.append('../../automated_model_creation') #We need to access the function file

In [3]:
from functions import *


In [4]:
is_prod = True

if is_prod:
    model_file = '../../data/production_models_mov.csv'
else: #If not production then test
    model_file = '../../data/models_mov.csv'

In [5]:
#Load models
with open(model_file, newline='') as f:
    reader = csv.reader(f)
    models = list(reader)
    
print(len(models[0]))


17


In [6]:
model_file

'../../data/models_mov.csv'

In [7]:
df = pd.read_csv("../../data/kaggle_data/ufc-master.csv")

In [8]:
#Let's fix the date
df['date'] = pd.to_datetime(df['date'])

In [9]:
def return_finish_type(winner, finish):
    #print(winner, finish)
    #Why overcomplicate things?  We can just use a few if statements
    if winner == 'Red':
        #print("HI")
        if finish in ['U-DEC', 'S-DEC', 'M-DEC']:
            return ('Red - DEC')
        if finish in ['SUB']:
            return('Red - SUB')
        if finish in ['KO/TKO', 'DQ']:
            return('Red - KO/TKO')
    if winner == 'Blue':
        if finish in ['U-DEC', 'S-DEC', 'M-DEC']:
            return ('Blue - DEC')
        if finish in ['SUB']:
            return('Blue - SUB')
        if finish in ['KO/TKO', 'DQ']:
            return('Blue - KO/TKO')
        
    #Test for NaN
    if finish != finish:
        return('')
    
    if finish == 'Overturned':
        return('')
    
    
    return ('error')

In [10]:
#This calls for the power of lambda!
df['finish_type'] = df.apply(lambda x: return_finish_type(x['Winner'], x['finish']), axis=1)
mask = df['finish_type'] != ''
df = df[mask]

In [11]:
finish_list = ['Red - DEC', 'Red - SUB', 'Red - KO/TKO', 'Blue - DEC', 'Blue - SUB', 'Blue - KO/TKO']

#Let's put all the labels in a dataframe
df['label'] = ''
#If the winner is not Red or Blue we can remove it.

for f in range(len(finish_list)):
    mask = df['finish_type'] == finish_list[f]
    df['label'][mask] = f
    
#df["Winner"] = df["Winner"].astype('category')
#df = df[(df['Winner'] != 'Blue') | (df['Winner'] == 'Red') ]


#Make sure lable is numeric
df['label'] = pd.to_numeric(df['label'], errors='coerce')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['label'][mask] = f


In [12]:
df.rename(columns={'r_dec_odds': 'Red - DEC', 'r_sub_odds': 'Red - SUB', 'r_ko_odds': 'Red - KO/TKO',
                'b_dec_odds': 'Blue - DEC', 'b_sub_odds': 'Blue - SUB', 'b_ko_odds': 'Blue - KO/TKO'}, inplace=True)

In [13]:
label_df = df['label']
odds_df = df[finish_list]

In [14]:
#Split the test set.  We are always(?) going to use the last 200 matches as the test set, so we don't want those around
#as we pick models

df_train = df[250:]
odds_train = odds_df[250:]
label_train = label_df[250:]

df_test = df[:250]
odds_test = odds_df[:250]
label_test = label_df[:250]

#print(len(df_test))
#print(len(odds_test))
#print(len(label_test))

#print(len(df_train))
#print(len(odds_train))
#print(len(label_train))

In [15]:
#We need to clean
mask = df_train['finish_type'] != ''
df_train = df_train[mask]
#print(len(df_train))

mask = df_test['finish_type'] != ''
df_test = df_test[mask]
#print(len(df_test))

label_train = label_train[label_train.index.isin(df_train.index)]
label_test = label_test[label_test.index.isin(df_test.index)]

odds_train = odds_train[odds_train.index.isin(df_train.index)]
odds_test = odds_test[odds_test.index.isin(df_test.index)]


#print(len(df_train))
#print(len(label_train))
#print(len(odds_train))
#print(len(df_test))
#print(len(label_test))
#print(len(odds_test))

In [16]:
#Set a value for the nulls in the ranks

weightclass_list = ['B_match_weightclass_rank', 'R_match_weightclass_rank', "R_Women's Flyweight_rank", "R_Women's Featherweight_rank", "R_Women's Strawweight_rank", "R_Women's Bantamweight_rank", 'R_Heavyweight_rank', 'R_Light Heavyweight_rank', 'R_Middleweight_rank', 'R_Welterweight_rank', 'R_Lightweight_rank', 'R_Featherweight_rank', 'R_Bantamweight_rank', 'R_Flyweight_rank', 'R_Pound-for-Pound_rank', "B_Women's Flyweight_rank", "B_Women's Featherweight_rank", "B_Women's Strawweight_rank", "B_Women's Bantamweight_rank", 'B_Heavyweight_rank', 'B_Light Heavyweight_rank', 'B_Middleweight_rank', 'B_Welterweight_rank', 'B_Lightweight_rank', 'B_Featherweight_rank', 'B_Bantamweight_rank', 'B_Flyweight_rank', 'B_Pound-for-Pound_rank']
df_train[weightclass_list] = df_train[weightclass_list].fillna(17)
df_test[weightclass_list] = df_test[weightclass_list].fillna(17)


In [17]:
score_list = []

In [18]:
score_list = []
for model_num in range(len(models[0])):
    #print(test_model_features)
    #print(len())
    test_model_name = models[0][model_num]
    test_model = eval(models[1][model_num])
    test_model_features = eval(models[2][model_num])
    test_model_ev = eval(models[3][model_num])
    score_list.append(evaluate_model_mov(test_model, test_model_features, test_model_ev, df_train, label_train, odds_train, df_test, label_test,
                         odds_test, finish_list, verbose = True))



(2686, 43)

(2686,)

(2686, 6)

(242, 43)

(242,)

(242, 6)

250     5
251     3
252     3
253     5
254     0
       ..
4217    0
4225    1
4226    3
4227    5
4228    5
Name: label, Length: 2686, dtype: int64
Real Score: -1094.7619047619046


(2834, 70)

(2834,)

(2834, 6)

(240, 70)

(240,)

(240, 6)

250     5
251     3
252     3
253     5
254     0
       ..
4217    0
4225    1
4226    3
4227    5
4228    5
Name: label, Length: 2834, dtype: int64
Real Score: -11307.5


(2686, 181)

(2686,)

(2686, 6)

(240, 181)

(240,)

(240, 6)

250     5
251     3
252     3
253     5
254     0
       ..
4217    0
4225    1
4226    3
4227    5
4228    5
Name: label, Length: 2686, dtype: int64
Real Score: 245.0


(2686, 33)

(2686,)

(2686, 6)

(242, 33)

(242,)

(242, 6)

250     5
251     3
252     3
253     5
254     0
       ..
4217    0
4225    1
4226    3
4227    5
4228    5
Name: label, Length: 2686, dtype: int64
Real Score: 290.04522738630703


(2762, 38)

(2762,)

(2762, 6)

(240, 38)

(240,)

(240, 6)

250     5
251     3
252     3
253     5
254     0
       ..
4217    0
4225    1
4226    3
4227    5
4228    5
Name: label, Length: 2762, dtype: int64
Real Score: -1121.614906832298


(2686, 70)

(2686,)

(2686, 6)

(240, 70)

(240,)

(240, 6)

250     5
251     3
252     3
253     5
254     0
       ..
4217    0
4225    1
4226    3
4227    5
4228    5
Name: label, Length: 2686, dtype: int64
Real Score: -4546.249827467219


(2686, 126)

(2686,)

(2686, 6)

(242, 126)

(242,)

(242, 6)

250     5
251     3
252     3
253     5
254     0
       ..
4217    0
4225    1
4226    3
4227    5
4228    5
Name: label, Length: 2686, dtype: int64




Real Score: -3627.5780210128037


(2834, 28)

(2834,)

(2834, 6)

(242, 28)

(242,)

(242, 6)

250     5
251     3
252     3
253     5
254     0
       ..
4217    0
4225    1
4226    3
4227    5
4228    5
Name: label, Length: 2834, dtype: int64
Real Score: 670.7183352768061


(2762, 42)

(2762,)

(2762, 6)

(240, 42)

(240,)

(240, 6)

250     5
251     3
252     3
253     5
254     0
       ..
4217    0
4225    1
4226    3
4227    5
4228    5
Name: label, Length: 2762, dtype: int64
Real Score: -4543.855738797268


(2762, 163)

(2762,)

(2762, 6)

(240, 163)

(240,)

(240, 6)

250     5
251     3
252     3
253     5
254     0
       ..
4217    0
4225    1
4226    3
4227    5
4228    5
Name: label, Length: 2762, dtype: int64
Real Score: -5259.21563943303


(2686, 31)

(2686,)

(2686, 6)

(242, 31)

(242,)

(242, 6)

250     5
251     3
252     3
253     5
254     0
       ..
4217    0
4225    1
4226    3
4227    5
4228    5
Name: label, Length: 2686, dtype: int64
Real Score: -23400.0


(2771, 32)

(2771,)

(2771, 6)

(242, 32)

(242,)

(242, 6)

250     5
251     3
252     3
253     5
254     0
       ..
4217    0
4225    1
4226    3
4227    5
4228    5
Name: label, Length: 2771, dtype: int64
Real Score: -1276.0344827586205


(2834, 181)

(2834,)

(2834, 6)

(240, 181)

(240,)

(240, 6)

250     5
251     3
252     3
253     5
254     0
       ..
4217    0
4225    1
4226    3
4227    5
4228    5
Name: label, Length: 2834, dtype: int64
Real Score: -6002.777777777777


(2749, 51)

(2749,)

(2749, 6)

(240, 51)

(240,)

(240, 6)

250     5
251     3
252     3
253     5
254     0
       ..
4217    0
4225    1
4226    3
4227    5
4228    5
Name: label, Length: 2749, dtype: int64
Real Score: -589.4327511568886


(2686, 48)

(2686,)

(2686, 6)

(242, 48)

(242,)

(242, 6)

250     5
251     3
252     3
253     5
254     0
       ..
4217    0
4225    1
4226    3
4227    5
4228    5
Name: label, Length: 2686, dtype: int64
Real Score: -2911.0344827586205


(2762, 37)

(2762,)

(2762, 6)

(242, 37)

(242,)

(242, 6)

250     5
251     3
252     3
253     5
254     0
       ..
4217    0
4225    1
4226    3
4227    5
4228    5
Name: label, Length: 2762, dtype: int64




Real Score: -9397.47622941776


(2762, 43)

(2762,)

(2762, 6)

(242, 43)

(242,)

(242, 6)

250     5
251     3
252     3
253     5
254     0
       ..
4217    0
4225    1
4226    3
4227    5
4228    5
Name: label, Length: 2762, dtype: int64
Real Score: -910.0


In [19]:
models[4] = score_list

In [20]:
with open(model_file, 'w' , newline='') as outfile:
    writer = csv.writer(outfile)
    for row in models:
        #print("HI")
        writer.writerow(row)
    
outfile.close()

In [21]:
print(score_list)

[-1094.7619047619046, -11307.5, 245.0, 290.04522738630703, -1121.614906832298, -4546.249827467219, -3627.5780210128037, 670.7183352768061, -4543.855738797268, -5259.21563943303, -23400.0, -1276.0344827586205, -6002.777777777777, -589.4327511568886, -2911.0344827586205, -9397.47622941776, -910.0]


In [22]:
models[4] = score_list

In [23]:
print(score_list)

[-1094.7619047619046, -11307.5, 245.0, 290.04522738630703, -1121.614906832298, -4546.249827467219, -3627.5780210128037, 670.7183352768061, -4543.855738797268, -5259.21563943303, -23400.0, -1276.0344827586205, -6002.777777777777, -589.4327511568886, -2911.0344827586205, -9397.47622941776, -910.0]
