# UFC Parser - New

In [2]:
import pandas as pd
import numpy as np

In [3]:
# clean up the ownership df:
# 1) drop every row that contains an empty cell
# 2) rename the '%Drafted' column to 'ownership' for easier assignment (a name starting with % cannot be used as an attribute or keyword)
# 3) strip the '%' from the string values in that column, convert them to floats, and divide by 100 (e.g. '25.5%' -> 0.255)
# return the modified dataframe

def tweak_own(own_df):
    """Return a cleaned copy of the ownership table.

    Rows containing any missing value are dropped, the '%Drafted' column is
    renamed to 'ownership', and its strings (e.g. '25.5%') are converted to
    float fractions (0.255). The input frame is left untouched.
    """
    complete_rows = own_df.dropna()
    renamed = complete_rows.rename(columns={'%Drafted': 'ownership'})
    # Strip the percent sign first so the text parses as a float.
    as_fraction = renamed.ownership.str.replace('%', '').astype(float).div(100)
    return renamed.assign(ownership=as_fraction)

In [74]:
def _lookup_per_slot(lineups, slots, table, column):
    """Return a frame (indexed like ``lineups``) with ``table[column]`` for the fighter in each slot."""
    # .loc with a list of labels raises KeyError when a fighter is missing from
    # ``table``, so a name mismatch between the files fails loudly.
    return pd.DataFrame(
        {slot: table.loc[lineups[slot], column].values for slot in slots},
        index=lineups.index,
    )


def parse_results(results_df, ownership=None, salaries=None, n_entries=None, salary_cap=50000):
    """Split each lineup into its six fighters and attach per-lineup metrics.

    Parameters
    ----------
    results_df : DataFrame
        Contest results with at least 'Lineup' and 'Points' columns. 'Lineup'
        is a string of the form 'F <name> F <name> ...' with six fighters.
    ownership : DataFrame, optional
        Indexed by fighter name with an 'ownership' column. Defaults to the
        notebook-level ``own_df``.
    salaries : DataFrame, optional
        Indexed by fighter name with a 'Salary' column. Defaults to the
        notebook-level ``sal_df``.
    n_entries : int, optional
        Multiplier applied to the ownership product to get 'combos'. Defaults
        to the notebook-level ``total_entries``.
    salary_cap : number, default 50000
        Cap from which the lineup's total salary is subtracted.

    Returns
    -------
    DataFrame
        The rows of ``results_df`` that have no missing values, plus columns
        F1..F6 (fighter names), 'own_sum', 'own_prod', 'combos', 'salary'
        (cap minus the six salaries, i.e. the unspent amount) and 'dupes'
        (number of rows in ``results_df`` sharing the same Lineup).

    Raises
    ------
    KeyError
        If a parsed fighter name is absent from ``ownership`` or ``salaries``.
    """
    # Fall back to the notebook globals so existing parse_results(results_df)
    # calls keep working unchanged.
    if ownership is None:
        ownership = own_df
    if salaries is None:
        salaries = sal_df
    if n_entries is None:
        n_entries = total_entries

    slots = ['F1', 'F2', 'F3', 'F4', 'F5', 'F6']
    pattern = r'F (?P<F1>.*) F (?P<F2>.*) F (?P<F3>.*) F (?P<F4>.*) F (?P<F5>.*) F (?P<F6>.*)'

    # The join aligns on the index, so rows removed by dropna() simply do not
    # pick up fighter columns.
    lineups = results_df.dropna().join(results_df.Lineup.str.extract(pattern))

    own = _lookup_per_slot(lineups, slots, ownership, 'ownership')
    sal = _lookup_per_slot(lineups, slots, salaries, 'Salary')

    # Counted on the full input (not only the rows kept above); assign() aligns
    # the result back onto the surviving rows by index.
    dupes = (results_df
        .groupby('Lineup')
        .Points
        .transform('size')
        .astype('Int16'))

    return lineups.assign(
        own_sum=own.sum(axis=1),
        own_prod=own.product(axis=1),
        combos=lambda df_: df_.own_prod * n_entries,
        salary=salary_cap - sal.sum(axis=1),
        dupes=dupes,
    )

In [78]:
# prepare results df for regression testing on dupes
# filter df with only necessary columns and remove duplicates
def regr_prep(res_df):
    """Reduce parsed results to the columns used for the dupes regression.

    Keeps only 'Lineup', 'combos', 'salary' and 'dupes', removes rows that
    are identical across those columns, and renumbers the index from 0.
    """
    wanted = ['Lineup', 'combos', 'salary', 'dupes']
    subset = res_df[wanted]
    deduped = subset.drop_duplicates(ignore_index=True)
    return deduped

In [4]:
# Location of the contest-results CSV.
# NOTE(review): absolute, machine-specific Windows path -- adjust before running elsewhere.
directory = 'C:\\Users\\steve\\OneDrive\\Documents\\DFS\\UFC\\Results\\'
fname = 'FN - Hermansson vs Strickland - Main.csv'

# `directory` already ends with a path separator, so the parts join directly.
path = ''.join([directory, fname])
print(path)

C:\Users\steve\OneDrive\Documents\DFS\UFC\Results\FN - Hermansson vs Strickland - Main.csv


In [5]:
# Load the contest-results table from the CSV, selecting columns 0, 2, 4 and 5
# by position (the output below shows them as Rank, EntryName, Points, Lineup).
results_df = pd.read_csv(path, usecols=[0,2,4,5])
results_df.head()

Unnamed: 0,Rank,EntryName,Points,Lineup
0,1,jsteed22 (6/150),688.79004,F Chidi Njokuani F Alexis Davis F Jailton Alme...
1,2,CrimaSaad (5/21),682.71,F Chidi Njokuani F Alexis Davis F Malcolm Gord...
2,3,bendragos (70/150),679.32,F Sean Strickland F Chidi Njokuani F Jailton A...
3,3,galsharir (1/3),679.32,F Sean Strickland F Chidi Njokuani F Jailton A...
4,3,russelld07 (75/150),679.32,F Sean Strickland F Chidi Njokuani F Jailton A...


In [28]:
# Read the same CSV again, this time taking columns 7, 9 and 10 by position;
# 'Player' becomes the index so fighters can be looked up by name.
own_df_raw = pd.read_csv(path, usecols=[7,9,10], index_col='Player')
# tweak_own drops incomplete rows and turns '%Drafted' into a float 'ownership' column.
own_df = tweak_own(own_df_raw)

In [None]:
#get salaries from file
# NOTE(review): this cell has no execution count in the saved notebook, yet
# parse_results() reads the global `sal_df` defined here -- run it before parsing.
base_dir = 'C:\\Users\\steve\\OneDrive\\Documents\\DFS\\UFC\\'
sal_file = 'fn salaries.csv'
sal_path = base_dir + sal_file

# Columns 0 and 1 of the salary file by position; 'Player' is the index and
# parse_results() reads the 'Salary' column.
sal_df = pd.read_csv(sal_path, usecols=[0,1], index_col='Player')
sal_df

In [7]:
# Contest-level summary of results_df (candidate for a helper function later).

total_entries = results_df.shape[0]              # every row, with or without a lineup
total_empty = results_df.Lineup.isna().sum()     # rows whose Lineup is missing
total_live = total_entries - total_empty         # rows that do have a lineup
total_uniques = results_df.Lineup.nunique()      # distinct non-null lineups

print('Total Entries: ', total_entries)
print('Total Empty Lineups: ', total_empty)
print('Total Live Lineups: ', total_live)
print('Total Unique Lineups:', total_uniques)

Total Entries:  31372
Total Empty Lineups:  1007
Total Live Lineups:  30365
Total Unique Lineups: 10693


In [72]:
# Parse every lineup; by default this relies on the globals own_df, sal_df and
# total_entries having been defined by earlier cells.
# NOTE(review): the saved execution counts show this call (72) ran before the
# current parse_results definition (74) -- re-run after a kernel restart to confirm.
res_df = parse_results(results_df)

### Testing regular expression techniques

In [None]:
# Scratch cell: preview the six named fighter columns (F1..F6) extracted from
# every non-null Lineup string. The commented-out lines are alternative
# approaches (replace + split on a delimiter, extractall + unstack) left for reference.
(results_df
    .Lineup
    .dropna()
    #.str.replace(' F ', ',')
    #.str.replace('F ', '')
    #.str.split(',', expand=True).add_prefix('F')
    .str.extract(r'F (?P<F1>.*) F (?P<F2>.*) F (?P<F3>.*) F (?P<F4>.*) F (?P<F5>.*) F (?P<F6>.*)')
    #.str.extractall(r'F (?P<fighter>\S*\s\S*)')
    #.unstack()
)

In [54]:
import re

# Sample lineup string for trying the pattern outside pandas.
test_string = 'F Sean Strickland F Denys Bondar F Punahele Soriano F Carlston Harris F Bryan Battle F Phil Rowe'

# Captures exactly two whitespace-separated tokens after each 'F ' marker, so a
# fighter name with more than two words would be cut short by this pattern.
re.findall(r'F (\S*\s\S*)', test_string)

['Sean Strickland',
 'Denys Bondar',
 'Punahele Soriano',
 'Carlston Harris',
 'Bryan Battle',
 'Phil Rowe']