In [1]:
%run imports.ipynb

imports loaded.


### Function definitions

#### pre-processing

In [8]:
def get_height(item):
    '''Convert a feet/inches height string to centimetres.

    Parameters
    ----------
    item : object
        Height written as feet'inches", e.g. ``5' 11"``. The value is
        converted with ``str`` before parsing. ``None``, float NaN, blank
        strings and values starting with ``-`` (e.g. a ``--`` placeholder)
        are treated as missing.

    Returns
    -------
    float or None
        Height in cm rounded to 2 decimals, or ``None`` when missing.

    Raises
    ------
    ValueError
        If the text has no feet marker (``'``) or a part is not numeric.
    '''
    # None / NaN would otherwise be stringified to 'None' / 'nan' and parsed.
    if item is None or (isinstance(item, float) and item != item):
        return None

    text = str(item).strip()
    if not text or text[0] == '-':
        return None

    feet_text, feet_mark, inches_text = text.partition("'")
    if not feet_mark:
        raise ValueError('height has no feet marker: %r' % text)

    # partition() never yields the -1 that str.find returns for a missing
    # inch mark, so the last digit is no longer sliced off in that case.
    inches_text = inches_text.partition('"')[0].strip()
    inches = float(inches_text) if inches_text else 0.0

    # 1 ft = 30.48 cm, 1 in = 2.54 cm
    return round(30.48 * float(feet_text) + 2.54 * inches, 2)

print('get_height(item)')

get_height(item)


In [9]:
def get_weight(item):
    '''Convert a weight string in pounds to kilograms.

    Parameters
    ----------
    item : object
        Weight such as ``155 lbs.``. The value is converted with ``str``
        before parsing; the unit suffix is optional. ``None``, float NaN,
        blank strings and values starting with ``-`` are treated as missing.

    Returns
    -------
    float or None
        ``0.45 * pounds``, or ``None`` when missing.

    Raises
    ------
    ValueError
        If the part before the unit is not numeric.
    '''
    # None / NaN would otherwise be stringified to 'None' / 'nan' and parsed.
    if item is None or (isinstance(item, float) and item != item):
        return None

    text = str(item).strip()
    if not text or text[0] == '-':
        return None

    # Keep what precedes the unit. partition() returns the whole text when
    # the unit is absent, whereas slicing with str.find's -1 dropped the
    # last digit.
    pounds = float(text.partition('lb')[0])

    # NOTE(review): 0.45 is a rounded factor (1 lb = 0.45359237 kg). It is
    # left unchanged so results match values already produced by this
    # function - confirm before tightening it.
    return 0.45 * pounds

print('get_weight(item)')

get_weight(item)


In [11]:
def get_reach(item):
    '''Convert a reach string in inches to centimetres.

    Parameters
    ----------
    item : object
        Reach such as ``72"``. The value is converted with ``str`` before
        parsing; the inch mark is optional. ``None``, float NaN, blank
        strings and values starting with ``-`` are treated as missing.

    Returns
    -------
    float or None
        ``2.54 * inches`` (not rounded), or ``None`` when missing.

    Raises
    ------
    ValueError
        If the part before the inch mark is not numeric.
    '''
    # None / NaN would otherwise be stringified to 'None' / 'nan' and parsed.
    if item is None or (isinstance(item, float) and item != item):
        return None

    text = str(item).strip()
    if not text or text[0] == '-':
        return None

    # partition() returns the whole text when the inch mark is absent,
    # whereas slicing with str.find's -1 dropped the last digit.
    inches = float(text.partition('"')[0])

    return 2.54 * inches

print('get_reach(item)')

get_reach(item)


In [10]:
def get_std_reach(item, height, reach_list, height_list):
    '''Estimate a "standard reach" for a given height.

    The estimate is ``height * mean(reach_list / height_list)``, rounded to
    2 decimals. ``reach_list`` and ``height_list`` are divided element-wise,
    so they must support that (e.g. numpy arrays or pandas Series).
    ``item`` is accepted but not used.
    '''
    mean_reach_to_height = np.mean(reach_list / height_list)
    return round(height * mean_reach_to_height, 2)

print('get_std_reach(item, height, reach_list, height_list)')

get_std_reach(item, height, reach_list, height_list)


In [12]:
def get_age(item, reference_year=None):
    '''Convert a date-of-birth string to an age in whole calendar years.

    Only the year is used: the result is ``reference_year - birth_year``,
    where the birth year is the text after the last comma (the whole text
    when there is no comma).

    Parameters
    ----------
    item : object
        Date of birth such as ``Jul 14, 1988``. The value is converted with
        ``str`` before parsing. ``None``, float NaN, blank strings and
        values starting with ``-`` are treated as missing.
    reference_year : int, optional
        Year to measure the age against. Defaults to the current calendar
        year; pass a fixed year to make results reproducible.

    Returns
    -------
    int or None
        Age in years, or ``None`` when missing.

    Raises
    ------
    ValueError
        If the year part is not an integer.
    '''
    # Local import keeps the function self-contained.
    from datetime import date

    # None / NaN would otherwise be stringified to 'None' / 'nan' and parsed.
    if item is None or (isinstance(item, float) and item != item):
        return None

    text = str(item).strip()
    if not text or text[0] == '-':
        return None

    if reference_year is None:
        reference_year = date.today().year

    birth_year = int(text.rpartition(',')[2])
    return reference_year - birth_year

print('get_age(item)')

get_age(item)


In [13]:
def get_std_age(item, age_list):
    '''Return the mean of ``age_list`` rounded to 2 decimals.

    Intended as the "standard age" of a group of fighters (e.g. a weight
    class); ``item`` is accepted but not used.
    '''
    mean_age = np.mean(age_list)
    return round(mean_age, 2)

print('get_std_age(item, age_list)')

get_std_age(item, age_list)


In [14]:
def get_win_perc(name, date, df):
    '''Return the share of fights won by ``name`` strictly before ``date``.

    ``df`` needs the columns ``w_fighter`` (winner), ``l_fighter`` (loser)
    and ``date``. The result is ``wins / (wins + losses)``; it is ``0`` when
    the fighter has no wins before ``date`` (which also covers the case of
    no earlier fights at all).
    '''
    is_earlier = df.date < date
    won = (df['w_fighter'] == name) & is_earlier
    lost = (df['l_fighter'] == name) & is_earlier

    n_wins = len(df.loc[won])
    n_losses = len(df.loc[lost])

    # Guard first: with no wins the ratio is 0 and the sum may be zero too.
    if n_wins == 0:
        return 0
    return n_wins / (n_wins + n_losses)

print('get_win_perc(name, date, df)')

get_win_perc(name, date, df)


In [17]:
def get_lose_streak_data_frame(name, date, df):
    '''Return the fights ``name`` lost strictly before ``date``.

    ``df`` needs the columns ``l_fighter``, ``w_fighter`` and ``date``.
    The returned frame has one row per lost fight with the columns
    ``result`` (always ``'lose'``), ``date`` (converted with
    ``pd.to_datetime``) and ``opponent`` (the winner of that fight).
    '''
    is_loss = df.l_fighter == name

    losses = pd.DataFrame({
        'result': np.array('lose'),
        'date': pd.to_datetime(np.array(df.loc[is_loss, 'date'])),
        'opponent': np.array(df.loc[is_loss, 'w_fighter']),
    })

    # Keep only fights that happened before the cut-off date.
    happened_before = losses.date < date
    return losses[happened_before]

print('get_lose_streak_data_frame(name, date, df)')

get_lose_streak_data_frame(name, date, df)


In [18]:
def get_win_streak_data_frame(name, date, df):
    '''Return the fights ``name`` won strictly before ``date``.

    ``df`` needs the columns ``w_fighter``, ``l_fighter`` and ``date``.
    The returned frame has one row per won fight with the columns
    ``result`` (always ``'win'``), ``date`` (converted with
    ``pd.to_datetime``) and ``opponent`` (the loser of that fight).
    '''
    is_win = df.w_fighter == name

    wins = pd.DataFrame({
        'result': np.array('win'),
        'date': pd.to_datetime(np.array(df.loc[is_win, 'date'])),
        'opponent': np.array(df.loc[is_win, 'l_fighter']),
    })

    # Keep only fights that happened before the cut-off date.
    happened_before = wins.date < date
    return wins[happened_before]

print('get_win_streak_data_frame(name, date, df)')

get_win_streak_data_frame(name, date, df)


In [19]:
def get_streak_data_frame(name, date, df):
    '''Return the wins and losses of ``name`` before ``date``, newest first.

    Stacks the output of ``get_win_streak_data_frame`` and
    ``get_lose_streak_data_frame`` (wins first) and sorts the rows by
    ``date`` in descending order. Row labels of the two inputs are kept.
    '''
    fight_history = pd.concat([
        get_win_streak_data_frame(name, date, df),
        get_lose_streak_data_frame(name, date, df),
    ])
    return fight_history.sort_values(by='date', ascending=False)

print('get_streak_data_frame(name, date, df)')

get_streak_data_frame(name, date, df)


In [20]:
def get_win_streak(name, date, df):
    '''Return the length of the current win streak of ``name`` before ``date``.

    Walks the ``result`` column of ``get_streak_data_frame`` from the top
    and counts rows until the first ``'lose'``. Returns the number of rows
    when there is no loss (0 for an empty history).
    '''
    outcomes = get_streak_data_frame(name, date, df)['result']

    for streak_length, outcome in enumerate(outcomes):
        if outcome == 'lose':
            # Rows seen so far are exactly the leading non-loss results.
            return streak_length

    return len(outcomes)

print('get_win_streak(name, date, df)')

get_win_streak(name, date, df)


In [21]:
def get_lose_streak(name, date, df):
    '''Return the length of the current losing streak of ``name`` before ``date``.

    Walks the ``result`` column of ``get_streak_data_frame`` from the top
    and counts rows until the first ``'win'``. Returns the number of rows
    when there is no win (0 for an empty history).
    '''
    outcomes = get_streak_data_frame(name, date, df)['result']

    for streak_length, outcome in enumerate(outcomes):
        if outcome == 'win':
            # Rows seen so far are exactly the leading non-win results.
            return streak_length

    return len(outcomes)

print('get_lose_streak(name, date, df)')

get_lose_streak(name, date, df)


In [23]:
def get_fighter_stats(name, data ):
    '''Return the statistics row of ``name`` as a numpy array.

    Every column of ``data`` except the first is treated as a statistic;
    rows are matched on the ``name`` column. The first matching row is
    returned. When no row matches, an array of ``None`` with one entry per
    statistic column is returned instead.
    '''
    stat_columns = data.columns[1:]
    matches = data.loc[data.name == name, stat_columns]

    if len(matches) == 0:
        return np.array([None] * len(stat_columns))

    return np.array(matches.iloc[0])

print('get_fighter_stats(name, data )')

get_fighter_stats(name, data )


In [24]:
def add_columns(data, column_list):
    '''Add each name in ``column_list`` to ``data`` as a column of ``None``.

    ``data`` is modified in place and also returned. A name that already
    exists in ``data`` is overwritten with ``None``.
    '''
    for new_column in column_list:
        data[new_column] = None

    return data

print('add_columns(data, column_list)')

add_columns(data, column_list)


#### feature_engineering

In [4]:
def get_stat(index, data, stat='height', fo=''):
    '''Return the value at row ``index`` of the column named ``fo + stat``.

    ``fo`` is a column-name prefix (empty by default) placed in front of
    the statistic name ``stat``; the lookup is label-based (``data.loc``).
    '''
    column_label = fo + stat
    return data.loc[index, column_label]

print("get_stat(index, data, stat='height', fo='')")

get_stat(index, data, stat='height', fo='')


In [6]:
def get_swaped_row(index, data):
    '''Return row ``index`` of ``data`` seen from the opponent's side.

    Values are emitted in the column order of ``data``:

    * ``f_<stat>`` columns receive the value of ``o_<stat>`` and vice versa;
    * ``result`` is always ``'lose'``;
    * ``fighter`` and ``opponent`` are exchanged;
    * every other column is copied unchanged.

    All values are read from ``data`` itself (not from a notebook-level
    ``df``), so the function works on whichever frame is passed in.

    Returns a 1-D numpy array built with ``np.append``.
    NOTE(review): ``np.append`` coerces to a common dtype, so a row mixing
    text and numbers presumably comes back as strings - confirm callers
    expect that.
    '''
    swapped_row = np.array([])

    for column in data.columns:

        # startswith (not find) because column[2:] assumes the two-character
        # marker is a prefix; a name such as 'no_contest' merely contains 'o_'.
        if column.startswith('f_'):    # f_ will get opponent stats
            value = get_stat(index, data=data, stat=column[2:], fo='o_')
        elif column.startswith('o_'):  # o_ will get fighter stats
            value = get_stat(index, data=data, stat=column[2:], fo='f_')
        elif column == 'result':
            value = 'lose'
        elif column == 'fighter':
            value = get_stat(index, data=data, stat='opponent')
        elif column == 'opponent':
            value = get_stat(index, data=data, stat='fighter')
        else:
            value = get_stat(index, data=data, stat=column)

        swapped_row = np.append(swapped_row, value)

    return swapped_row

print('get_swaped_row(index, data)')

get_swaped_row(index, data)


#### Model training