# Feature Engineering/Processing

In [4]:
import pickle
import pandas as pd
from os import getcwd
from os.path import dirname

# Locate the data folders relative to the current working directory.
path = getcwd()
parent = dirname(path)  # directory one level above the working directory

# Every directory constant keeps a trailing slash so that file names can be
# appended to it directly.
DIR_DATA = f"{parent}/data/"
DIR_CLEAN_DATA = f"{DIR_DATA}clean/"
DIR_PROCESS_DATA = f"{DIR_DATA}processed/"

#### Possible new features:
- Number of past injuries
- Category of severity of last injury
- Ratio of attendance
- Defensive category based on the last seasons?
- Attacking category based on the last seasons?

## Inactive List

In [None]:
# Maps an injury keyword (lower case) to a coarser body-part label.
# Several specific keywords collapse onto one label, e.g. 'tibia' and
# 'fibula' both map to 'shin', and 'acl' / 'mcl' both map to 'knee'.
injury_dictionary = {
    # --- foot and lower leg ---
    'foot': 'foot',
    'toe': 'toe',
    'heel': 'heel',
    'ankle': 'ankle',
    'achilles': 'achilles',
    'calf': 'calf',
    'shin': 'shin',
    'tibia': 'shin',
    'fibula': 'shin',
    # --- knee ---
    'acl': 'knee',
    'mcl': 'knee',
    'knee': 'knee',
    # --- upper leg and hip ---
    'hamstring': 'hamstring',
    'quad': 'quad',
    'groin': 'groin',
    'hip': 'hip',
    'femur': 'quad',
    # --- shoulder and torso ---
    'shoulder': 'shoulder',
    'back': 'torso',
    'torso': 'torso',
    'ribs': 'torso',
    'abdominal': 'abdominal',
    # --- head, neck and face ---
    'neck': 'head',
    'eye': 'face',
    'nose': 'face',
    'head': 'head',
    # --- hand and arm ---
    'finger': 'finger',
    'hand': 'hand',
    'arm': 'arm',
    'bicep': 'arm',
    'tricep': 'arm',
    'elbow': 'elbow',
    'wrist': 'wrist',
}

# Load the cleaned inactive-list pickle from the clean-data directory.
# The context manager closes the file handle as soon as loading finishes;
# passing a bare open() straight to pickle.load() left the handle open.
# NOTE(review): pickle.load can execute arbitrary code, so this file should
# only ever be one this project wrote itself.
with open(DIR_CLEAN_DATA + 'inactive_list_cleaned.p', 'rb') as pickle_file:
    inactive_list_df = pickle.load(pickle_file)

In [None]:
# Classifier for the "Notes" text attached to each missed-game or
# inactive-list event.
#
# Input:
#     - a record 'c' exposing the note text under the key 'Notes'
# Outputs:
#     - a note "keyword" (e.g. calf, shin)
#     - a note "category" (e.g. lower leg, sick, healthy inactive)
#
# @author: evanl

# Any of these substrings means the player came back rather than went out.
_RETURN_KEYWORDS = ('return', 'returned', 'activate', 'activated')

# Notes that give no reason at all; matched against the whole note, not as
# a substring.
_BARE_IL_NOTES = ('placed on il', 'placed on il (p)')

# Ordered (keywords, (keyword label, category)) rules. A rule fires when any
# of its keywords occurs as a substring of the lower-cased note. The first
# rule that fires wins, so the order below is significant.
_KEYWORD_RULES = (
    # ---- healthy inactive ----
    (('suspension',), ('suspension', 'healthy inactive')),
    (('family', 'personal', 'birth', 'death'),
     ('personal reasons', 'healthy inactive')),
    # ---- rest ----
    (('rest',), ('rest', 'rest')),
    # ---- sick ----
    (('virus', 'headache', 'flu', 'sick', 'illness', 'infection',
      'pneumonia', 'gastro', 'appende', 'nausea', 'pox', 'dizziness',
      'poisoning', 'bronchitis'),
     ('sick', 'sick')),
    # ---- foot ----
    (('foot',), ('foot', 'foot')),
    (('toe',), ('toe', 'foot')),
    (('heel',), ('heel', 'foot')),
    # ---- lower leg ----
    (('ankle',), ('ankle', 'lower leg')),
    (('achilles',), ('achilles', 'lower leg')),
    (('calf',), ('calf', 'lower leg')),
    (('shin',), ('shin', 'lower leg')),
    (('tibia',), ('tibia', 'lower leg')),
    (('fibula',), ('fibula', 'lower leg')),
    # ---- knee ----
    (('acl',), ('ACL', 'knee')),
    (('mcl',), ('MCL', 'knee')),
    (('knee', 'patella', 'meniscus'), ('knee', 'knee')),
    # ---- upper leg ----
    (('quad', 'quadriceps', 'thigh'), ('quad', 'upper leg')),
    (('hamstring',), ('hamstring', 'upper leg')),
    (('groin',), ('groin', 'upper leg')),
    (('hip', 'adductor'), ('hip', 'upper leg')),
    (('femur',), ('femur', 'upper leg')),
    # ---- leg catch-all (only reached when nothing more specific matched) ----
    (('leg',), ('leg', 'leg')),
    # ---- torso ----
    (('chest', 'pectoral'), ('chest', 'torso')),
    (('shoulder', 'rotator cuff'), ('shoulder', 'torso')),
    (('back',), ('back', 'torso')),
    (('collarbone',), ('collarbone', 'torso')),
    (('rib',), ('ribs', 'torso')),
    (('abdom', 'abductor', 'oblique'), ('abdominal', 'torso')),
    # ---- head / neck ----
    (('neck',), ('neck', 'head')),
    (('head', 'concussion'), ('head', 'head')),
    (('eye',), ('eye', 'head')),
    (('nose',), ('nose', 'head')),
    # ---- hand ----
    (('hand',), ('hand', 'hand')),
    (('finger', 'thumb'), ('finger', 'hand')),
    # ---- arm ----
    (('arm',), ('arm', 'arm')),
    (('elbow',), ('elbow', 'arm')),
    (('bicep',), ('bicep', 'arm')),
    (('tricep',), ('tricep', 'arm')),
    (('wrist',), ('wrist', 'arm')),
)


def notes_filter(c):
    """
    Classify the "Notes" text of a missed-game or inactive-list event.

    The note is lower-cased and then tested against an ordered list of
    keyword rules; the first rule that matches decides the result.

    Input:
        - c: record indexable by 'Notes', whose value is the note string
          (presumably a DataFrame row -- confirm against the caller)
    Outputs:
        - (keyword, category) tuple, e.g. ('calf', 'lower leg').
          Returns ('returned to lineup', 'n/a') for activation notes and
          ('other', 'other') when no rule matches.
    """
    text = c['Notes'].lower()

    # Player activated or returned to lineup.
    if any(word in text for word in _RETURN_KEYWORDS):
        return 'returned to lineup', 'n/a'

    # No reason given for the inactive-list move: assume it was a
    # non-injury roster move.
    if text in _BARE_IL_NOTES:
        return 'roster move', 'healthy inactive'

    # Everything else is decided by the first keyword rule that matches.
    for keywords, label in _KEYWORD_RULES:
        if any(word in text for word in keywords):
            return label

    return 'other', 'other'

## Players Stats

In [5]:
# Load the cleaned player-stats pickle from the clean-data directory.
# The context manager closes the file handle as soon as loading finishes;
# passing a bare open() straight to pickle.load() left the handle open.
# NOTE(review): pickle.load can execute arbitrary code, so this file should
# only ever be one this project wrote itself.
with open(DIR_CLEAN_DATA + 'player_stats_cleaned.p', 'rb') as pickle_file:
    players_stats_df = pickle.load(pickle_file)

# Bare expression kept as the last statement so the notebook displays the frame.
players_stats_df

Unnamed: 0,Year,Player,Season,Team,Age,Inches,Weight,GP,MIN,REB,...,AVG_SPEED_DEF,PLAYER_ID,CONTESTED_SHOTS,CONTESTED_SHOTS_2PT,CONTESTED_SHOTS_3PT,BOX_OUTS,OFF_BOXOUTS,DEF_BOXOUTS,DEFLECTIONS,CHARGES_DRAWN
0,2013,AJ Price,regular,Minnesota Timberwolves,27,74,195,28,3.5,0.4,...,3.96,201985,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0
1,2013,Aaron Brooks,regular,Denver Nuggets,29,72,161,72,21.6,1.9,...,3.99,201166,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0
2,2013,Aaron Gray,regular,Sacramento Kings,29,84,270,37,9.6,3.0,...,3.91,201189,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0
3,2013,Adonis Thomas,regular,Philadelphia 76ers,21,79,240,6,6.3,0.5,...,4.16,203519,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0
4,2013,Al Harrington,regular,Washington Wizards,34,81,245,34,15.0,2.4,...,3.80,1733,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6612,2021,Xavier Tillman,post,Memphis Grizzlies,23,80,245,9,15.5,3.3,...,4.16,1630214,6.33,4.56,1.78,1.22,0.44,0.78,1.44,0.0
6613,2021,Yuta Watanabe,post,Toronto Raptors,27,81,215,4,2.6,0.0,...,3.24,1629139,1.00,0.25,0.75,0.00,0.00,0.00,0.00,0.0
6614,2021,Zach LaVine,post,Chicago Bulls,27,77,200,4,38.1,5.3,...,3.94,203897,8.25,4.25,4.00,0.50,0.00,0.50,0.25,0.0
6615,2021,Zeke Nnaji,post,Denver Nuggets,21,81,240,2,4.3,0.0,...,3.78,1630192,0.50,0.50,0.00,0.00,0.00,0.00,0.00,0.0


## Dataframe for Modelling

Row per player
- Ratio of attendance per season
- Rebounds per game
- Inches
- Weight