In [1]:
import pandas as pd
import numpy as np
import datetime as dt




## Importing data from Austin Animal Center

In [24]:
# Location of the input CSV.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this exact file exists.
outcomes_csv_path = r'/Users/philliprichardson/Metis/Module 4/Full Process.csv'

# Load the CSV into the working DataFrame that every later cell builds on.
outcomes = pd.read_csv(outcomes_csv_path)


## Feature Cleaning and Creation

In [25]:
def city_agg(inp):
    """Collapse a city token into one of three buckets.

    Returns 'Austin' or 'Travis' when ``inp`` equals that name exactly,
    and 'Other' for every other value (including missing values).
    """
    for known_city in ('Austin', 'Travis'):
        if inp == known_city:
            return known_city
    return 'Other'
    
def age_group(inp):
    """Bucket an age string of the form "<count> <unit>" into a life stage.

    Parameters
    ----------
    inp : str
        Text such as "3 weeks", "1 month" or "10 years". Singular and plural
        unit spellings are both accepted, in any letter case.

    Returns
    -------
    str
        'Neonatal' - up to 6 weeks (or up to 42 days),
        'Young'    - more than 6 weeks, any number of months, or 0 years,
        'Adult'    - fewer than 10 years,
        'Senior'   - 10 years or more,
        ''         - missing, malformed or unrecognised input.
    """
    # Missing values reach this function as float NaN (or None), which has
    # no .split(); treat them as "unknown" instead of raising.
    if not isinstance(inp, str):
        return ''

    grp = inp.split()
    if len(grp) < 2:
        return ''

    try:
        count = int(grp[0])
    except ValueError:
        return ''

    # Normalise "week"/"weeks", "year"/"years", ... to the singular form.
    # Previously only the plural spellings were matched, so "1 week",
    # "1 month", "1 year" and every "N days" value fell through to 'Senior'.
    unit = grp[1].lower()
    if unit.endswith('s'):
        unit = unit[:-1]

    if unit == 'day':
        if count < 0:
            return ''
        # 42 days == 6 weeks, the same cut-off used for the week unit.
        return 'Neonatal' if count <= 42 else 'Young'
    if unit == 'week':
        return 'Neonatal' if count <= 6 else 'Young'
    if unit == 'month':
        return 'Young'
    if unit == 'year':
        if count == 0:
            return 'Young'
        # NOTE(review): negative year counts still land in 'Adult', as they
        # did before this change - confirm whether they should map to ''.
        return 'Adult' if count < 10 else 'Senior'

    # Unrecognised unit.
    return ''

def outcome_grp(inp):
    """Reduce a raw outcome type to one of three target classes.

    'Adoption' and 'Rto-Adopt' map to 'Adoption'; 'Died' and 'Euthanasia'
    map to 'Euthanised'; every other value maps to 'Transfered/RTO'.
    """
    adopted = ('Adoption', 'Rto-Adopt')
    deceased = ('Died', 'Euthanasia')

    if inp in adopted:
        return 'Adoption'
    if inp in deceased:
        return 'Euthanised'
    return 'Transfered/RTO'

def multi_vist(inp):
    """Label a prior-occurrence count.

    Returns "First Visit" when the count equals 0, "Repeat" when it is
    below 3, and "Frequent" for anything else.
    """
    if inp == 0:
        return "First Visit"
    return "Repeat" if inp < 3 else "Frequent"
    
def condition(inp):
    """Group a raw intake condition into a coarse category.

    'Behavior' and 'Feral' become "Behavior Issues"; 'Injured', 'Medical',
    'Nursing', 'Pregnant' and 'Sick' become "Medical Issues"; every other
    value becomes 'Normal'.
    """
    buckets = (
        ("Behavior Issues", ('Behavior', 'Feral')),
        ("Medical Issues", ('Injured', 'Medical', 'Nursing', 'Pregnant', 'Sick')),
    )
    for label, members in buckets:
        if inp in members:
            return label
    return 'Normal'


In [28]:
# Take the second-to-last whitespace-separated token of FoundLocation and
# bucket it with city_agg; the raw token is not kept.
outcomes['city'] = outcomes.FoundLocation.str.split().str[-2].apply(city_agg)

# SexuponIntake is treated as "<status> <sex>" text: the first token is
# compared against 'Intact' and the second against 'Male'.
# NOTE(review): presumably the column also holds a bare placeholder such as
# 'Unknown' and/or missing values - confirm against the CSV.
sex_tokens = outcomes.SexuponIntake.str.split()
sex_status = sex_tokens.str[0]

# 1 when the second token is exactly 'Male'; rows with no second token give 0.
outcomes['Male'] = (sex_tokens.str[1] == 'Male').astype(int)

outcomes['Dog'] = (outcomes.AnimalType == 'Dog').astype(int)

# 1 only when a status is present and it is neither 'Intact' nor 'Unknown'.
# A plain `!= 'Intact'` test also flagged missing and 'Unknown' rows as
# sterilised, because NaN != 'Intact' and 'Unknown' != 'Intact' are both True.
outcomes['birthcontrol'] = (
    sex_status.notna() & ~sex_status.isin(['Intact', 'Unknown'])
).astype(int)

# Life-stage bucket derived from the age text.
outcomes['age_group'] = outcomes.AgeuponIntake.apply(age_group)

# Time-of-day component of DateTime, parsed once and reused for both bounds.
event_clock = pd.to_datetime(outcomes.DateTime).dt.time
opens_at = dt.time(7, 0, 0)
closes_at = dt.time(19, 0, 0)

# 1 when the time is strictly after 07:00:00 and strictly before 19:00:00.
outcomes['normal_hours'] = ((event_clock > opens_at) & (event_clock < closes_at)).astype(int)

# 1 when Color contains at least one '/'.
outcomes['multicolor'] = outcomes.Color.str.count('/').gt(0).astype(int)

# 1 when Breed contains the text 'Mix' at least once.
outcomes['mixed breed'] = outcomes.Breed.str.count('Mix').gt(0).astype(int)

# Coarse target label, visit-frequency label and intake-condition group.
outcomes['outcome'] = outcomes.OutcomeType.apply(outcome_grp)

outcomes['multi_visit'] = outcomes.prior_occurs.apply(multi_vist)

outcomes['condition'] = outcomes.IntakeCondition.apply(condition)

## Feature Selection, Data Filtering, One-Hot Encoding

In [29]:
# Columns carried forward: identifier, raw categoricals, engineered flags
# and the outcome label.
keep = [
    'AnimalID', 'IntakeType', 'condition', 'AnimalType', 'Dog', 'outcome',
    'staylength', 'multi_visit', 'city', 'Male', 'birthcontrol', 'age_group',
    'normal_hours', 'multicolor', 'mixed breed',
]

# Column subset of the full frame, in the order listed above.
outcomes_feats = outcomes.loc[:, keep]

In [30]:
# Keep only rows whose AnimalType is exactly 'Dog' or 'Cat'.
mask = outcomes_feats['AnimalType'].isin(['Dog', 'Cat'])

outcomes_filtered = outcomes_feats.loc[mask]

In [31]:
# Model inputs, in the column order passed to get_dummies.
features = [
    'IntakeType', 'condition', 'Dog', 'multi_visit', 'city', 'Male',
    'birthcontrol', 'age_group', 'normal_hours', 'multicolor', 'mixed breed',
]

# Text-valued inputs that are expanded into one indicator column per level;
# the remaining inputs are already 0/1 flags and pass through unchanged.
categorical_cols = ['IntakeType', 'condition', 'multi_visit', 'city', 'age_group']

feats_dummy = pd.get_dummies(outcomes_filtered[features], columns=categorical_cols)

# Target labels, row-aligned with feats_dummy.
outcome = outcomes_filtered['outcome']

## Exporting to Pickles for use in other programs

In [139]:
# Output locations for the encoded feature matrix and the target labels.
# NOTE(review): absolute, machine-specific paths; the export only works
# where this directory exists.
feats_pickle_path = r'/Users/philliprichardson/Metis/Module 4/feats.pkl'
outcome_pickle_path = r'/Users/philliprichardson/Metis/Module 4/outcome.pkl'

# Write the features first, then the labels.
feats_dummy.to_pickle(feats_pickle_path)

outcome.to_pickle(outcome_pickle_path)