## Outcomes

#### Imports

In [1]:
import pandas as pd
import janitor
import numpy as np

#### Read in Data

In [2]:
# Load the raw outcomes export; the DateTime column is parsed as dates while reading.
outcomes = pd.read_csv(
    '../data/outcomes.csv',
    parse_dates=['DateTime'],
)

In [3]:
# Preview the first five rows of the raw frame.
outcomes.head(n=5)

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color
0,A794011,Chunk,2019-05-08 18:20:00,05/08/2019 06:20:00 PM,05/02/2017,Rto-Adopt,,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White
1,A776359,Gizmo,2018-07-18 16:02:00,07/18/2018 04:02:00 PM,07/12/2017,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown
2,A821648,,2020-08-16 11:38:00,08/16/2020 11:38:00 AM,08/16/2019,Euthanasia,,Other,Unknown,1 year,Raccoon,Gray
3,A720371,Moose,2016-02-13 17:59:00,02/13/2016 05:59:00 PM,10/08/2015,Adoption,,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff
4,A674754,,2014-03-18 11:47:00,03/18/2014 11:47:00 AM,03/12/2014,Transfer,Partner,Cat,Intact Male,6 days,Domestic Shorthair Mix,Orange Tabby


In [4]:
# Normalise the column labels with the clean_names DataFrame method
# (NOTE(review): presumably registered by the `import janitor` at the top — confirm).
# The preview after the column drop shows labels such as animal_id and sex_upon_outcome.
outcomes = outcomes.clean_names()

In [5]:
# These fields are already available in the intake data, so they are removed here.
columns_to_drop = ['name', 'monthyear', 'date_of_birth', 'animal_type', 'breed', 'color']

outcomes = outcomes.drop(columns_to_drop, axis='columns')

In [6]:
# Preview the first five rows after the column drop.
outcomes.head(n=5)

Unnamed: 0,animal_id,datetime,outcome_type,outcome_subtype,sex_upon_outcome,age_upon_outcome
0,A794011,2019-05-08 18:20:00,Rto-Adopt,,Neutered Male,2 years
1,A776359,2018-07-18 16:02:00,Adoption,,Neutered Male,1 year
2,A821648,2020-08-16 11:38:00,Euthanasia,,Unknown,1 year
3,A720371,2016-02-13 17:59:00,Adoption,,Neutered Male,4 months
4,A674754,2014-03-18 11:47:00,Transfer,Partner,Intact Male,6 days


#### Outcome Type

In [7]:
# Relative frequency (proportion, not raw count) of each outcome type.
outcomes.loc[:, 'outcome_type'].value_counts(normalize=True)

Adoption           0.444978
Transfer           0.294771
Return to Owner    0.172110
Euthanasia         0.067798
Died               0.009309
Rto-Adopt          0.005707
Disposal           0.004591
Missing            0.000554
Relocate           0.000182
Name: outcome_type, dtype: float64

#### Outcome Subtype

In [8]:
# Raw count of each outcome subtype (missing values are not counted).
outcomes.loc[:, 'outcome_subtype'].value_counts()

Partner                31308
Foster                 11093
Rabies Risk             3796
Suffering               3283
SCRP                    3211
Snr                     2707
In Kennel                607
Aggressive               545
Offsite                  431
Medical                  306
In Foster                303
At Vet                   266
Behavior                 158
Enroute                   84
Field                     41
Underage                  36
Court/Investigation       31
In Surgery                24
Possible Theft            16
Barn                      11
Prc                       10
Customer S                 8
Emer                       5
Name: outcome_subtype, dtype: int64

#### Sex Upon Outcome

In [9]:
# Values follow a "<spay/neuter status> <sex>" pattern, so a bare 'Unknown' is doubled
# to give the later split on ' ' two tokens like every other value.
# Exact-value replacement (rather than substring replacement) keeps this cell idempotent:
# re-running it leaves 'Unknown Unknown' untouched instead of doubling it again.
outcomes['sex_upon_outcome'] = outcomes['sex_upon_outcome'].replace({'Unknown': 'Unknown Unknown'})

In [10]:
# Break each "<status> <sex>" string into a list of its space-separated tokens.
outcomes['sex_split'] = outcomes['sex_upon_outcome'].str.split(pat=' ')

In [11]:
# First token is the spay/neuter status, second token is the sex.
outcomes['spay_neuter'] = outcomes['sex_split'].str.get(0)
outcomes['sex'] = outcomes['sex_split'].str.get(1)

#### Age Upon Outcome

In [12]:
# Ages look like "<number> <unit>" (e.g. "4 months"): split once, then take each piece.
age_tokens = outcomes['age_upon_outcome'].str.split(' ')
outcomes['age_digit'] = age_tokens.str.get(0)
outcomes['age_unit'] = age_tokens.str.get(1)

In [13]:
# The numeric part was extracted as text; convert it to floating point.
outcomes['age_digit'] = outcomes['age_digit'].astype(float)

In [14]:
# Singular -> plural unit labels, so "1 year" and "2 years" share one unit name.
age_unit_dict = {'year': 'years', 'month': 'months', 'week': 'weeks', 'day': 'days'}

# Only exact matches of the singular forms are rewritten; every other value is kept as is.
outcomes['age_unit'] = outcomes['age_unit'].replace(age_unit_dict)

In [15]:
def age_in_years(unit):
    """Return how many of ``unit`` make up one year.

    Despite its name, this does not compute an age. It returns the divisor
    that turns a count of ``unit`` into years: 1 for 'years', 12 for
    'months', 52.143 for 'weeks' and 365 for 'days'. Any other value gives
    ``None``.
    """
    units_per_year = {
        'years': 1,
        'months': 12,
        'weeks': 52.143,
        'days': 365,
    }
    return units_per_year.get(unit)

In [16]:
# Look up, per row, the units-per-year divisor for that row's age unit.
outcomes['age_conversion'] = outcomes['age_unit'].map(age_in_years)

In [17]:
# Age expressed in years: numeric part divided by units-per-year, rounded to 2 decimals.
outcomes['age_in_years'] = outcomes['age_digit'].div(outcomes['age_conversion']).round(2)

## Feature Creation

#### Outcomes Count

In [18]:
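# NOTE: currently disabled. If enabled, this would number each animal's outcomes
# using a dense rank of `datetime` within each `animal_id`.
#outcomes['outcome_count'] = outcomes.groupby('animal_id')['datetime'].rank(method='dense')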

In [19]:
# Tag every row with the constant label 'outcome'.
outcomes = outcomes.assign(category='outcome')

#### Drop Columns

In [20]:
# Raw inputs and intermediate helper columns from the sex and age parsing above;
# only the derived spay_neuter, sex and age_in_years columns are kept.
cols_to_drop = [
    'sex_upon_outcome', 'sex_split',
    'age_upon_outcome', 'age_digit', 'age_unit', 'age_conversion',
]

outcomes = outcomes.drop(cols_to_drop, axis='columns')

## Data to CSV

In [21]:
# Write the cleaned frame to disk, leaving the row index out of the file.
outcomes.to_csv(
    '../data/outcomes_cleaned.csv',
    index=False,
)