## Outcomes

#### Imports

In [1]:
import pandas as pd
import janitor
import numpy as np

#### Read in Data

In [2]:
# Load the raw outcomes export; the 'DateTime' column is parsed into timestamps on read.
outcomes = pd.read_csv(
    '../data/outcomes.csv',
    parse_dates=['DateTime'],
)

In [3]:
# Preview the first five rows of the raw frame.
outcomes.head(n=5)

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color
0,A794011,Chunk,2019-05-08 18:20:00,05/08/2019 06:20:00 PM,05/02/2017,Rto-Adopt,,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White
1,A776359,Gizmo,2018-07-18 16:02:00,07/18/2018 04:02:00 PM,07/12/2017,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown
2,A821648,,2020-08-16 11:38:00,08/16/2020 11:38:00 AM,08/16/2019,Euthanasia,,Other,Unknown,1 year,Raccoon,Gray
3,A720371,Moose,2016-02-13 17:59:00,02/13/2016 05:59:00 PM,10/08/2015,Adoption,,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff
4,A674754,,2014-03-18 11:47:00,03/18/2014 11:47:00 AM,03/12/2014,Transfer,Partner,Cat,Intact Male,6 days,Domestic Shorthair Mix,Orange Tabby


In [4]:
# clean_names comes from pyjanitor (the `import janitor` above); it turns the
# headers into snake_case, e.g. 'Sex upon Outcome' -> 'sex_upon_outcome'.
outcomes = janitor.clean_names(outcomes)

In [5]:
# Check dtypes and non-null counts per column to see where values are missing.
outcomes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 126353 entries, 0 to 126352
Data columns (total 12 columns):
 #   Column            Non-Null Count   Dtype         
---  ------            --------------   -----         
 0   animal_id         126353 non-null  object        
 1   name              86767 non-null   object        
 2   datetime          126353 non-null  datetime64[ns]
 3   monthyear         126353 non-null  object        
 4   date_of_birth     126353 non-null  object        
 5   outcome_type      126332 non-null  object        
 6   outcome_subtype   58280 non-null   object        
 7   animal_type       126353 non-null  object        
 8   sex_upon_outcome  126352 non-null  object        
 9   age_upon_outcome  126352 non-null  object        
 10  breed             126353 non-null  object        
 11  color             126353 non-null  object        
dtypes: datetime64[ns](1), object(11)
memory usage: 11.6+ MB


#### Name

In [6]:
# Frequency of each name; rows without a name are left out of the tally.
outcomes['name'].value_counts(dropna=True)

Max             566
Bella           533
Luna            496
Rocky           391
Daisy           371
               ... 
Benltey           1
121 G             1
*Tembo            1
Ace Regalado      1
Kaypo             1
Name: name, Length: 20060, dtype: int64

In [7]:
# Share of named animals whose name carries a leading '*' (unnamed rows are excluded).
outcomes['name'].str.startswith('*').value_counts(normalize=True, dropna=True)

False    0.668054
True     0.331946
Name: name, dtype: float64

In [8]:
# Remove the '*' marker from names. '*' is a regex metacharacter, so the
# pattern is passed as a literal (regex=False) instead of relying on the
# version-dependent default of `regex`.
outcomes['name'] = outcomes['name'].str.replace('*', '', regex=False)

#### Month Year

In [9]:
# In the preview rows 'monthyear' holds the same timestamp as 'datetime'
# (only as text), so it is dropped.
outcomes = outcomes.drop('monthyear', axis='columns')

#### Date of Birth

In [10]:
# Convert birth dates from text to timestamps; values that cannot be parsed
# become NaT instead of raising.
outcomes['date_of_birth'] = pd.to_datetime(
    arg=outcomes['date_of_birth'],
    errors='coerce',
)

#### Outcome Type

In [11]:
# Proportion of each outcome type among rows that have one recorded.
outcomes['outcome_type'].value_counts(normalize=True, dropna=True)

Adoption           0.444978
Transfer           0.294771
Return to Owner    0.172110
Euthanasia         0.067798
Died               0.009309
Rto-Adopt          0.005707
Disposal           0.004591
Missing            0.000554
Relocate           0.000182
Name: outcome_type, dtype: float64

#### Outcome Subtype

In [12]:
# Counts per outcome subtype; rows with no subtype are not counted.
outcomes['outcome_subtype'].value_counts(dropna=True)

Partner                31308
Foster                 11093
Rabies Risk             3796
Suffering               3283
SCRP                    3211
Snr                     2707
In Kennel                607
Aggressive               545
Offsite                  431
Medical                  306
In Foster                303
At Vet                   266
Behavior                 158
Enroute                   84
Field                     41
Underage                  36
Court/Investigation       31
In Surgery                24
Possible Theft            16
Barn                      11
Prc                       10
Customer S                 8
Emer                       5
Name: outcome_subtype, dtype: int64

#### Animal Type

In [13]:
# Number of records per animal type.
outcomes['animal_type'].value_counts(dropna=True)

Dog          71569
Cat          47379
Other         6790
Bird           595
Livestock       20
Name: animal_type, dtype: int64

#### Sex Upon Outcome

In [14]:
# Values follow the pattern "<spay/neuter status> <sex>"; a bare 'Unknown' is
# doubled so that splitting on the space yields two parts like every other value.
# Series.replace matches whole values only, so re-running this cell leaves an
# already doubled 'Unknown Unknown' untouched (a substring replace would keep
# doubling it).
outcomes['sex_upon_outcome'] = outcomes['sex_upon_outcome'].replace('Unknown', 'Unknown Unknown')

In [15]:
# Break "<status> <sex>" into a list of its space-separated parts.
outcomes['sex_split'] = outcomes['sex_upon_outcome'].str.split(pat=' ')

In [16]:
# First part is the spay/neuter status, second part is the sex.
outcomes['spay_neuter'] = outcomes['sex_split'].str.get(0)
outcomes['sex'] = outcomes['sex_split'].str.get(1)

#### Age Upon Outcome

In [17]:
# Ages are text such as '2 years' or '6 days': the first token is the count,
# the second is the unit.
outcomes['age_digit'] = outcomes['age_upon_outcome'].str.split(pat=' ').str.get(0)
outcomes['age_unit'] = outcomes['age_upon_outcome'].str.split(pat=' ').str.get(1)

In [18]:
# The count was extracted as text; make it numeric so it can be divided later.
outcomes['age_digit'] = outcomes['age_digit'].astype(float)

In [19]:
# Singular unit labels (e.g. '1 year') are folded into their plural form so each
# unit has one spelling; any label not listed here is kept unchanged.
age_unit_dict = dict(year='years', month='months', week='weeks', day='days')

outcomes['age_unit'] = outcomes['age_unit'].map(lambda unit: age_unit_dict.get(unit, unit))

In [20]:
def age_in_years(unit):
    """Return how many of ``unit`` make up one year.

    Despite its name, this does not convert an age itself: it returns the
    number of units per year (1 for 'years', 12 for 'months', 52.143 for
    'weeks', 365 for 'days'). Any other value yields ``None``.
    """
    units_per_year = (
        ('years', 1),
        ('months', 12),
        ('weeks', 52.143),
        ('days', 365),
    )
    for label, per_year in units_per_year:
        if unit == label:
            return per_year
    return None

In [21]:
# Look up, per row, how many of that row's unit make up one year.
outcomes['age_conversion'] = list(map(age_in_years, outcomes['age_unit']))

In [22]:
# Age in years = count / units-per-year, rounded to two decimals.
outcomes['age_in_years'] = outcomes['age_digit'].div(outcomes['age_conversion']).round(2)

#### Breed

In [23]:
# Number of records per breed label.
outcomes['breed'].value_counts(dropna=True)

Domestic Shorthair Mix            31417
Pit Bull Mix                       8527
Domestic Shorthair                 7355
Labrador Retriever Mix             6960
Chihuahua Shorthair Mix            6288
                                  ...  
Chesa Bay Retr/German Shepherd        1
Orpington                             1
Beagle/American Foxhound              1
Waxwing Mix                           1
Shih Tzu/French Bulldog               1
Name: breed, Length: 2652, dtype: int64

#### Color

In [24]:
# Number of records per color label.
outcomes['color'].value_counts(dropna=True)

Black/White                 13260
Black                       10565
Brown Tabby                  7161
Brown                        5529
White                        4456
                            ...  
Lynx Point/Cream                1
Tricolor/Orange                 1
Tan/Apricot                     1
Agouti/Gray                     1
Brown Tabby/Orange Tabby        1
Name: color, Length: 600, dtype: int64

#### Drop Columns

In [25]:
# Drop the raw text columns and the intermediate helpers created in the cells
# above; the derived spay_neuter, sex and age_in_years columns are kept.
cols_to_drop = [
    'date_of_birth',
    'sex_upon_outcome', 'sex_split',
    'age_upon_outcome', 'age_digit', 'age_unit', 'age_conversion',
]

outcomes = outcomes.drop(cols_to_drop, axis='columns')