# Shelter Animal Outcomes

## Load Training Data

In [1]:
import numpy as np
import pandas as pd
import plotly.express as px

In [2]:
# Read the raw training set; the path is relative to the notebook's working directory.
train = pd.read_csv('data/train.csv')

In [3]:
# Preview the first rows of the raw frame.
train.head()

Unnamed: 0,AnimalID,Name,DateTime,OutcomeType,OutcomeSubtype,AnimalType,SexuponOutcome,AgeuponOutcome,Breed,Color
0,A671945,Hambone,2014-02-12 18:22:00,Return_to_owner,,Dog,Neutered Male,1 year,Shetland Sheepdog Mix,Brown/White
1,A656520,Emily,2013-10-13 12:44:00,Euthanasia,Suffering,Cat,Spayed Female,1 year,Domestic Shorthair Mix,Cream Tabby
2,A686464,Pearce,2015-01-31 12:28:00,Adoption,Foster,Dog,Neutered Male,2 years,Pit Bull Mix,Blue/White
3,A683430,,2014-07-11 19:09:00,Transfer,Partner,Cat,Intact Male,3 weeks,Domestic Shorthair Mix,Blue Cream
4,A667013,,2013-11-15 12:52:00,Transfer,Partner,Dog,Neutered Male,2 years,Lhasa Apso/Miniature Poodle,Tan


In [4]:
# Column dtypes and non-null counts for the raw frame.
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26729 entries, 0 to 26728
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   AnimalID        26729 non-null  object
 1   Name            19038 non-null  object
 2   DateTime        26729 non-null  object
 3   OutcomeType     26729 non-null  object
 4   OutcomeSubtype  13117 non-null  object
 5   AnimalType      26729 non-null  object
 6   SexuponOutcome  26728 non-null  object
 7   AgeuponOutcome  26711 non-null  object
 8   Breed           26729 non-null  object
 9   Color           26729 non-null  object
dtypes: object(10)
memory usage: 2.0+ MB


In [5]:
# Missing values per column before cleaning.
train.isnull().sum()

AnimalID              0
Name               7691
DateTime              0
OutcomeType           0
OutcomeSubtype    13612
AnimalType            0
SexuponOutcome        1
AgeuponOutcome       18
Breed                 0
Color                 0
dtype: int64

## Clean Training Data

In [9]:
def get_sex(string):
    """Derive the animal's sex from a sex-upon-outcome label.

    The input is converted with ``str()`` first, so non-string values
    (for example a float NaN) are accepted. Matching is case-sensitive and
    'Male' is checked before 'Female'.

    Returns:
        'male', 'female', or 'unknown' when neither token is present.
    """
    label = str(string)
    for token, sex in (('Male', 'male'), ('Female', 'female')):
        if token in label:
            return sex
    return 'unknown'

def get_neutered(string):
    """Derive the sterilisation status from a sex-upon-outcome label.

    The input is converted with ``str()`` first, so non-string values are
    accepted. 'Spayed' and 'Neutered' both map to 'neutered' and take
    precedence over 'Intact'. Matching is case-sensitive.

    Returns:
        'neutered', 'intact', or 'unknown' when no token is present.
    """
    label = str(string)
    if 'Spayed' in label or 'Neutered' in label:
        return 'neutered'
    return 'intact' if 'Intact' in label else 'unknown'

In [10]:
def calculate_age_years(age_string):
    """Convert an age label such as '3 weeks' or '2 years' to years.

    The label is expected to be '<integer> <unit>' where the unit contains
    one of 'year', 'month', 'week' or 'day' (plural forms match too).

    Args:
        age_string: The age label. Non-string values are converted with
            ``str()``; NaN, None and empty strings are treated as missing.

    Returns:
        The age in years as a float. Missing values return 0.0, which keeps
        the existing convention of this notebook (callers that need to tell
        "missing" from "newborn" must check the raw column instead).

    Raises:
        ValueError: If the leading token is not an integer or the unit is
            not recognised. Previously an unknown unit silently returned
            None, which only failed later in downstream comparisons.
    """
    text = str(age_string).strip()

    # A float NaN stringifies to 'nan' and None to 'None'; treat both, and
    # blank strings, as "no age recorded".
    if text.lower() in ('', 'nan', 'none'):
        return 0.0

    count = int(text.split()[0])

    # Same unit order and divisors as before: years, months, weeks, days.
    for unit, units_per_year in (('year', 1), ('month', 12), ('week', 52), ('day', 365)):
        if unit in text:
            return count / units_per_year

    raise ValueError(f"Unrecognized age unit in {age_string!r}")

In [11]:
def age_category(age):
    """Bucket an age in years into a coarse label.

    Bands (lower bound inclusive, upper bound exclusive):
        below 3   -> 'young'
        3 to 5    -> 'middle'
        5 to 10   -> 'adult'
        10 and up -> 'old'

    Returns:
        The band label, or None when no comparison holds (for example when
        ``age`` is NaN).
    """
    # Bands are checked in ascending order, so each upper bound implies the
    # previous band's lower bound.
    for upper_bound, label in ((3, 'young'), (5, 'middle'), (10, 'adult')):
        if age < upper_bound:
            return label
    if age >= 10:
        return 'old'
    return None

In [12]:
def clean_data(dataframe, drop_columns):
    """Add derived feature columns to ``dataframe`` in place, then drop columns.

    Reads 'SexuponOutcome', 'AgeuponOutcome' and 'DateTime' and writes:
      * 'sex' and 'neutered', both derived from 'SexuponOutcome';
      * 'age_years', derived from 'AgeuponOutcome';
      * 'age_category', derived from the 'age_years' values;
      * 'DateTime', converted with ``pd.to_datetime``.

    Args:
        dataframe: Frame to modify. It is mutated; nothing is returned.
        drop_columns: Column labels removed from the frame as the last step.
    """
    outcome_sex = dataframe['SexuponOutcome']
    dataframe['sex'] = outcome_sex.apply(get_sex)
    dataframe['neutered'] = outcome_sex.apply(get_neutered)

    years = dataframe['AgeuponOutcome'].apply(calculate_age_years)
    dataframe['age_years'] = years
    dataframe['age_category'] = years.apply(age_category)

    dataframe['DateTime'] = pd.to_datetime(dataframe['DateTime'])

    # Dropped last because the derivations above read from these columns.
    dataframe.drop(columns=drop_columns, inplace=True)

In [13]:
# Columns removed from the frame as the last step of clean_data.
drop_columns = ['SexuponOutcome', 'AgeuponOutcome', 'OutcomeSubtype']

# clean_data mutates `train` in place and drops the columns above, so this
# cell cannot be re-run without reloading `train` first (a second run fails
# when it looks up 'SexuponOutcome').
clean_data(train, drop_columns)

In [14]:
# Re-check missing values after cleaning. age_years shows no nulls because
# calculate_age_years maps a missing age to 0 rather than leaving it as NaN.
train.isnull().sum()

AnimalID           0
Name            7691
DateTime           0
OutcomeType        0
AnimalType         0
Breed              0
Color              0
sex                0
neutered           0
age_years          0
age_category       0
dtype: int64

In [15]:
# Preview the cleaned frame, including the derived columns.
train.head()

Unnamed: 0,AnimalID,Name,DateTime,OutcomeType,AnimalType,Breed,Color,sex,neutered,age_years,age_category
0,A671945,Hambone,2014-02-12 18:22:00,Return_to_owner,Dog,Shetland Sheepdog Mix,Brown/White,male,neutered,1.0,young
1,A656520,Emily,2013-10-13 12:44:00,Euthanasia,Cat,Domestic Shorthair Mix,Cream Tabby,female,neutered,1.0,young
2,A686464,Pearce,2015-01-31 12:28:00,Adoption,Dog,Pit Bull Mix,Blue/White,male,neutered,2.0,young
3,A683430,,2014-07-11 19:09:00,Transfer,Cat,Domestic Shorthair Mix,Blue Cream,male,intact,0.057692,young
4,A667013,,2013-11-15 12:52:00,Transfer,Dog,Lhasa Apso/Miniature Poodle,Tan,male,neutered,2.0,young


In [31]:
# Latest outcome timestamp present in the training data.
train['DateTime'].max()

Timestamp('2016-02-21 19:17:00')

## Visualizations

In [21]:
# Row counts per animal type.
fig = px.histogram(train, x="AnimalType")
fig.show()

In [23]:
# Outcome counts, split by animal type.
fig = px.histogram(train, x="OutcomeType", color='AnimalType')
fig.show()

In [47]:
# Cat outcomes over time (DateTime on the x-axis), colored by outcome type.
fig = px.histogram(train[train['AnimalType'] == 'Cat'], x="DateTime", color='OutcomeType')
fig.show()

In [29]:
# Cat outcome counts, colored by breed.
fig = px.histogram(train[train['AnimalType'] == 'Cat'], x="OutcomeType", color='Breed')
fig.show()

In [64]:
# Restrict to cats once so the top-5 lookup and the plotted rows come from
# the same subset (previously the plot filtered the whole frame by breed).
cat_rows = train[train['AnimalType'] == 'Cat']
top_breeds = cat_rows['Breed'].value_counts().head(5).index.to_list()

# Counts for the five most common cat breeds.
fig = px.histogram(cat_rows[cat_rows['Breed'].isin(top_breeds)], x="Breed")
fig.show()

In [32]:
# Cat outcome counts, colored by coat color.
fig = px.histogram(train[train['AnimalType'] == 'Cat'], x="OutcomeType", color='Color')
fig.show()

In [68]:
# Restrict to cats once so the top-5 lookup and the plotted rows come from
# the same subset. Filtering the whole frame by color, as before, also counts
# any non-cat rows that share one of these color labels.
cat_rows = train[train['AnimalType'] == 'Cat']
top_colors = cat_rows['Color'].value_counts().head(5).index.to_list()

# Counts for the five most common cat colors.
fig = px.histogram(cat_rows[cat_rows['Color'].isin(top_colors)], x="Color")
fig.show()

In [38]:
# Cat outcome counts, colored by neuter status.
fig = px.histogram(train[train['AnimalType'] == 'Cat'], x="OutcomeType", color='neutered')
fig.show()

In [71]:
# Cat counts per neuter status.
fig = px.histogram(train[train['AnimalType'] == 'Cat'], x="neutered")
fig.show()

In [45]:
# Cat outcome counts, colored by age category.
fig = px.histogram(train[train['AnimalType'] == 'Cat'], x="OutcomeType", color='age_category')
fig.show()

In [73]:
# Counts per age category for dogs.
# NOTE(review): the other per-species cells in this section filter on 'Cat';
# confirm that 'Dog' is intended here and not a copy-paste slip.
fig = px.histogram(train[train['AnimalType'] == 'Dog'], x='age_category')
fig.show()