In [46]:
# imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# read in intake and outcome data
intakes = pd.read_csv("../data/austin_animal_center_intakes_20241017.csv")
outcomes = pd.read_csv("../data/austin_animal_center_outcomes_20241017.csv")

In [3]:
intakes.head()

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Found Location,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color
0,A786884,*Brock,01/03/2019 04:19:00 PM,January 2019,2501 Magin Meadow Dr in Austin (TX),Stray,Normal,Dog,Neutered Male,2 years,Beagle Mix,Tricolor
1,A706918,Belle,07/05/2015 12:59:00 PM,July 2015,9409 Bluegrass Dr in Austin (TX),Stray,Normal,Dog,Spayed Female,8 years,English Springer Spaniel,White/Liver
2,A724273,Runster,04/14/2016 06:43:00 PM,April 2016,2818 Palomino Trail in Austin (TX),Stray,Normal,Dog,Intact Male,11 months,Basenji Mix,Sable/White
3,A665644,,10/21/2013 07:59:00 AM,October 2013,Austin (TX),Stray,Sick,Cat,Intact Female,4 weeks,Domestic Shorthair Mix,Calico
4,A857105,Johnny Ringo,05/12/2022 12:23:00 AM,May 2022,4404 Sarasota Drive in Austin (TX),Public Assist,Normal,Cat,Neutered Male,2 years,Domestic Shorthair,Orange Tabby


In [4]:
outcomes.head()

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color
0,A882831,*Hamilton,07/01/2023 06:12:00 PM,Jul 2023,03/25/2023,Adoption,,Cat,Neutered Male,3 months,Domestic Shorthair Mix,Black/White
1,A794011,Chunk,05/08/2019 06:20:00 PM,May 2019,05/02/2017,Rto-Adopt,,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White
2,A776359,Gizmo,07/18/2018 04:02:00 PM,Jul 2018,07/12/2017,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown
3,A821648,,08/16/2020 11:38:00 AM,Aug 2020,08/16/2019,Euthanasia,,Other,Unknown,1 year,Raccoon,Gray
4,A720371,Moose,02/13/2016 05:59:00 PM,Feb 2016,10/08/2015,Adoption,,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff


*Before combining the two data frames I'm going to clean up column names.*

In [16]:
# function to change column names to snake case
def update_column_name(columns):
    """Convert dataframe column names to snake case

    Keyword arguments:
    columns -- original columns names of dataframe
    
    Returns a dictionary to pass into pandas rename mapper
    """
    return {column: column.lower().replace(" ", "_") for column in columns}

In [19]:
# change column names for both dataframes
intakes.rename(columns=update_column_name(intakes.columns), inplace=True)
outcomes.rename(columns=update_column_name(outcomes.columns), inplace=True)

*Manually updating a couple column names that didn't have spaces, and want to add intake and outcomes identifiers to differentiate columns with same name. Keeping animal_id, name, breed, and color the same because they should be the same across both dataframes.*

In [24]:
intakes.rename(columns={"datetime": "date_time_in",
                        "monthyear": "month_year_in"}, inplace=True)
outcomes.rename(columns={"datetime": "date_time_out",
                         "monthyear": "month_year_out"}, inplace=True)

In [None]:
# converting date_time_in and date_time_out to datetime types
intakes["date_time_in"] = pd.to_datetime(intakes["date_time_in"])
outcomes["date_time_out"] = pd.to_datetime(outcomes["date_time_out"])

# sorting dataframes by these datetime columns
intakes = intakes.sort_values(by="date_time_in", ignore_index=True)
outcomes = outcomes.sort_values(by="date_time_out", ignore_index=True)

In [76]:
# looking at nulls before merging
intakes.isnull().sum(), outcomes.isnull().sum()

(animal_id               0
 name                48393
 date_time_in            0
 month_year_in           0
 found_location          0
 intake_type             0
 intake_condition        0
 animal_type             0
 sex_upon_intake         2
 age_upon_intake         1
 breed                   0
 color                   0
 dtype: int64,
 animal_id               0
 name                48209
 date_time_out           0
 month_year_out          0
 date_of_birth           0
 outcome_type           46
 outcome_subtype     90798
 animal_type             0
 sex_upon_outcome        2
 age_upon_outcome       16
 breed                   0
 color                   0
 dtype: int64)

*A lot of missing names and outcome_subtype, neither of these are surprising since strays brought in won't have names listed and a secondary outcome type isn't necessary. Will drop these columns after the merge.*

In [107]:
# merging two dataframes
# only on animal_id to try an maintain date in and date out order, will drop duplicate columns later
animals = pd.merge(intakes, outcomes, how="left", on="animal_id")

In [109]:
animals.columns

Index(['animal_id', 'name_x', 'date_time_in', 'month_year_in',
       'found_location', 'intake_type', 'intake_condition', 'animal_type_x',
       'sex_upon_intake', 'age_upon_intake', 'breed_x', 'color_x', 'name_y',
       'date_time_out', 'month_year_out', 'date_of_birth', 'outcome_type',
       'outcome_subtype', 'animal_type_y', 'sex_upon_outcome',
       'age_upon_outcome', 'breed_y', 'color_y'],
      dtype='object')

In [112]:
# dropping duplicate columns
animals.drop(columns=["name_x", "animal_type_x", "breed_x", "color_x"], inplace=True)

# renaming the second half of duplicate columns
animals.rename(columns={"name_y": "name",
                        "animal_type_y": "animal_type",
                        "breed_y": "breed",
                        "color_y": "color"}, inplace=True)

In [113]:
# checking nulls again
animals.isnull().sum()

animal_id                0
date_time_in             0
month_year_in            0
found_location           0
intake_type              0
intake_condition         0
sex_upon_intake          2
age_upon_intake          1
name                 49993
date_time_out          909
month_year_out         909
date_of_birth          909
outcome_type           973
outcome_subtype     130459
animal_type            909
sex_upon_outcome       911
age_upon_outcome       925
breed                  909
color                  909
dtype: int64

In [114]:
# dropping name and outcome_subtype columns
animals.drop(columns=["name", "outcome_subtype"], inplace=True)

In [115]:
# now dropping observations with null values, there are enough observations to handle this
print(animals.shape)
animals.dropna(inplace=True)
animals.shape

(215083, 17)


(214094, 17)

In [116]:
# new column for how long an animal is in the shelter
animals["time_in_shelter"] = animals["date_time_out"] - animals["date_time_in"]

# change dtype for new column from timedelta to integer for number of days
# found following method on stack overflow
# https://stackoverflow.com/questions/25646200/python-convert-timedelta-to-int-in-a-dataframe
animals["time_in_shelter"] = animals["time_in_shelter"].dt.days.astype(int)

*There appeared to be some animals that were in the shelter more than once and the intake and outcomes times weren't all attached to the correct instances, checking how many observations of their time in the shelter are negative.*

In [117]:
# filter for negative time_in_shelter
animals[animals["time_in_shelter"] < 0]

Unnamed: 0,animal_id,date_time_in,month_year_in,found_location,intake_type,intake_condition,sex_upon_intake,age_upon_intake,date_time_out,month_year_out,date_of_birth,outcome_type,animal_type,sex_upon_outcome,age_upon_outcome,breed,color,time_in_shelter
219,A663004,2013-10-04 14:11:00,October 2013,2201 Willow Creek Dr in Austin (TX),Owner Surrender,Normal,Spayed Female,8 months,2013-10-02 18:58:00,Oct 2013,01/14/2013,Adoption,Dog,Spayed Female,8 months,Bulldog/Australian Cattle Dog,Black/White,-2
326,A663572,2013-10-06 11:00:00,October 2013,Outside Jurisdiction,Owner Surrender,Normal,Spayed Female,3 years,2013-10-01 11:42:00,Oct 2013,09/21/2010,Adoption,Dog,Spayed Female,3 years,Anatol Shepherd Mix,White/Brown,-5
484,A663722,2013-10-09 12:11:00,October 2013,Austin (TX),Owner Surrender,Normal,Neutered Male,2 months,2013-10-06 18:24:00,Oct 2013,07/16/2013,Adoption,Cat,Neutered Male,2 months,Domestic Shorthair Mix,Black,-3
486,A663723,2013-10-09 12:11:00,October 2013,Austin (TX),Owner Surrender,Normal,Neutered Male,2 months,2013-10-06 18:24:00,Oct 2013,07/16/2013,Adoption,Cat,Neutered Male,2 months,Domestic Shorthair Mix,Brown Tabby,-3
579,A664097,2013-10-11 08:51:00,October 2013,South Austin in Austin (TX),Stray,Normal,Intact Female,9 months,2013-10-10 17:42:00,Oct 2013,12/29/2012,Transfer,Dog,Intact Female,9 months,Pointer Mix,Black/White,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
215048,A882845,2024-10-17 11:20:00,October 2024,Austin (TX),Owner Surrender,Injured,Neutered Male,1 year,2024-08-01 09:05:00,Aug 2024,03/09/2023,Adoption,Dog,Neutered Male,1 year,Great Pyrenees/Labrador Retriever,White/Brown,-78
215049,A882845,2024-10-17 11:20:00,October 2024,Austin (TX),Owner Surrender,Injured,Neutered Male,1 year,2024-10-14 15:08:00,Oct 2024,03/09/2023,Adoption,Dog,Neutered Male,1 year,Great Pyrenees/Labrador Retriever,White/Brown,-3
215050,A906423,2024-10-17 11:38:00,October 2024,Travis (TX),Owner Surrender,Normal,Neutered Male,2 years,2024-10-13 13:16:00,Oct 2024,06/02/2022,Adoption,Dog,Neutered Male,2 years,Pit Bull,Black,-4
215058,A913366,2024-10-17 12:22:00,October 2024,Austin (TX),Owner Surrender,Normal,Spayed Female,3 months,2024-09-27 16:25:00,Sep 2024,07/15/2024,Adoption,Dog,Spayed Female,2 months,Pit Bull,Fawn/White,-20


(214094, 18)

0.11341747083056974