# Intakes and Outcomes Merge

#### Imports

In [1]:
import pandas as pd

#### Read in Data

In [2]:
# Load the cleaned intake records. 'datetime' is parsed into timestamps
# because later cells sort and merge on it.
intakes = pd.read_csv(
    '../data/intakes_clean.csv',
    parse_dates=['datetime'],
)

In [3]:
# Load the cleaned outcome records. 'datetime' is parsed into timestamps
# because later cells sort and merge on it.
outcomes = pd.read_csv(
    '../data/outcomes_cleaned.csv',
    parse_dates=['datetime'],
)

#### Merge Datasets

In [4]:
# Slim view of the intake records: just the animal key, the event time and
# the event label, which is all the event timeline below needs.
intake_merge = intakes.loc[:, ['animal_id', 'datetime', 'category']]

In [5]:
# Slim view of the outcome records: just the animal key, the event time and
# the event label, which is all the event timeline below needs.
outcome_merge = outcomes.loc[:, ['animal_id', 'datetime', 'category']]

In [6]:
# Preview the first five intake events.
intake_merge.head(n=5)

Unnamed: 0,animal_id,datetime,category
0,A786884,2019-01-03 16:19:00,intake
1,A706918,2015-07-05 12:59:00,intake
2,A724273,2016-04-14 18:43:00,intake
3,A665644,2013-10-21 07:59:00,intake
4,A682524,2014-06-29 10:38:00,intake


In [7]:
# Preview the first five outcome events.
outcome_merge.head(n=5)

Unnamed: 0,animal_id,datetime,category
0,A794011,2019-05-08 18:20:00,outcome
1,A776359,2018-07-18 16:02:00,outcome
2,A821648,2020-08-16 11:38:00,outcome
3,A720371,2016-02-13 17:59:00,outcome
4,A674754,2014-03-18 11:47:00,outcome


In [8]:
# Stack intake and outcome events into one chronological timeline.
#
# * ignore_index=True gives every event a unique row label. Without it the
#   intake and outcome frames contribute overlapping labels, and the
#   index-aligned column assignments in later cells would depend on
#   duplicate labels lining up.
# * kind='mergesort' is a stable sort. Intakes are listed first in the
#   concat, so when an animal's intake and outcome share the same timestamp
#   the intake row is guaranteed to come before the outcome row. The default
#   quicksort may order such ties either way, which would silently drop the
#   pair when consecutive intake -> outcome events are matched.
intermediate = (
    pd.concat([intake_merge, outcome_merge], ignore_index=True)
    .sort_values('datetime', kind='mergesort')
)

In [9]:
# Within each animal's events (already in chronological order), copy the
# *following* event's timestamp and category onto the current row. This puts
# an intake and the event that came right after it on the same row.
# The last event of each animal has no successor and gets NaT / NaN.
intermediate['time_shift'] = intermediate.groupby('animal_id')['datetime'].shift(periods=-1)
intermediate['next_event'] = intermediate.groupby('animal_id')['category'].shift(periods=-1)

In [10]:
# Preview the timeline with the next-event columns attached.
intermediate.head(n=5)

Unnamed: 0,animal_id,datetime,category,time_shift,next_event
67176,A521520,2013-10-01 07:51:00,intake,2013-10-01 15:39:00,outcome
35879,A664235,2013-10-01 08:33:00,intake,2013-10-01 10:39:00,outcome
2185,A664237,2013-10-01 08:33:00,intake,2013-10-01 10:44:00,outcome
106680,A664236,2013-10-01 08:33:00,intake,2013-10-01 10:44:00,outcome
108286,A664233,2013-10-01 08:53:00,intake,2013-10-01 15:33:00,outcome


In [11]:
# Keep only intake rows whose next event for the same animal is an outcome,
# so every surviving row describes one intake -> outcome pair.
intermediate = intermediate.loc[
    intermediate['category'].eq('intake')
    & intermediate['next_event'].eq('outcome')
]

In [12]:
# Attach the full intake record to each intake -> outcome pair, matching on
# the animal and the intake timestamp.
# NOTE(review): if `intakes` contains repeated (animal_id, datetime) rows this
# inner join will multiply them - confirm the cleaned file is de-duplicated.
intakes_merge = pd.merge(
    intermediate,
    intakes,
    how='inner',
    on=['animal_id', 'datetime'],
    suffixes=['_inter', '_intakes'],
)

In [13]:
# Attach the matching outcome record: the pair's `time_shift` (timestamp of
# the event following the intake) is matched to the outcome's own `datetime`.
# Columns present on both sides are suffixed '_inter' (left / intake side)
# and '_outcomes' (right / outcome side).
# NOTE(review): repeated (animal_id, datetime) rows in `outcomes` would
# multiply rows here - confirm the cleaned file is de-duplicated.
intakes_outcomes = pd.merge(
    intakes_merge,
    outcomes,
    how='inner',
    left_on=['animal_id', 'time_shift'],
    right_on=['animal_id', 'datetime'],
    suffixes=['_inter', '_outcomes'],
)

#### Reordering, Dropping, and Renaming Columns

In [15]:
# Final column selection, in output order. Names ending in '_inter' are the
# intake-side copies and names ending in '_outcomes' are the outcome-side
# copies produced by the suffixes of the intake/outcome merge.
columns_to_keep = [
    # identity and intake event
    'animal_id', 'datetime_inter', 'name', 'intake_type', 'intake_condition',
    # animal description at intake
    'animal_type', 'sex_inter', 'spay_neuter_inter', 'age_in_years_inter', 'age_group',
    # breed
    'breed', 'breed_1', 'breed_2', 'mix_flag', 'pit_flag',
    # color
    'color', 'color_1', 'color_2',
    # where the animal was found
    'found_address', 'found_city',
    # outcome event and animal description at outcome
    'datetime_outcomes', 'outcome_type', 'outcome_subtype',
    'sex_outcomes', 'spay_neuter_outcomes', 'age_in_years_outcomes',
]

# Drop every other column and apply the order above.
intakes_outcomes = intakes_outcomes.loc[:, columns_to_keep]

In [16]:
# Replace the merge suffixes with explicit 'intake_' / 'outcome_' prefixes so
# each column name says which event it belongs to.
intakes_outcomes = intakes_outcomes.rename(columns={
    # intake side
    'datetime_inter': 'intake_datetime',
    'sex_inter': 'intake_sex',
    'spay_neuter_inter': 'intake_spay_neuter',
    'age_in_years_inter': 'intake_age_in_years',
    # outcome side
    'sex_outcomes': 'outcome_sex',
    'spay_neuter_outcomes': 'outcome_spay_neuter',
    'age_in_years_outcomes': 'outcome_age_in_years',
})

#### Filter Dataset

In [17]:
# Restrict the matched intake/outcome records to rows whose animal_type is 'Dog'.
dogs = intakes_outcomes.loc[intakes_outcomes['animal_type'].eq('Dog')]

#### Save CSV

In [19]:
# Write the dog-only dataset to disk; index=False leaves the row index out of the file.
dogs.to_csv(
    '../data/dogs.csv',
    index=False,
)