# Intakes and Outcomes Merge

#### Imports

In [1]:
import pandas as pd

#### Read in Data

In [2]:
intakes = pd.read_csv('../data/intakes_clean.csv', parse_dates=['datetime'])

In [3]:
outcomes = pd.read_csv('../data/outcomes_cleaned.csv', parse_dates=['datetime'])

#### Merge Datasets

In [4]:
intake_merge = intakes[['animal_id', 'datetime', 'category']]

In [5]:
outcome_merge = outcomes[['animal_id', 'datetime', 'category']]

In [6]:
intake_merge.head()

Unnamed: 0,animal_id,datetime,category
0,A786884,2019-01-03 16:19:00,intake
1,A706918,2015-07-05 12:59:00,intake
2,A724273,2016-04-14 18:43:00,intake
3,A665644,2013-10-21 07:59:00,intake
4,A682524,2014-06-29 10:38:00,intake


In [7]:
outcome_merge.head()

Unnamed: 0,animal_id,datetime,category
0,A794011,2019-05-08 18:20:00,outcome
1,A776359,2018-07-18 16:02:00,outcome
2,A821648,2020-08-16 11:38:00,outcome
3,A720371,2016-02-13 17:59:00,outcome
4,A674754,2014-03-18 11:47:00,outcome


In [8]:
intermediate = pd.concat([intake_merge, outcome_merge]).sort_values('datetime')

In [9]:
intermediate['time_shift'] = intermediate.groupby('animal_id').datetime.shift(-1)
intermediate['next_event'] = intermediate.groupby('animal_id').category.shift(-1)

In [10]:
intermediate.head()

Unnamed: 0,animal_id,datetime,category,time_shift,next_event
67176,A521520,2013-10-01 07:51:00,intake,2013-10-01 15:39:00,outcome
35879,A664235,2013-10-01 08:33:00,intake,2013-10-01 10:39:00,outcome
2185,A664237,2013-10-01 08:33:00,intake,2013-10-01 10:44:00,outcome
106680,A664236,2013-10-01 08:33:00,intake,2013-10-01 10:44:00,outcome
108286,A664233,2013-10-01 08:53:00,intake,2013-10-01 15:33:00,outcome


In [11]:
intermediate[intermediate['animal_id'] == 'A521520']

Unnamed: 0,animal_id,datetime,category,time_shift,next_event
67176,A521520,2013-10-01 07:51:00,intake,2013-10-01 15:39:00,outcome
117454,A521520,2013-10-01 15:39:00,outcome,NaT,


In [12]:
intermediate[intermediate['animal_id'] == 'A721033'].head(25)

Unnamed: 0,animal_id,datetime,category,time_shift,next_event
88298,A721033,2016-02-20 10:44:00,intake,2016-02-20 16:18:00,outcome
63353,A721033,2016-02-20 16:18:00,outcome,2016-07-10 11:53:00,intake
33457,A721033,2016-07-10 11:53:00,intake,2016-07-11 17:44:00,outcome
59076,A721033,2016-07-11 17:44:00,outcome,2016-09-03 14:30:00,intake
103964,A721033,2016-09-03 14:30:00,intake,2016-09-12 13:40:00,outcome
84908,A721033,2016-09-12 13:40:00,outcome,2016-10-20 22:47:00,intake
26724,A721033,2016-10-20 22:47:00,intake,2016-10-21 18:55:00,outcome
11787,A721033,2016-10-21 18:55:00,outcome,2016-12-15 10:07:00,intake
38042,A721033,2016-12-15 10:07:00,intake,2016-12-16 11:32:00,outcome
83750,A721033,2016-12-16 11:32:00,outcome,2017-01-09 14:26:00,intake


In [13]:
#ensures we only get intake to outcome results
intermediate = intermediate[(intermediate['category'] == 'intake') & (intermediate['next_event'] == 'outcome')]

In [14]:
intermediate = intakes.merge(intermediate, left_on=['animal_id', 'datetime'], 
                             right_on=['animal_id', 'datetime'],
                            suffixes=['_intake', '_intermediate'])

In [15]:
intakes_outcomes = outcomes.merge(intermediate, left_on=['animal_id', 'datetime'], 
                       right_on=['animal_id', 'time_shift'],
                      suffixes=['_outcome', '_intermediate2'])

In [16]:
intakes_outcomes.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 124863 entries, 0 to 124862
Data columns (total 29 columns):
 #   Column                      Non-Null Count   Dtype         
---  ------                      --------------   -----         
 0   animal_id                   124863 non-null  object        
 1   datetime_outcome            124863 non-null  datetime64[ns]
 2   outcome_type                124843 non-null  object        
 3   outcome_subtype             57306 non-null   object        
 4   spay_neuter_outcome         124862 non-null  object        
 5   sex_outcome                 124862 non-null  object        
 6   age_in_years_outcome        124862 non-null  float64       
 7   category                    124863 non-null  object        
 8   name                        85775 non-null   object        
 9   datetime_intermediate2      124863 non-null  datetime64[ns]
 10  intake_type                 124863 non-null  object        
 11  intake_condition            124863 non-

#### Filter Dataset

In [None]:
dogs = outcomes_intakes[outcomes_intakes['animal_type_outcome'] == 'Dog']

#### Save csv

In [None]:
dogs.to_csv('../data/dogs.csv', index=False)