In [1]:
%matplotlib inline

import numpy as np
import pandas as pd
import requests
from pandas.io.json import json_normalize
from geopy.geocoders import Nominatim
from sodapy import Socrata
from pygeocoder import Geocoder
import warnings
# Silences every warning for the rest of the session (no category or module
# filter is given), including any pandas warnings raised by later cells.
warnings.filterwarnings('ignore')

In [2]:
# Unauthenticated Socrata client (no app token) for the Austin open-data portal.
client = Socrata("data.austintexas.gov", app_token=None)
# Pull at most 100000 records of dataset fdzn-9yqv.
# NOTE(review): anything beyond the limit is not fetched — confirm the dataset is smaller.
results = client.get("fdzn-9yqv", limit=100000)
# One DataFrame row per returned record.
intake_df = pd.DataFrame.from_records(results)



In [3]:
# Use the shelter's animal_id as the row index of the intake table.
intake_df = intake_df.set_index(keys="animal_id")

In [4]:
# Discard the datetime2 column, then give the intake timestamp and the sex
# column shorter names (datetime -> date_in, sex_upon_intake -> sex).
intake_df = intake_df.drop(columns=['datetime2']).rename(
    columns={'datetime': 'date_in', 'sex_upon_intake': 'sex'}
)


In [5]:
# only get dogs
# .copy() hands the later cells an independent frame to add/overwrite columns
# on, rather than a filtered slice of the full intake table (which makes
# pandas emit SettingWithCopyWarning on assignment).
intake_df = intake_df.loc[intake_df['animal_type'] == "Dog"].copy()

In [6]:
# Turn each "A/B" color string into a list of its parts, e.g. ["A", "B"].
intake_df['color'] = intake_df['color'].str.split('/')

In [7]:
# Cut the last five characters off every location, then collapse " in " to a
# single space.
# NOTE(review): assumes every value ends in a fixed 5-character suffix
# (presumably a state tag) — confirm against the raw data.
location_trimmed = intake_df['found_location'].str.slice(stop=-5)
intake_df['found_location'] = location_trimmed.str.replace(" in ", " ")

In [8]:
# Preview a bounded number of rows instead of displaying the whole frame.
intake_df.head(10)

Unnamed: 0_level_0,age_upon_intake,animal_type,breed,color,date_in,found_location,intake_condition,intake_type,name,sex
animal_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
A786884,2 years,Dog,Beagle Mix,[Tricolor],2019-01-03T16:19:00.000,2501 Magin Meadow Dr Austin,Normal,Stray,*Brock,Neutered Male
A706918,8 years,Dog,English Springer Spaniel,"[White, Liver]",2015-07-05T12:59:00.000,9409 Bluegrass Dr Austin,Normal,Stray,Belle,Spayed Female
A724273,11 months,Dog,Basenji Mix,"[Sable, White]",2016-04-14T18:43:00.000,2818 Palomino Trail Austin,Normal,Stray,Runster,Intact Male
A778404,4 years,Dog,German Shepherd Mix,"[Black, Tan]",2018-08-14T10:19:00.000,Austin,Normal,Owner Surrender,Max,Intact Male
A682524,4 years,Dog,Doberman Pinsch/Australian Cattle Dog,"[Tan, Gray]",2014-06-29T10:38:00.000,800 Grove Blvd Austin,Normal,Stray,Rio,Neutered Male
A743852,2 years,Dog,Labrador Retriever Mix,[Chocolate],2017-02-18T12:46:00.000,Austin,Normal,Owner Surrender,Odin,Neutered Male
A708452,2 years,Dog,Labrador Retriever Mix,"[Black, White]",2015-07-30T14:37:00.000,Austin,Normal,Public Assist,Mumble,Intact Male
A760053,2 years,Dog,Chihuahua Shorthair,"[White, Tan]",2017-10-11T15:46:00.000,8800 South First Street Austin,Normal,Stray,,Intact Male
A707375,5 months,Dog,Pit Bull,"[Brown, White]",2015-07-11T18:19:00.000,Galilee Court And Damita Jo Dr Manor,Normal,Stray,*Candy Cane,Intact Female
A696408,2 years,Dog,Chihuahua Shorthair,[Tricolor],2015-02-04T12:58:00.000,9705 Thaxton Austin,Normal,Stray,*Pearl,Intact Female


In [9]:
# The sex column is split on spaces: the first token goes to `fixed`, the
# second replaces `sex`. Values with fewer than two tokens get NaN for `sex`.
sex_series = intake_df['sex'].str.split(" ")
intake_df['fixed'] = sex_series.str.get(0)
intake_df['sex'] = sex_series.str.get(1)


In [10]:
# Remove the "*" marker from names. "*" is a regex metacharacter, so request a
# literal replacement explicitly instead of relying on pandas' default for
# `regex`, which differs between pandas versions.
intake_df['name'] = intake_df['name'].str.replace("*", "", regex=False)

In [11]:
# Parse the whole intake-timestamp column in one vectorized call rather than
# calling pd.to_datetime once per element.
intake_df['date_in'] = pd.to_datetime(intake_df['date_in'])

In [12]:
# Collapse the sterilisation status to Yes/No. Any value that is not a key of
# the mapping becomes NaN.
intake_df['fixed'] = intake_df['fixed'].map(
    {"Neutered": "Yes", "Spayed": "Yes", "Intact": "No"}
)

In [13]:
# Unauthenticated Socrata client (no app token) for the Austin open-data portal.
client = Socrata("data.austintexas.gov", app_token=None)
# Pull at most 100000 records of dataset 9t4d-g238.
# NOTE(review): anything beyond the limit is not fetched — confirm the dataset is smaller.
results = client.get("9t4d-g238", limit=100000)
# One DataFrame row per returned record.
outcomes_df = pd.DataFrame.from_records(results)



In [14]:
# Use animal_id as the row index of the outcome table.
outcomes_df = outcomes_df.set_index(keys="animal_id")

In [15]:
# Name the outcome timestamp date_out.
outcomes_df = outcomes_df.rename(columns={'datetime': 'date_out'})

In [16]:
# Parse the whole outcome-timestamp column in one vectorized call rather than
# calling pd.to_datetime once per element.
outcomes_df['date_out'] = pd.to_datetime(outcomes_df['date_out'])

In [17]:
# Keep only the four outcome columns that are used from here on.
outcomes_df = outcomes_df.loc[
    :, ['date_of_birth', 'date_out', "outcome_subtype", "outcome_type"]
]

In [18]:
# Outer-join intakes with outcomes on animal_id.
# NOTE(review): if an animal_id occurs several times on both sides, the merge
# emits every intake/outcome pairing for it — confirm this is intended.
combined_df = pd.merge(intake_df, outcomes_df, how="outer", on="animal_id")

In [19]:
# Discard rows that have no intake timestamp.
combined_df = combined_df[combined_df['date_in'].notna()]

In [20]:
# An animal counts as still in the shelter when it has no recorded outcome.
combined_df['in_shelter'] = "No"
# Single .loc assignment instead of chained indexing (df[col][mask] = ...),
# which may write to a temporary copy and leave the frame unchanged.
combined_df.loc[combined_df['outcome_type'].isnull(), 'in_shelter'] = "Yes"

In [21]:
# Number of rows per in_shelter value.
combined_df['in_shelter'].value_counts()

No     78574
Yes      459
Name: in_shelter, dtype: int64

In [22]:
# Length of stay: outcome timestamp minus intake timestamp.
combined_df['time_in_shelter'] = combined_df['date_out'].sub(combined_df['date_in'])

In [23]:
# Number of rows per in_shelter value.
combined_df['in_shelter'].value_counts()

No     78574
Yes      459
Name: in_shelter, dtype: int64

In [24]:
# Rows for animals flagged as still in the shelter.
combined_df[combined_df['in_shelter'] == "Yes"]

Unnamed: 0_level_0,age_upon_intake,animal_type,breed,color,date_in,found_location,intake_condition,intake_type,name,sex,fixed,date_of_birth,date_out,outcome_subtype,outcome_type,in_shelter,time_in_shelter
animal_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
A787254,3 days,Dog,Black Mouth Cur Mix,"[Black, White]",2019-01-10 10:19:00,2000 Man O War Travis,Normal,Stray,Sprinkles,Female,No,,NaT,,,Yes,NaT
A787398,2 years,Dog,Pit Bull Mix,"[Brown, White]",2019-01-12 13:46:00,Austin,Normal,Owner Surrender,Coco,Female,Yes,,NaT,,,Yes,NaT
A780008,1 year,Dog,Labrador Retriever Mix,"[White, Tan]",2018-09-08 14:22:00,12118 Walnut Park Crossing Austin,Normal,Stray,Peedee,Male,No,,NaT,,,Yes,NaT
A771540,8 years,Dog,Staffordshire/American Bulldog,"[White, Brown]",2018-05-06 14:17:00,Austin,Normal,Owner Surrender,Big Sherm,Male,Yes,,NaT,,,Yes,NaT
A788780,1 year,Dog,Pit Bull Mix,"[White, Tan]",2019-02-08 17:14:00,Hawkanson And Peterson Travis,Normal,Stray,Bevo,Male,No,,NaT,,,Yes,NaT
A772035,2 years,Dog,Pit Bull Mix,"[Blue, White]",2018-05-13 17:36:00,4401 Elmsgrove Drive Austin,Normal,Stray,Blue,Male,No,,NaT,,,Yes,NaT
A771447,1 year,Dog,Pit Bull Mix,"[Tan, White]",2018-05-05 11:59:00,Austin,Normal,Owner Surrender,Erica,Female,Yes,,NaT,,,Yes,NaT
A783054,3 years,Dog,Pit Bull Mix,[Brown Brindle],2018-10-24 16:09:00,Travis,Normal,Owner Surrender,Sydney,Female,Yes,,NaT,,,Yes,NaT
A787044,2 years,Dog,Akbash Mix,[White],2019-01-06 17:57:00,Austin,Normal,Public Assist,Rommel,Male,Yes,,NaT,,,Yes,NaT
A787614,1 year,Dog,Pit Bull Mix,"[Brown, White]",2019-01-16 21:53:00,6204 Pino Lane Austin,Normal,Stray,Mack,Male,No,,NaT,,,Yes,NaT


In [25]:
# Flag rows whose outcome precedes the intake (negative length of stay).
mask = combined_df['time_in_shelter'] < pd.Timedelta(0)
# NOTE(review): the removal is by animal_id label, not by row. If the index
# holds repeated animal_ids, every row sharing a flagged id is dropped — also
# rows whose own time_in_shelter is not negative. Confirm this is intended.
combined_df = combined_df.drop(index=mask[mask].index)

In [26]:
# Number of rows per in_shelter value.
combined_df['in_shelter'].value_counts()

No     40198
Yes      459
Name: in_shelter, dtype: int64

In [27]:
# Every current column except color, date_out and outcome_type; this list is
# the subset later passed to drop_duplicates.
# NOTE(review): color holds lists after the "/" split, presumably the reason
# it is left out of the duplicate check — confirm.
features = combined_df.columns.drop(["color", "date_out", "outcome_type"]).tolist()
features

['age_upon_intake',
 'animal_type',
 'breed',
 'date_in',
 'found_location',
 'intake_condition',
 'intake_type',
 'name',
 'sex',
 'fixed',
 'date_of_birth',
 'outcome_subtype',
 'in_shelter',
 'time_in_shelter']

In [28]:
# Parse the whole date-of-birth column in one vectorized call rather than
# calling pd.to_datetime once per element; missing values become NaT.
combined_df['date_of_birth'] = pd.to_datetime(combined_df['date_of_birth'])

In [29]:
# Age at intake in years: whole days since birth / 365, rounded to the nearest
# year, with a rounded age of 0 reported as 0.5. Rows lacking a date of birth
# stay NaN. Uses the vectorized .dt.days accessor instead of a per-element
# lambda.
combined_df["age_in"] = (
    (combined_df["date_in"] - combined_df["date_of_birth"]).dt.days / 365
).round().replace(0.0, 0.5)

In [30]:
# Age at outcome in years: whole days since birth / 365, rounded to the nearest
# year, with a rounded age of 0 reported as 0.5. Rows lacking a date of birth
# or an outcome date stay NaN. Uses the vectorized .dt.days accessor instead of
# a per-element lambda.
combined_df["age_out"] = (
    (combined_df["date_out"] - combined_df["date_of_birth"]).dt.days / 365
).round().replace(0.0, 0.5)

In [31]:
# Keep the first of any rows that agree on every column listed in `features`.
combined_df = combined_df[~combined_df.duplicated(subset=features)]

In [32]:
# Remove the age_upon_intake and animal_type columns.
combined_df = combined_df.drop(columns=['age_upon_intake', 'animal_type'])

In [33]:
# One row per animal_id: keep the first occurrence of each index label.
combined_unique_df = combined_df.loc[~combined_df.index.duplicated()]

In [34]:
# First five rows of the one-row-per-animal table.
combined_unique_df.head(5)

Unnamed: 0_level_0,breed,color,date_in,found_location,intake_condition,intake_type,name,sex,fixed,date_of_birth,date_out,outcome_subtype,outcome_type,in_shelter,time_in_shelter,age_in,age_out
animal_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
A786884,Beagle Mix,[Tricolor],2019-01-03 16:19:00,2501 Magin Meadow Dr Austin,Normal,Stray,Brock,Male,Yes,2017-01-03,2019-01-08 15:11:00,Partner,Transfer,No,4 days 22:52:00,2.0,2.0
A706918,English Springer Spaniel,"[White, Liver]",2015-07-05 12:59:00,9409 Bluegrass Dr Austin,Normal,Stray,Belle,Female,Yes,2007-07-05,2015-07-05 15:13:00,,Return to Owner,No,0 days 02:14:00,8.0,8.0
A724273,Basenji Mix,"[Sable, White]",2016-04-14 18:43:00,2818 Palomino Trail Austin,Normal,Stray,Runster,Male,No,2015-04-17,2016-04-21 17:17:00,,Return to Owner,No,6 days 22:34:00,1.0,1.0
A778404,German Shepherd Mix,"[Black, Tan]",2018-08-14 10:19:00,Austin,Normal,Owner Surrender,Max,Male,No,2014-08-14,2018-08-17 18:03:00,,Adoption,No,3 days 07:44:00,4.0,4.0
A682524,Doberman Pinsch/Australian Cattle Dog,"[Tan, Gray]",2014-06-29 10:38:00,800 Grove Blvd Austin,Normal,Stray,Rio,Male,Yes,2010-06-29,2014-07-02 14:16:00,,Return to Owner,No,3 days 03:38:00,4.0,4.0


### Get geocodes from addresses

*NOTE: DO NOT RUN THESE CELLS*

---

In [35]:
# Last five rows of the one-row-per-animal table.
combined_unique_df.tail(5)

Unnamed: 0_level_0,breed,color,date_in,found_location,intake_condition,intake_type,name,sex,fixed,date_of_birth,date_out,outcome_subtype,outcome_type,in_shelter,time_in_shelter,age_in,age_out
animal_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
A789901,German Shepherd/Doberman Pinsch,"[Black, Tan]",2019-03-01 15:58:00,14719 Us Hwy 290 East Manor,Normal,Stray,Beerus,Male,No,2018-11-01,2019-03-10 15:10:00,,Adoption,No,8 days 23:12:00,0.5,0.5
A790070,German Shepherd Mix,"[Brown, Black]",2019-03-04 19:16:00,Austin,Normal,Public Assist,Lucy,Female,Yes,NaT,NaT,,,Yes,NaT,,
A788592,Boxer Mix,"[Tan, White]",2019-02-05 11:18:00,225 Maldonado Trail Travis,Normal,Stray,Rocky,Male,Yes,NaT,NaT,,,Yes,NaT,,
A790216,Pit Bull Mix,"[Brown Tiger, White]",2019-03-06 15:18:00,1809 W Rundberg Ln Apt Number 235 Austin,Normal,Stray,Rajah,Male,No,NaT,NaT,,,Yes,NaT,,
A790182,Pit Bull/Pointer,"[Tan, White]",2019-03-06 09:57:00,1407 Atterbury Ln Austin,Normal,Stray,Xena,Female,No,NaT,NaT,,,Yes,NaT,,


In [36]:
# Write the combined table (animal_id index included) to the working directory.
combined_df.to_csv(path_or_buf='austin_shelter.csv')

In [37]:
# Write the one-row-per-animal table (animal_id index included) to the working directory.
combined_unique_df.to_csv(path_or_buf='unique_austin_shelter.csv')

In [38]:
# Number of rows per in_shelter value in the combined table.
combined_df['in_shelter'].value_counts()

No     39616
Yes      427
Name: in_shelter, dtype: int64

In [39]:
# Number of rows per in_shelter value in the one-row-per-animal table.
combined_unique_df['in_shelter'].value_counts()

No     39603
Yes      427
Name: in_shelter, dtype: int64

In [40]:
# Age-at-intake column of the combined table.
combined_df['age_in']

animal_id
A786884     2.0
A706918     8.0
A724273     1.0
A778404     4.0
A682524     4.0
A743852     2.0
A708452     2.0
A760053     2.0
A707375     0.5
A696408     2.0
A697950     0.5
A298074    19.0
A769764     0.5
A682230     0.5
A749436     0.5
A759935     0.5
A666877     1.0
A732903     0.5
A769816     5.0
A724378     4.0
A745133     1.0
A721791     0.5
A675555     3.0
A747964     6.0
A787254     NaN
A710890     3.0
A683884     2.0
A676263     2.0
A736719    12.0
A770028     1.0
           ... 
A790425     NaN
A761683     3.0
A790422     2.0
A790198     NaN
A790407    10.0
A790424     NaN
A790420     NaN
A790280     NaN
A790367     1.0
A790397     1.0
A789484     2.0
A789444     1.0
A790432     NaN
A790431     NaN
A790430     NaN
A790279     NaN
A790218     NaN
A790426     NaN
A790387     NaN
A790247     NaN
A790439     NaN
A790463     NaN
A790435     1.0
A788004     NaN
A790445     NaN
A789901     0.5
A790070     NaN
A788592     NaN
A790216     NaN
A790182     NaN
Name: age_in, 