In [160]:
%matplotlib inline

import numpy as np
import pandas as pd
import requests
from pandas.io.json import json_normalize
from geopy.geocoders import Nominatim
from sodapy import Socrata
from pygeocoder import Geocoder
import warnings
warnings.filterwarnings('ignore')

In [71]:
# Download dataset "fdzn-9yqv" from data.austintexas.gov into the intake frame.
# The client is used as a context manager so its HTTP session is closed as soon
# as the download finishes instead of staying open for the life of the kernel.
with Socrata("data.austintexas.gov", None) as client:
    # NOTE: `limit` caps the number of rows returned; anything past 100000 rows
    # is silently not fetched.
    results = client.get("fdzn-9yqv", limit=100000)
intake_df = pd.DataFrame.from_records(results)



In [72]:
# Use animal_id as the row index of the intake frame
intake_df = intake_df.set_index(keys="animal_id")

In [73]:
# Drop datetime2, then give datetime and sex_upon_intake shorter names
intake_df = (
    intake_df
    .drop(columns=['datetime2'])
    .rename(columns={'datetime': 'date_in', 'sex_upon_intake': 'sex'})
)


In [74]:
# only get dogs; .copy() yields an independent frame so the column assignments
# made in later cells write to this frame rather than to a slice of the
# unfiltered data (which triggers SettingWithCopyWarning)
intake_df = intake_df.loc[intake_df['animal_type'] == "Dog"].copy()

In [75]:
# Turn each "A/B" color string into a list of its parts
intake_df['color'] = intake_df['color'].str.split(pat='/')

In [76]:
# Remove the trailing " (TX)" suffix, then collapse " in " to a single space.
# The suffix is matched explicitly instead of slicing off the last five
# characters, so a value that does not end in " (TX)" is left intact rather
# than truncated.
# NOTE(review): assumes the five characters being sliced off were " (TX)" —
# confirm against the raw found_location values.
intake_df['found_location'] = (
    intake_df['found_location']
    .str.replace(r" \(TX\)$", "", regex=True)
    .str.replace(" in ", " ", regex=False)
)

In [77]:
# Preview the intake frame after the dog filter and column clean-up
intake_df

Unnamed: 0_level_0,age_upon_intake,animal_type,breed,color,date_in,found_location,intake_condition,intake_type,name,sex
animal_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
A786884,2 years,Dog,Beagle Mix,[Tricolor],2019-01-03T16:19:00.000,2501 Magin Meadow Dr Austin,Normal,Stray,*Brock,Neutered Male
A706918,8 years,Dog,English Springer Spaniel,"[White, Liver]",2015-07-05T12:59:00.000,9409 Bluegrass Dr Austin,Normal,Stray,Belle,Spayed Female
A724273,11 months,Dog,Basenji Mix,"[Sable, White]",2016-04-14T18:43:00.000,2818 Palomino Trail Austin,Normal,Stray,Runster,Intact Male
A778404,4 years,Dog,German Shepherd Mix,"[Black, Tan]",2018-08-14T10:19:00.000,Austin,Normal,Owner Surrender,Max,Intact Male
A682524,4 years,Dog,Doberman Pinsch/Australian Cattle Dog,"[Tan, Gray]",2014-06-29T10:38:00.000,800 Grove Blvd Austin,Normal,Stray,Rio,Neutered Male
A743852,2 years,Dog,Labrador Retriever Mix,[Chocolate],2017-02-18T12:46:00.000,Austin,Normal,Owner Surrender,Odin,Neutered Male
A708452,2 years,Dog,Labrador Retriever Mix,"[Black, White]",2015-07-30T14:37:00.000,Austin,Normal,Public Assist,Mumble,Intact Male
A760053,2 years,Dog,Chihuahua Shorthair,"[White, Tan]",2017-10-11T15:46:00.000,8800 South First Street Austin,Normal,Stray,,Intact Male
A707375,5 months,Dog,Pit Bull,"[Brown, White]",2015-07-11T18:19:00.000,Galilee Court And Damita Jo Dr Manor,Normal,Stray,*Candy Cane,Intact Female
A696408,2 years,Dog,Chihuahua Shorthair,[Tricolor],2015-02-04T12:58:00.000,9705 Thaxton Austin,Normal,Stray,*Pearl,Intact Female


In [78]:
# Split the two-word value (e.g. "Neutered Male") on the space: the first word
# becomes `fixed`, the second replaces `sex`. A single-word value leaves `sex`
# missing.
sex_series = intake_df['sex'].str.split(" ")
intake_df['fixed'] = sex_series.str.get(0)
intake_df['sex'] = sex_series.str.get(1)


In [79]:
# Strip the "*" marker from names. regex=False makes "*" a literal character;
# interpreted as a regular expression a bare "*" is not a valid pattern.
# Bracket access is used because `.name` is easy to confuse with an attribute.
intake_df['name'] = intake_df['name'].str.replace("*", "", regex=False)

In [80]:
# Parse the timestamp strings in one vectorized call rather than invoking
# pd.to_datetime once per row through apply
intake_df['date_in'] = pd.to_datetime(intake_df['date_in'])

In [81]:
# Translate the fixed-status word into Yes/No; any value not listed becomes NaN
intake_df['fixed'] = intake_df['fixed'].map(
    dict(Neutered="Yes", Spayed="Yes", Intact="No")
)

In [82]:
# Download dataset "9t4d-g238" from data.austintexas.gov into the outcomes frame.
# The client is used as a context manager so its HTTP session is closed as soon
# as the download finishes instead of staying open for the life of the kernel.
with Socrata("data.austintexas.gov", None) as client:
    # NOTE: `limit` caps the number of rows returned; anything past 100000 rows
    # is silently not fetched.
    results = client.get("9t4d-g238", limit=100000)
outcomes_df = pd.DataFrame.from_records(results)



In [83]:
# Use animal_id as the row index of the outcomes frame
outcomes_df = outcomes_df.set_index(keys="animal_id")

In [84]:
# Name the outcome timestamp date_out
outcomes_df = outcomes_df.rename(columns={'datetime': 'date_out'})

In [85]:
# Parse the timestamp strings in one vectorized call rather than invoking
# pd.to_datetime once per row through apply
outcomes_df['date_out'] = pd.to_datetime(outcomes_df['date_out'])

In [86]:
# Keep only the outcome columns used downstream
outcomes_df = outcomes_df.loc[
    :, ['date_of_birth', 'date_out', "outcome_subtype", "outcome_type"]
]

In [161]:
# Outer-join intakes with outcomes on animal_id
combined_df = pd.merge(
    left=intake_df,
    right=outcomes_df,
    how="outer",
    on="animal_id",
)

In [162]:
# Discard rows that have no intake timestamp
combined_df = combined_df.dropna(subset=['date_in'], axis='index', how='any')

In [163]:
# Flag rows with no recorded outcome as still in the shelter ("Yes"), all others
# "No". Built in a single assignment: the chained form df['col'][mask] = value
# can write to a temporary copy and leave the frame unchanged (and never updates
# the frame under pandas Copy-on-Write).
combined_df['in_shelter'] = np.where(
    combined_df['outcome_type'].isnull(), "Yes", "No"
)

In [164]:
# Length of stay: outcome timestamp minus intake timestamp
combined_df['time_in_shelter'] = combined_df['date_out'].sub(combined_df['date_in'])

In [165]:
# Flag rows whose outcome timestamp is earlier than the intake timestamp
# (negative time_in_shelter).
mask = ((combined_df.time_in_shelter < pd.Timedelta(0)))
# DataFrame.mask blanks every column of the flagged rows, and dropna then removes
# each row whose time_in_shelter is missing.
# NOTE(review): rows that never had a date_out also have a missing
# time_in_shelter, so they are removed here too — confirm that dropping animals
# with no recorded outcome is intended.
combined_df = combined_df.mask(mask).dropna(subset=['time_in_shelter'], axis=0)

In [166]:
# Columns compared when looking for duplicate rows: every column except color,
# date_out and outcome_type.
# (color holds lists after the split on "/", which are unhashable — presumably
# why it is excluded; verify.)
features = combined_df.columns.drop(["color", "date_out", "outcome_type"]).tolist()
features

['age_upon_intake',
 'animal_type',
 'breed',
 'date_in',
 'found_location',
 'intake_condition',
 'intake_type',
 'name',
 'sex',
 'fixed',
 'date_of_birth',
 'outcome_subtype',
 'in_shelter',
 'time_in_shelter']

In [167]:
# Remove rows that repeat an earlier row on every column listed in `features`
combined_df = combined_df.drop_duplicates(subset=features, keep='first')

In [168]:
# Keep only the first row for each index label (animal_id). .copy() detaches the
# result from combined_df so that adding columns to it later does not operate on
# a slice of the original frame (SettingWithCopyWarning).
combined_unique_df = combined_df[~combined_df.index.duplicated(keep='first')].copy()

In [170]:
# Show the first five de-duplicated rows
combined_unique_df.head(n=5)

Unnamed: 0_level_0,age_upon_intake,animal_type,breed,color,date_in,found_location,intake_condition,intake_type,name,sex,fixed,date_of_birth,date_out,outcome_subtype,outcome_type,in_shelter,time_in_shelter
animal_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
A786884,2 years,Dog,Beagle Mix,[Tricolor],2019-01-03 16:19:00,2501 Magin Meadow Dr Austin,Normal,Stray,Brock,Male,Yes,2017-01-03T00:00:00.000,2019-01-08 15:11:00,Partner,Transfer,No,4 days 22:52:00
A706918,8 years,Dog,English Springer Spaniel,"[White, Liver]",2015-07-05 12:59:00,9409 Bluegrass Dr Austin,Normal,Stray,Belle,Female,Yes,2007-07-05T00:00:00.000,2015-07-05 15:13:00,,Return to Owner,No,0 days 02:14:00
A724273,11 months,Dog,Basenji Mix,"[Sable, White]",2016-04-14 18:43:00,2818 Palomino Trail Austin,Normal,Stray,Runster,Male,No,2015-04-17T00:00:00.000,2016-04-21 17:17:00,,Return to Owner,No,6 days 22:34:00
A778404,4 years,Dog,German Shepherd Mix,"[Black, Tan]",2018-08-14 10:19:00,Austin,Normal,Owner Surrender,Max,Male,No,2014-08-14T00:00:00.000,2018-08-17 18:03:00,,Adoption,No,3 days 07:44:00
A682524,4 years,Dog,Doberman Pinsch/Australian Cattle Dog,"[Tan, Gray]",2014-06-29 10:38:00,800 Grove Blvd Austin,Normal,Stray,Rio,Male,Yes,2010-06-29T00:00:00.000,2014-07-02 14:16:00,,Return to Owner,No,3 days 03:38:00


### Get geocodes from addresses

*NOTE: DO NOT RUN THESE CELLS*

---

In [143]:
# Write the geocoding results to geocodes.txt, one entry per line.
# NOTE(review): `geocodes` is not assigned in any cell visible in this section.
with open('geocodes.txt', 'w') as out_file:
    out_file.writelines("%s\n" % item for item in geocodes)

In [171]:
# Attach one geocoding result to each row.
# NOTE(review): `geocodes` must hold exactly one entry per row, in row order —
# verify before relying on the coordinates.
combined_unique_df = combined_unique_df.assign(coordinates=geocodes)

In [172]:
# Reduce each geocoding result to a (lat, lng) tuple
combined_unique_df['coordinates'] = combined_unique_df['coordinates'].map(
    lambda geo: (geo['lat'], geo['lng'])
)

In [174]:
# Show the last five rows, now including coordinates
combined_unique_df.tail(n=5)

Unnamed: 0_level_0,age_upon_intake,animal_type,breed,color,date_in,found_location,intake_condition,intake_type,name,sex,fixed,date_of_birth,date_out,outcome_subtype,outcome_type,in_shelter,time_in_shelter,coordinates
animal_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
A768390,7 months,Dog,Chihuahua Shorthair/Catahoula,[Red Merle],2018-03-18 15:40:00,1108 Teapot Drive Pflugerville,Normal,Stray,Peanut,Female,No,2017-07-18T00:00:00.000,2019-03-09 14:09:00,,Adoption,No,355 days 22:29:00,"(30.42312789999999, -97.6409735)"
A790382,2 years,Dog,American Bulldog Mix,"[White, Black]",2019-03-09 12:49:00,2108 Barton Hills Drive Austin,Normal,Stray,Jax,Male,Yes,2017-03-09T00:00:00.000,2019-03-09 14:24:00,,Return to Owner,No,0 days 01:35:00,"(30.2543951, -97.7833567)"
A789935,1 month,Dog,Plott Hound Mix,"[Brown Brindle, White]",2019-03-02 11:38:00,1500 Dale Austin,Normal,Stray,Rocket,Male,No,2019-01-02T00:00:00.000,2019-03-09 18:28:00,,Adoption,No,7 days 06:50:00,"(30.3545552, -97.72067009999999)"
A790159,5 months,Dog,German Shepherd Mix,[Black],2019-03-05 17:09:00,2200 South Pleasant Valley Drive Austin,Normal,Stray,Ace,Male,No,2018-09-05T00:00:00.000,2019-03-09 19:11:00,,Adoption,No,4 days 02:02:00,"(30.230911, -97.7261898)"
A790405,5 years,Dog,Chihuahua Shorthair Mix,"[Tan, Cream]",2019-03-09 16:35:00,45Th Street And Speedway Austin,Normal,Stray,Cachito,Male,No,2014-03-09T00:00:00.000,2019-03-09 18:03:00,,Return to Owner,No,0 days 01:28:00,"(30.3086372, -97.72839239999999)"


In [None]:
# Export to CSV.
# NOTE(review): the file name 'shelter_' has no extension and looks unfinished,
# and this writes combined_df rather than combined_unique_df (the frame that
# carries the coordinates column) — confirm both before running.
combined_df.to_csv(path_or_buf='shelter_')