In [367]:
%matplotlib inline

import numpy as np
import pandas as pd
import requests
from pandas.io.json import json_normalize
from geopy.geocoders import Nominatim
from sodapy import Socrata


In [368]:
# intake = requests.get("https://data.austintexas.gov/resource/fdzn-9yqv.json").json()
client = Socrata("data.austintexas.gov", None)
results = client.get("fdzn-9yqv", limit=100000)
intake_df = pd.DataFrame.from_records(results)



In [369]:
intake_df = intake_df.set_index("animal_id")

In [370]:
# rename datetime and sex to normal names
intake_df.drop(['datetime2'], axis=1, inplace=True)
intake_df.rename(columns={'datetime': 'date_in', 'sex_upon_intake': 'sex'}, inplace=True)


In [371]:
# only get dogs
intake_df = intake_df.loc[intake_df['animal_type'] == "Dog"]

In [372]:
# split color on /
intake_df.color = intake_df.color.str.split('/')

In [373]:
intake_df.found_location = intake_df['found_location'].str[:-5].str.replace(" in ", " ")

In [374]:
def getLat(found_location):
    location = Nominatim(user_agent="project").geocode(found_location)
    if(location):
        return location.latitude
    

In [475]:
location = Nominatim(user_agent="project").geocode("Loyola And Johnny Morris Austin")
location

In [375]:
def getLong(found_location):
    location = Nominatim().geocode(found_location)
    if(location):
        return location.longitude
    

In [376]:
sex_series = intake_df.sex.str.split(" ")
intake_df['fixed'] = sex_series.str[0]
intake_df['sex'] = sex_series.str[1]


In [377]:
intake_df.name = intake_df.name.str.replace("*", "")

In [378]:
intake_df.date_in = intake_df.date_in.apply(pd.to_datetime)

In [383]:
intake_df.fixed = intake_df.fixed.map({
    "Neutered": "Yes",
    "Spayed": "Yes",
    "Intact": "No"
})

In [455]:
client = Socrata("data.austintexas.gov", None)
results = client.get("9t4d-g238", limit=100000)
outcomes_df = pd.DataFrame.from_records(results)



In [456]:
outcomes_df = outcomes_df.set_index("animal_id")

In [458]:
outcomes_df.rename(columns={'datetime': 'date_out'}, inplace=True)

In [459]:
outcomes_df.date_out = outcomes_df.date_out.apply(pd.to_datetime)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


In [460]:
outcomes_df = outcomes_df[['date_of_birth', 'date_out', "outcome_subtype", "outcome_type"]]

In [461]:
combined_df = intake_df.merge(outcomes_df, on="animal_id", how="outer")

In [462]:
combined_df = combined_df.dropna(axis=0, subset=['date_in'])

In [463]:
combined_df['in_shelter'] = "No"
combined_df['in_shelter'][combined_df.outcome_type.isnull()] = "Yes"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [464]:
combined_df.dtypes

age_upon_intake             object
animal_type                 object
breed                       object
color                       object
date_in             datetime64[ns]
found_location              object
intake_condition            object
intake_type                 object
name                        object
sex                         object
fixed                       object
date_of_birth               object
date_out            datetime64[ns]
outcome_subtype             object
outcome_type                object
in_shelter                  object
dtype: object

In [553]:
combined_df['time_in_shelter'] = combined_df.date_out - combined_df.date_in

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [579]:
mask = ((combined_df.time_in_shelter < pd.Timedelta(0)))
combined_df = combined_df.mask(mask).dropna(subset=['time_in_shelter'], axis=0)

In [401]:
(combined_df.date_out != np.nan).value_counts()

True    120696
Name: date_out, dtype: int64

In [402]:
combined_df.in_shelter.value_counts()

Yes    120696
Name: in_shelter, dtype: int64

In [580]:
combined_df.time_in_shelter

animal_id
A786884     4 days 22:52:00
A706918     0 days 02:14:00
A724273     6 days 22:34:00
A778404     3 days 07:44:00
A682524     3 days 03:38:00
A743852     3 days 04:58:00
A708452     5 days 03:40:00
A760053     4 days 02:44:00
A707375   139 days 23:12:00
A696408   112 days 22:26:00
A697950     4 days 07:33:00
A298074     0 days 21:19:00
A769764     3 days 21:09:00
A682230     4 days 00:59:00
A749436     0 days 03:35:00
A759935     7 days 19:39:00
A666877     6 days 23:29:00
A732903     6 days 00:38:00
A769816    21 days 22:51:00
A700396   363 days 04:27:00
A700396    74 days 19:03:00
A700396   355 days 05:37:00
A700396   397 days 02:54:00
A700396   601 days 02:40:00
A700396   447 days 02:41:00
A724378     3 days 23:35:00
A702701   244 days 22:40:00
A702701    14 days 20:05:00
A702701     0 days 01:44:00
A745133    10 days 23:12:00
                 ...       
A789409    13 days 08:07:00
A789952     4 days 05:19:00
A789929     6 days 02:30:00
A790158     1 days 21:29:00
A790248   

In [470]:
combined_df.index.unique()

Index(['A786884', 'A706918', 'A724273', 'A778404', 'A682524', 'A743852',
       'A708452', 'A760053', 'A707375', 'A696408',
       ...
       'A790339', 'A790338', 'A790336', 'A790321', 'A790340', 'A784627',
       'A790069', 'A789881', 'A790231', 'A790327'],
      dtype='object', name='animal_id', length=47000)

In [552]:
combined_df.loc["A702701"]

Unnamed: 0_level_0,age_upon_intake,animal_type,breed,color,date_in,found_location,intake_condition,intake_type,name,sex,fixed,date_of_birth,date_out,outcome_subtype,outcome_type,in_shelter
animal_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
A702701,2 years,Dog,Miniature Pinscher Mix,"[Chocolate, Brown]",2015-05-16 18:38:00,Mcneil Drive & Burnet Road Austin,Normal,Stray,Pepper,Female,Yes,2013-05-16T00:00:00.000,2016-01-16 17:18:00,,Return to Owner,No
A702701,2 years,Dog,Miniature Pinscher Mix,"[Chocolate, Brown]",2015-05-16 18:38:00,Mcneil Drive & Burnet Road Austin,Normal,Stray,Pepper,Female,Yes,2013-05-16T00:00:00.000,2015-05-31 14:43:00,Partner,Transfer,No
A702701,2 years,Dog,Miniature Pinscher Mix,"[Chocolate, Brown]",2016-01-16 15:34:00,4600 W Guadalupe St Austin,Normal,Stray,Pepper,Female,Yes,2013-05-16T00:00:00.000,2016-01-16 17:18:00,,Return to Owner,No
A702701,2 years,Dog,Miniature Pinscher Mix,"[Chocolate, Brown]",2016-01-16 15:34:00,4600 W Guadalupe St Austin,Normal,Stray,Pepper,Female,Yes,2013-05-16T00:00:00.000,2015-05-31 14:43:00,Partner,Transfer,No


In [522]:
features = combined_df.columns.to_list()
features.remove("color")
features.remove("date_out")
features.remove("outcome_type")
features

['age_upon_intake',
 'animal_type',
 'breed',
 'date_in',
 'found_location',
 'intake_condition',
 'intake_type',
 'name',
 'sex',
 'fixed',
 'date_of_birth',
 'outcome_subtype',
 'in_shelter']

In [524]:
combined_df = combined_df.drop_duplicates(subset=features)

In [581]:
combined_unique_df = combined_df[~combined_df.index.duplicated(keep='first')]

In [582]:
combined_unique_df

Unnamed: 0_level_0,age_upon_intake,animal_type,breed,color,date_in,found_location,intake_condition,intake_type,name,sex,fixed,date_of_birth,date_out,outcome_subtype,outcome_type,in_shelter,time_in_shelter
animal_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
A786884,2 years,Dog,Beagle Mix,[Tricolor],2019-01-03 16:19:00,2501 Magin Meadow Dr Austin,Normal,Stray,Brock,Male,Yes,2017-01-03T00:00:00.000,2019-01-08 15:11:00,Partner,Transfer,No,4 days 22:52:00
A706918,8 years,Dog,English Springer Spaniel,"[White, Liver]",2015-07-05 12:59:00,9409 Bluegrass Dr Austin,Normal,Stray,Belle,Female,Yes,2007-07-05T00:00:00.000,2015-07-05 15:13:00,,Return to Owner,No,0 days 02:14:00
A724273,11 months,Dog,Basenji Mix,"[Sable, White]",2016-04-14 18:43:00,2818 Palomino Trail Austin,Normal,Stray,Runster,Male,No,2015-04-17T00:00:00.000,2016-04-21 17:17:00,,Return to Owner,No,6 days 22:34:00
A778404,4 years,Dog,German Shepherd Mix,"[Black, Tan]",2018-08-14 10:19:00,Austin,Normal,Owner Surrender,Max,Male,No,2014-08-14T00:00:00.000,2018-08-17 18:03:00,,Adoption,No,3 days 07:44:00
A682524,4 years,Dog,Doberman Pinsch/Australian Cattle Dog,"[Tan, Gray]",2014-06-29 10:38:00,800 Grove Blvd Austin,Normal,Stray,Rio,Male,Yes,2010-06-29T00:00:00.000,2014-07-02 14:16:00,,Return to Owner,No,3 days 03:38:00
A743852,2 years,Dog,Labrador Retriever Mix,[Chocolate],2017-02-18 12:46:00,Austin,Normal,Owner Surrender,Odin,Male,Yes,2015-02-18T00:00:00.000,2017-02-21 17:44:00,,Return to Owner,No,3 days 04:58:00
A708452,2 years,Dog,Labrador Retriever Mix,"[Black, White]",2015-07-30 14:37:00,Austin,Normal,Public Assist,Mumble,Male,No,2013-07-28T00:00:00.000,2015-08-04 18:17:00,,Return to Owner,No,5 days 03:40:00
A760053,2 years,Dog,Chihuahua Shorthair,"[White, Tan]",2017-10-11 15:46:00,8800 South First Street Austin,Normal,Stray,,Male,No,2015-10-11T00:00:00.000,2017-10-15 18:30:00,,Adoption,No,4 days 02:44:00
A707375,5 months,Dog,Pit Bull,"[Brown, White]",2015-07-11 18:19:00,Galilee Court And Damita Jo Dr Manor,Normal,Stray,Candy Cane,Female,No,2015-01-11T00:00:00.000,2015-11-28 17:31:00,,Adoption,No,139 days 23:12:00
A696408,2 years,Dog,Chihuahua Shorthair,[Tricolor],2015-02-04 12:58:00,9705 Thaxton Austin,Normal,Stray,Pearl,Female,No,2013-02-04T00:00:00.000,2015-05-28 11:24:00,Foster,Adoption,No,112 days 22:26:00
