# Dog License Data Processing

In [2]:
import pandas as pd
import cartoframes

In [3]:
cc = cartoframes.CartoContext()

In [4]:
dog_licenses = pd.read_csv("NYC_Dog_Licensing_Dataset.csv")

In [11]:
dog_licenses.head()

Unnamed: 0,RowNumber,AnimalName,AnimalGender,AnimalBirthMonth,BreedName,Borough,ZipCode,CommunityDistrict,CensusTract2010,NTA,CityCouncilDistrict,CongressionalDistrict,StateSenatorialDistrict,LicenseIssuedDate,LicenseExpiredDate
0,1753,SHADOW,M,01/01/2000 12:00:00 AM,Beagle,Brooklyn,11236.0,318.0,1014.0,BK50,46.0,8.0,19.0,12/29/2014,01/30/2016
1,2415,ROCCO,M,10/01/2011 12:00:00 AM,Boxer,Brooklyn,11210.0,314.0,756.0,BK43,45.0,9.0,17.0,01/07/2015,01/30/2016
2,3328,LUIGI,M,09/01/2005 12:00:00 AM,Maltese,Bronx,10464.0,210.0,516.0,BX10,13.0,14.0,34.0,01/17/2015,02/02/2016
3,7537,PETUNIA,F,08/01/2013 12:00:00 AM,Pug,Brooklyn,11221.0,304.0,419.0,BK78,34.0,7.0,18.0,03/01/2015,03/28/2016
4,8487,ROMEO,M,10/01/2008 12:00:00 AM,Maltese,Bronx,10451.0,201.0,65.0,BX34,17.0,15.0,32.0,03/09/2015,03/09/2016


In [10]:
cc.write(dog_licenses, "dog_licenses")

The following columns were changed in the CARTO copy of this dataframe:
[1mRowNumber[0m -> [1mrownumber[0m
[1mAnimalName[0m -> [1manimalname[0m
[1mAnimalGender[0m -> [1manimalgender[0m
[1mAnimalBirthMonth[0m -> [1manimalbirthmonth[0m
[1mBreedName[0m -> [1mbreedname[0m
[1mBorough[0m -> [1mborough[0m
[1mZipCode[0m -> [1mzipcode[0m
[1mCommunityDistrict[0m -> [1mcommunitydistrict[0m
[1mCensusTract2010[0m -> [1mcensustract2010[0m
[1mNTA[0m -> [1mnta[0m
[1mCityCouncilDistrict[0m -> [1mcitycouncildistrict[0m
[1mCongressionalDistrict[0m -> [1mcongressionaldistrict[0m
[1mStateSenatorialDistrict[0m -> [1mstatesenatorialdistrict[0m
[1mLicenseIssuedDate[0m -> [1mlicenseissueddate[0m
[1mLicenseExpiredDate[0m -> [1mlicenseexpireddate[0m
Table successfully written to CARTO: https://michellemho-carto.carto.com/dataset/dog_licenses


In [13]:
cc.query("""
ALTER TABLE dog_licenses ALTER COLUMN licenseissueddate TYPE DATE 
using to_date(licenseissueddate, 'MM/DD/YYYY');
""")

In [16]:
# Just 2017
cc.query("""
SELECT * FROM dog_licenses
WHERE licenseissueddate >= '01/01/2017'::date
""", table_name='dog_licenses_2017')

Table successfully written to CARTO: https://michellemho-carto.carto.com/dataset/dog_licenses_2017


Unnamed: 0,rownumber,statesenatorialdistrict,citycouncildistrict,borough,the_geom,animalgender,animalbirthmonth,licenseexpireddate,zipcode,nta,animalname,communitydistrict,breedname,congressionaldistrict,licenseissueddate,censustract2010


In [17]:
# Count by breed
count_by_breed = cc.query("""
SELECT row_number() OVER () As cartodb_id,
BreedName, count(*), nynta_2.ntacode, nynta_2.the_geom, nynta_2.the_geom_webmercator
FROM dog_licenses
JOIN nynta_2
ON (dog_licenses.nta = nynta_2.ntacode)
GROUP BY BreedName, nynta_2.ntacode, nynta_2.the_geom, nynta_2.the_geom_webmercator
""")

In [18]:
count_by_breed.head()

Unnamed: 0_level_0,breedname,count,ntacode,the_geom
cartodb_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Affenpinscher,1,BK35,0106000020E610000001000000010300000001000000AE...
2,Affenpinscher,1,BK37,0106000020E610000001000000010300000001000000C9...
3,Affenpinscher,1,BK42,0106000020E61000000100000001030000000100000093...
4,Affenpinscher,1,BK44,0106000020E6100000010000000103000000010000005B...
5,Affenpinscher,2,BK61,0106000020E61000000100000001030000000100000087...


In [36]:
count_by_breed.drop('the_geom', axis=1).to_csv('count_by_breed_by_nta.csv')

In [21]:
# Top 10 Dog Names (all of NYC)

top_dog_names = cc.query("""
SELECT animalname, count(*)
FROM dog_licenses
GROUP BY animalname
ORDER BY count desc""")

top_dog_names[:12]

Unnamed: 0,animalname,count
0,UNKNOWN,2467
1,NAME NOT PROVIDED,1764
2,BELLA,1008
3,MAX,1001
4,CHARLIE,750
5,LOLA,686
6,ROCKY,668
7,COCO,642
8,LUCY,581
9,BUDDY,559


In [26]:
# Count of Dog Names by Neighborhood

count_by_name = cc.query("""
SELECT row_number() OVER () As cartodb_id,
animalname, count(*), nynta_2.ntacode
FROM dog_licenses
JOIN nynta_2
ON (dog_licenses.nta = nynta_2.ntacode)
WHERE animalname <> 'UNKNOWN'
AND animalname <> ''
AND animalname <> 'NAME NOT PROVIDED'
GROUP BY animalname, nynta_2.ntacode
ORDER BY count desc
""", table_name='dog_name_count_by_nta')

Table successfully written to CARTO: https://michellemho-carto.carto.com/dataset/dog_name_count_by_nta


In [29]:
top_10_names_by_nta = cc.query("""SELECT
  * 
FROM (
  SELECT
    ROW_NUMBER() OVER (PARTITION BY ntacode ORDER BY count desc) AS r,
    t.*
  FROM
    dog_name_count_by_nta t) x
WHERE
  x.r <= 10""")

In [31]:
# Top Dog Names by Neighborhood
top_10_names_by_nta.to_csv('top_10_dognames_by_nta.csv')

In [43]:
# Total number of dog licenses per neighborhood
count_licenses_by_nta = dog_licenses.groupby('NTA')['AnimalName'].count()
count_licenses_by_nta.to_csv('count_licenses_by_nta.csv')

In [None]:
# Total number of licenses as a percentage of population per neighborhood
nynta_pop = pd.read_excel('../nta_data/demo_2016acs5yr_nta.xlsx')
count_licenses_by_nta = pd.DataFrame(count_licenses_by_nta)
count_licenses_by_nta.columns = ['doglicense_count']
pop_and_dogs = nynta_pop.join(count_licenses_by_nta, on = 'GeoID')

pop_and_dogs['doglicenses_per_person'] = pop_and_dogs['doglicense_count']/pop_and_dogs['Pop_1E']

doglicenses_per_person = pop_and_dogs[['doglicense_count','doglicenses_per_person','GeoID']]
doglicenses_per_person.to_csv('doglicenses_per_person.csv')