In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import gmaps
import os
import json


In [2]:
animal_bites_file = pd.read_csv('animalbites.csv')
df = pd.DataFrame(animal_bites_file)
df

Unnamed: 0,bite_date,SpeciesIDDesc,BreedIDDesc,GenderIDDesc,color,vaccination_yrs,vaccination_date,victim_zip,AdvIssuedYNDesc,WhereBittenIDDesc,quarantine_date,DispositionIDDesc,head_sent_date,release_date,ResultsIDDesc
0,1985-05-05 00:00:00,DOG,,FEMALE,LIG. BROWN,1.0,1985-06-20 00:00:00,40229,NO,BODY,1985-05-05 00:00:00,UNKNOWN,,,UNKNOWN
1,1986-02-12 00:00:00,DOG,,UNKNOWN,BRO & BLA,,,40218,NO,BODY,1986-02-12 00:00:00,UNKNOWN,,,UNKNOWN
2,1987-05-07 00:00:00,DOG,,UNKNOWN,,,,40219,NO,BODY,1990-05-07 00:00:00,UNKNOWN,,,UNKNOWN
3,1988-10-02 00:00:00,DOG,,MALE,BLA & BRO,,,,NO,BODY,1990-10-02 00:00:00,UNKNOWN,,,UNKNOWN
4,1989-08-29 00:00:00,DOG,,FEMALE,BLK-WHT,,,,NO,BODY,,UNKNOWN,,,UNKNOWN
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8998,2017-09-05 00:00:00,DOG,,,,,,40243,,UNKNOWN,,,,,
8999,2017-09-07 00:00:00,DOG,POMERANIAN,MALE,RED,,,40204,,HEAD,,,,,
9000,2017-09-07 00:00:00,DOG,LABRADOR RETRIV,MALE,BROWN,,,47130,,UNKNOWN,,,,,
9001,2017-09-07 00:00:00,DOG,LABRADOR RETRIV,FEMALE,BLK WHT,,,40229,,BODY,,,,,


In [3]:
df["formatted_date"]= pd.to_datetime(df.iloc[:,0], errors="coerce")
df.head()

Unnamed: 0,bite_date,SpeciesIDDesc,BreedIDDesc,GenderIDDesc,color,vaccination_yrs,vaccination_date,victim_zip,AdvIssuedYNDesc,WhereBittenIDDesc,quarantine_date,DispositionIDDesc,head_sent_date,release_date,ResultsIDDesc,formatted_date
0,1985-05-05 00:00:00,DOG,,FEMALE,LIG. BROWN,1.0,1985-06-20 00:00:00,40229.0,NO,BODY,1985-05-05 00:00:00,UNKNOWN,,,UNKNOWN,1985-05-05
1,1986-02-12 00:00:00,DOG,,UNKNOWN,BRO & BLA,,,40218.0,NO,BODY,1986-02-12 00:00:00,UNKNOWN,,,UNKNOWN,1986-02-12
2,1987-05-07 00:00:00,DOG,,UNKNOWN,,,,40219.0,NO,BODY,1990-05-07 00:00:00,UNKNOWN,,,UNKNOWN,1987-05-07
3,1988-10-02 00:00:00,DOG,,MALE,BLA & BRO,,,,NO,BODY,1990-10-02 00:00:00,UNKNOWN,,,UNKNOWN,1988-10-02
4,1989-08-29 00:00:00,DOG,,FEMALE,BLK-WHT,,,,NO,BODY,,UNKNOWN,,,UNKNOWN,1989-08-29


In [4]:
narrowed_df = df[['formatted_date', 'SpeciesIDDesc', 'BreedIDDesc', 'GenderIDDesc', 'victim_zip', 'WhereBittenIDDesc']]
narrowed_df


Unnamed: 0,formatted_date,SpeciesIDDesc,BreedIDDesc,GenderIDDesc,victim_zip,WhereBittenIDDesc
0,1985-05-05,DOG,,FEMALE,40229,BODY
1,1986-02-12,DOG,,UNKNOWN,40218,BODY
2,1987-05-07,DOG,,UNKNOWN,40219,BODY
3,1988-10-02,DOG,,MALE,,BODY
4,1989-08-29,DOG,,FEMALE,,BODY
...,...,...,...,...,...,...
8998,2017-09-05,DOG,,,40243,UNKNOWN
8999,2017-09-07,DOG,POMERANIAN,MALE,40204,HEAD
9000,2017-09-07,DOG,LABRADOR RETRIV,MALE,47130,UNKNOWN
9001,2017-09-07,DOG,LABRADOR RETRIV,FEMALE,40229,BODY


In [5]:
data = narrowed_df.rename(columns={"formatted_date": "Date", "SpeciesIDDesc": "Animal Type",
                                  "BreedIDDesc": "Breed", "GenderIDDesc": "Gender",
                                  "victim_zip": "Victim Zip Code", "WhereBittenIDDesc": "Where Bitten"})

data

Unnamed: 0,Date,Animal Type,Breed,Gender,Victim Zip Code,Where Bitten
0,1985-05-05,DOG,,FEMALE,40229,BODY
1,1986-02-12,DOG,,UNKNOWN,40218,BODY
2,1987-05-07,DOG,,UNKNOWN,40219,BODY
3,1988-10-02,DOG,,MALE,,BODY
4,1989-08-29,DOG,,FEMALE,,BODY
...,...,...,...,...,...,...
8998,2017-09-05,DOG,,,40243,UNKNOWN
8999,2017-09-07,DOG,POMERANIAN,MALE,40204,HEAD
9000,2017-09-07,DOG,LABRADOR RETRIV,MALE,47130,UNKNOWN
9001,2017-09-07,DOG,LABRADOR RETRIV,FEMALE,40229,BODY


In [6]:
# removing null values to avoid errors 
data.dropna(inplace = True)

In [7]:
# start stop and step variables
start, stop, step = 0, -6, 1

# converting to string data type 
data["Date"]= data["Date"].astype(str) 

# slicing till month element 
data["Year"]= data["Date"].str.slice(start, stop, step) 

# display 
data

Unnamed: 0,Date,Animal Type,Breed,Gender,Victim Zip Code,Where Bitten,Year
1153,2010-12-24,DOG,GERM SHEPHERD,UNKNOWN,40291,BODY,2010
1202,2011-01-11,DOG,DACHSHUND,MALE,40207,BODY,2011
1208,2011-01-16,DOG,PIT BULL,UNKNOWN,40216,BODY,2011
1244,2011-01-29,DOG,SHIH TZU,MALE,40223,BODY,2011
1252,2011-02-04,DOG,SHIH TZU,FEMALE,40229,BODY,2011
...,...,...,...,...,...,...,...
8995,2017-09-04,DOG,BLACK LAB,MALE,40229,BODY,2017
8997,2017-09-06,DOG,CHICHAUHUA,FEMALE,40206,HEAD,2017
8999,2017-09-07,DOG,POMERANIAN,MALE,40204,HEAD,2017
9000,2017-09-07,DOG,LABRADOR RETRIV,MALE,47130,UNKNOWN,2017


In [8]:
data = data.drop(columns='Date')
data

Unnamed: 0,Animal Type,Breed,Gender,Victim Zip Code,Where Bitten,Year
1153,DOG,GERM SHEPHERD,UNKNOWN,40291,BODY,2010
1202,DOG,DACHSHUND,MALE,40207,BODY,2011
1208,DOG,PIT BULL,UNKNOWN,40216,BODY,2011
1244,DOG,SHIH TZU,MALE,40223,BODY,2011
1252,DOG,SHIH TZU,FEMALE,40229,BODY,2011
...,...,...,...,...,...,...
8995,DOG,BLACK LAB,MALE,40229,BODY,2017
8997,DOG,CHICHAUHUA,FEMALE,40206,HEAD,2017
8999,DOG,POMERANIAN,MALE,40204,HEAD,2017
9000,DOG,LABRADOR RETRIV,MALE,47130,UNKNOWN,2017


In [9]:
data = data.sort_values('Gender')
data

Unnamed: 0,Animal Type,Breed,Gender,Victim Zip Code,Where Bitten,Year
9001,DOG,LABRADOR RETRIV,FEMALE,40229,BODY,2017
7856,DOG,BULLMASTIFF,FEMALE,40219,BODY,2016
4891,DOG,PIT BULL,FEMALE,40245,BODY,2013
4914,DOG,BULL DOG,FEMALE,40220,BODY,2013
7852,DOG,GOLD RETRIEVER,FEMALE,40299,HEAD,2016
...,...,...,...,...,...,...
7655,DOG,SIBERAN HUSKY,UNKNOWN,40217,BODY,2016
7656,DOG,BOXER,UNKNOWN,40217,BODY,2016
7658,DOG,COLLIE,UNKNOWN,45142,BODY,2016
8084,DOG,BOSTON TERRIER,UNKNOWN,40291,UNKNOWN,2016


In [10]:
animals_types =data['Animal Type'].value_counts()
animals_types

DOG    2477
CAT       2
Name: Animal Type, dtype: int64

In [11]:
animals_gen=data['Gender'].value_counts()
animals_gen

MALE       1653
FEMALE      764
UNKNOWN      62
Name: Gender, dtype: int64

In [12]:
animals_desc=data['Breed'].value_counts()
animals_desc.head()

PIT BULL           707
GERM SHEPHERD      218
LABRADOR RETRIV    178
BOXER              124
CHICHAUHUA         109
Name: Breed, dtype: int64

In [13]:
bite_desc=data['Where Bitten'].value_counts()
bite_desc

BODY       1783
HEAD        457
UNKNOWN     239
Name: Where Bitten, dtype: int64

In [52]:
year_data = year_data.drop(year_data[(year_data.Year < "2007") & (year_data.Year > "2017")].index, inplace=False)
year_data.sort_values('Year')

Unnamed: 0,Animal Type,Breed,Gender,Victim Zip Code,Where Bitten,Year
8570,DOG,RAT TERRIER,FEMALE,40299,UNKNOWN,1952
7580,DOG,PIT BULL,UNKNOWN,40291,BODY,1995
8816,DOG,PIT BULL,FEMALE,40229,BODY,2001
1153,DOG,GERM SHEPHERD,UNKNOWN,40291,BODY,2010
1716,DOG,GERM SHEPHERD,MALE,40219,BODY,2011
...,...,...,...,...,...,...
8265,DOG,PIT BULL,MALE,40213,BODY,2017
8261,DOG,BOXER,MALE,40218,BODY,2017
8473,DOG,PIT BULL,MALE,40291,BODY,2017
3012,DOG,PIT BULL,MALE,40272,BODY,2021


In [53]:
year_group = year_data.groupby("Year").count()
year_group

Unnamed: 0_level_0,Animal Type,Breed,Gender,Victim Zip Code,Where Bitten
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1952,1,1,1,1,1
1995,1,1,1,1,1
2001,1,1,1,1,1
2010,1,1,1,1,1
2011,322,322,322,322,322
2012,279,279,279,279,279
2013,213,213,213,213,213
2014,207,207,207,207,207
2015,484,484,484,484,484
2016,553,553,553,553,553
