In [2]:
# Dependencies
import pandas as pd
import numpy as np
import requests
import json

In [4]:
# Load the restaurant ethnicities into a DataFrame
dog_df = pd.read_csv("../Resources/DOHMH_Dog_Bite_Data.csv")

# Display sample data
dog_df.head()

Unnamed: 0,UniqueID,DateOfBite,Species,Breed,Age,Gender,SpayNeuter,Borough,ZipCode
0,1,January 01 2018,DOG,UNKNOWN,,U,False,Brooklyn,11220.0
1,2,January 04 2018,DOG,UNKNOWN,,U,False,Brooklyn,
2,3,January 06 2018,DOG,Pit Bull,,U,False,Brooklyn,11224.0
3,4,January 08 2018,DOG,Mixed/Other,4.0,M,False,Brooklyn,11231.0
4,5,January 09 2018,DOG,Pit Bull,,U,False,Brooklyn,11224.0


In [231]:
# found a '?' value in the ZipCode column, so eliminating that row from the data set.

dog_df = dog_df.loc[dog_df["ZipCode"] != '?']
dog_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 22662 entries, 0 to 22662
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   UniqueID    22662 non-null  int64 
 1   DateOfBite  22662 non-null  object
 2   Species     22662 non-null  object
 3   Breed       20444 non-null  object
 4   Age         11442 non-null  object
 5   Gender      22662 non-null  object
 6   SpayNeuter  22662 non-null  bool  
 7   Borough     22662 non-null  object
 8   ZipCode     16804 non-null  object
dtypes: bool(1), int64(1), object(7)
memory usage: 1.6+ MB


In [232]:
#Changing the datatype for the DateOfBite column from text to datetime.

dog_df['DateOfBite'] = pd.to_datetime(dog_df['DateOfBite'], errors='coerce')
dog_df['DateOfBite'][:5]

0   2018-01-01
1   2018-01-04
2   2018-01-06
3   2018-01-08
4   2018-01-09
Name: DateOfBite, dtype: datetime64[ns]

In [233]:
# split to year, month, day, dayofweek, weekname and droped the DateOFBite, Species column.

dog_df['Year'] = dog_df['DateOfBite'].dt.year
dog_df['Month'] = dog_df['DateOfBite'].dt.month
dog_df['Day'] = dog_df['DateOfBite'].dt.day 
dog_df['DayOfWeek'] = dog_df['DateOfBite'].dt.dayofweek # day of the week starts(Mon = 0)
dog_df = dog_df.drop(['DateOfBite'], axis=1)
dog_df= dog_df.drop(['Species'], axis=1)
dog_df.head()

Unnamed: 0,UniqueID,Breed,Age,Gender,SpayNeuter,Borough,ZipCode,Year,Month,Day,DayOfWeek
0,1,UNKNOWN,,U,False,Brooklyn,11220.0,2018,1,1,0
1,2,UNKNOWN,,U,False,Brooklyn,,2018,1,4,3
2,3,Pit Bull,,U,False,Brooklyn,11224.0,2018,1,6,5
3,4,Mixed/Other,4.0,M,False,Brooklyn,11231.0,2018,1,8,0
4,5,Pit Bull,,U,False,Brooklyn,11224.0,2018,1,9,1


In [234]:
# Create a dictionary mapping months to seasons

season = {12: 'Winter', 1: 'Winter', 2: 'Winter', 3: 'Spring', 4: 'Spring', 5: 'Spring',
           6: 'Summer', 7: 'Summer', 8: 'Summer', 9: 'Fall', 10: 'Fall', 11: 'Fall'}

# Create a new column 'Seasons' based on the mapping

dog_df['Seasons'] = dog_df['Month'].map(season)

dog_df.head()

Unnamed: 0,UniqueID,Breed,Age,Gender,SpayNeuter,Borough,ZipCode,Year,Month,Day,DayOfWeek,Seasons
0,1,UNKNOWN,,U,False,Brooklyn,11220.0,2018,1,1,0,Winter
1,2,UNKNOWN,,U,False,Brooklyn,,2018,1,4,3,Winter
2,3,Pit Bull,,U,False,Brooklyn,11224.0,2018,1,6,5,Winter
3,4,Mixed/Other,4.0,M,False,Brooklyn,11231.0,2018,1,8,0,Winter
4,5,Pit Bull,,U,False,Brooklyn,11224.0,2018,1,9,1,Winter


In [235]:
a = dog_df.groupby(["Seasons"])["Month"].count()
a

Seasons
Fall      5508
Spring    5626
Summer    6989
Winter    4539
Name: Month, dtype: int64

In [237]:
# Create a dictionary mapping dayofweek to seasons
weekname = {0: 'Monday', 1: 'Tuesday', 2: 'Wednesday', 3: 'Thursday', 4: 'Friday', 5: 'Saturday',
           6: 'Sunday'}

# Create a new column 'season' based on the mapping
dog_df['WeekName'] = dog_df['DayOfWeek'].map(weekname)

dog_df.head()

Unnamed: 0,UniqueID,Breed,Age,Gender,SpayNeuter,Borough,ZipCode,Year,Month,Day,DayOfWeek,Seasons,WeekName
0,1,UNKNOWN,,U,False,Brooklyn,11220.0,2018,1,1,0,Winter,Monday
1,2,UNKNOWN,,U,False,Brooklyn,,2018,1,4,3,Winter,Thursday
2,3,Pit Bull,,U,False,Brooklyn,11224.0,2018,1,6,5,Winter,Saturday
3,4,Mixed/Other,4.0,M,False,Brooklyn,11231.0,2018,1,8,0,Winter,Monday
4,5,Pit Bull,,U,False,Brooklyn,11224.0,2018,1,9,1,Winter,Tuesday


In [238]:
# Establish the bins.

size_bins= [0,4,6]
labels = ["weekday" , "weekend"]

In [239]:
# Categorize the weenday based on the bins
# Use `pd.cut` on the "DayOfWeek" column of the `dog_df` DataFrame.

dog_df["WeekDay"]=pd.cut(dog_df["DayOfWeek"],size_bins,labels=labels,include_lowest=True)
dog_df.head(5).sort_values( by = "DayOfWeek" ,ascending=False)


Unnamed: 0,UniqueID,Breed,Age,Gender,SpayNeuter,Borough,ZipCode,Year,Month,Day,DayOfWeek,Seasons,WeekName,WeekDay
2,3,Pit Bull,,U,False,Brooklyn,11224.0,2018,1,6,5,Winter,Saturday,weekend
1,2,UNKNOWN,,U,False,Brooklyn,,2018,1,4,3,Winter,Thursday,weekday
4,5,Pit Bull,,U,False,Brooklyn,11224.0,2018,1,9,1,Winter,Tuesday,weekday
0,1,UNKNOWN,,U,False,Brooklyn,11220.0,2018,1,1,0,Winter,Monday,weekday
3,4,Mixed/Other,4.0,M,False,Brooklyn,11231.0,2018,1,8,0,Winter,Monday,weekday


In [203]:
null= dog_df.isnull().sum()
null


UniqueID          0
DateOfBite        0
Species           0
Breed          2218
Age           11221
Gender            0
SpayNeuter        0
Borough           0
ZipCode        5858
dtype: int64

In [240]:
#I am creating a leaderboard for the plots with weekday, weekname seasons by borough, age, breed 
b= dog_df.groupby(['Borough'])["ZipCode"].count()
b

Borough
Bronx            2910
Brooklyn         4192
Manhattan        3410
Other             451
Queens           4494
Staten Island    1347
Name: ZipCode, dtype: int64

In [241]:
dog_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 22662 entries, 0 to 22662
Data columns (total 14 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   UniqueID    22662 non-null  int64   
 1   Breed       20444 non-null  object  
 2   Age         11442 non-null  object  
 3   Gender      22662 non-null  object  
 4   SpayNeuter  22662 non-null  bool    
 5   Borough     22662 non-null  object  
 6   ZipCode     16804 non-null  object  
 7   Year        22662 non-null  int32   
 8   Month       22662 non-null  int32   
 9   Day         22662 non-null  int32   
 10  DayOfWeek   22662 non-null  int32   
 11  Seasons     22662 non-null  object  
 12  WeekName    22662 non-null  object  
 13  WeekDay     22662 non-null  category
dtypes: bool(1), category(1), int32(4), int64(1), object(7)
memory usage: 1.9+ MB


In [244]:
dog_df.isnull().sum()


UniqueID          0
Breed          2218
Age           11220
Gender            0
SpayNeuter        0
Borough           0
ZipCode        5858
Year              0
Month             0
Day               0
DayOfWeek         0
Seasons           0
WeekName          0
WeekDay           0
dtype: int64