## Import Libraries and Data

In [1]:
# Import libraries
import csv
import pandas as pd
import numpy as np

In [2]:
# Load the NYPD hate-crime complaints from the CSV file into pandas.
# pd.read_csv already returns a DataFrame; the extra pd.DataFrame(...) call is
# kept so that `data` and `df` stay distinct DataFrame objects, as before.
data = pd.read_csv(filepath_or_buffer='NYPD_Hate_Crimes.csv')
df = pd.DataFrame(data=data)

## Explore and Clean Data

In [3]:
# Look at the first ten records to get a feel for the columns and values
preview_rows = df.head(n=10)
print(preview_rows)

   Full Complaint ID  Complaint Year Number  Month Number Record Create Date   
0    201904612204817                   2019             2         02/08/2019  \
1    201904812229517                   2019             3         03/09/2019   
2    201904812231317                   2019             3         03/10/2019   
3    201904812226617                   2019             3         03/08/2019   
4    201904212405517                   2019             5         05/11/2019   
5    201904212405517                   2019             5         05/11/2019   
6    201904212405517                   2019             5         05/11/2019   
7    201904012711317                   2019             6         06/01/2019   
8    201904912495217                   2019             7         07/09/2019   
9    201904712851617                   2019             8         08/05/2019   

   Complaint Precinct Code Patrol Borough Name County   
0                       46   PATROL BORO BRONX  BRONX  \
1    

In [4]:
# Show the dtype pandas inferred for every column
column_types = df.dtypes
print(column_types)

Full Complaint ID                 int64
Complaint Year Number             int64
Month Number                      int64
Record Create Date               object
Complaint Precinct Code           int64
Patrol Borough Name              object
County                           object
Law Code Category Description    object
Offense Description              object
PD Code Description              object
Bias Motive Description          object
Offense Category                 object
Arrest Date                      object
Arrest Id                        object
dtype: object


In [5]:
# Count how often each "Bias Motive Description" value occurs, rarest first
bias_motive_column = df['Bias Motive Description']
motive = bias_motive_column.value_counts(ascending=True)
print(motive)

# Note: the counts contain both "ANTI_BLACK" and "ANTI-BLACK"; the underscore
# variant should be renamed to the hyphenated one so the two are counted together.

Bias Motive Description
ANTI-PROTESTANT                        1
ANTI_BLACK                             1
ANTI-PHYSICAL DISABILITY               1
60 YRS AND OLDER                       1
ANTI-EASTERN ORTHODOX                  2
ANTI-BUDDHIST                          3
ANTI-JEHOVAHS WITNESS                  3
ANTI-MULTI-RACIAL GROUPS               3
ANTI-RELIGIOUS PRACTICE GENERALLY      3
ANTI-SIKH                              4
ANTI-OTHER RELIGION                    5
ANTI-HINDU                             6
ANTI-ARAB                              7
ANTI-GENDER NON-CONFORMING             7
ANTI-LGBT (MIXED GROUP)               11
ANTI-FEMALE                           14
ANTI-FEMALE HOMOSEXUAL (LESBIAN)      31
ANTI-HISPANIC                         37
ANTI-CATHOLIC                         40
ANTI-TRANSGENDER                      53
ANTI-MUSLIM                           53
ANTI-OTHER ETHNICITY                  80
ANTI-WHITE                            84
ANTI-BLACK                       

In [6]:
# Correct the mislabelled "ANTI_BLACK" entry (a typo) in the
# "Bias Motive Description" column so it matches "ANTI-BLACK"
df['Bias Motive Description'] = df['Bias Motive Description'].replace(
    to_replace='ANTI_BLACK',
    value='ANTI-BLACK',
)

# Re-print the counts to confirm the underscore variant is gone
corrected_counts = df['Bias Motive Description'].value_counts(ascending=True)
print(corrected_counts)


Bias Motive Description
ANTI-PROTESTANT                        1
ANTI-PHYSICAL DISABILITY               1
60 YRS AND OLDER                       1
ANTI-EASTERN ORTHODOX                  2
ANTI-BUDDHIST                          3
ANTI-JEHOVAHS WITNESS                  3
ANTI-MULTI-RACIAL GROUPS               3
ANTI-RELIGIOUS PRACTICE GENERALLY      3
ANTI-SIKH                              4
ANTI-OTHER RELIGION                    5
ANTI-HINDU                             6
ANTI-GENDER NON-CONFORMING             7
ANTI-ARAB                              7
ANTI-LGBT (MIXED GROUP)               11
ANTI-FEMALE                           14
ANTI-FEMALE HOMOSEXUAL (LESBIAN)      31
ANTI-HISPANIC                         37
ANTI-CATHOLIC                         40
ANTI-TRANSGENDER                      53
ANTI-MUSLIM                           53
ANTI-OTHER ETHNICITY                  80
ANTI-WHITE                            84
ANTI-BLACK                           185
ANTI-MALE HOMOSEXUAL (GAY)       

In [7]:
# Count how often each "Offense Category" value occurs, rarest first
offense_category_column = df['Offense Category']
offense = offense_category_column.value_counts(ascending=True)
print(offense)

Offense Category
Age                                      1
Disability                               1
Race/Religious Practice                  2
Gender                                  74
Ethnicity/National Origin/Ancestry     124
Sexual Orientation                     295
Race/Color                             554
Religion/Religious Practice           1049
Name: count, dtype: int64


In [8]:
# Show only the records whose Offense Category is "Sexual Orientation"
is_sexual_orientation = df['Offense Category'] == 'Sexual Orientation'
print(df.loc[is_sexual_orientation])

      Full Complaint ID  Complaint Year Number  Month Number   
0       201904612204817                   2019             2  \
13      202004412215017                   2020             2   
18      202004012762317                   2020             7   
19      202004012762317                   2020             7   
21      202004812794517                   2020            10   
...                 ...                    ...           ...   
2027    202311012279217                   2023             3   
2035    202210412412917                   2022             5   
2084    202304312126217                   2023             1   
2086    202310812182217                   2023             2   
2097    202301012126417                   2023             2   

     Record Create Date  Complaint Precinct Code       Patrol Borough Name   
0            02/08/2019                       46         PATROL BORO BRONX  \
13           02/06/2020                       44         PATROL BORO BRONX 