# **NYPD Hate Crimes - EDA**

# **Importing Libraries and Data**
---

In [None]:
# importing required libraries
import numpy as np      # numerical operations
import pandas as pd     # dataframe handling
import seaborn as sns   # visualization
import matplotlib.pyplot as plt # visualization

import os

# ignore warnings
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Locate the source file: walk the Kaggle input directory and collect every file path.
input_paths = [
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
]

# Fail fast with a clear message. Previously, when the walk found no files, `dir`
# was never assigned and still referred to the Python builtin, so the problem only
# surfaced later as a confusing error inside pd.read_csv.
if not input_paths:
    raise FileNotFoundError("No files found under '/kaggle/input'")

# As before, the last file visited by the walk is the one that gets loaded.
# NOTE: the name `dir` shadows the builtin; it is kept because the loading cell reads it.
dir = input_paths[-1]

In [None]:
# Read the located CSV file into a dataframe, then preview its first five rows.
df = pd.read_csv(filepath_or_buffer=dir)
df.head(n=5)

# **Data Pre-Processing**
---

## **1. Extracting required features**
We extract only those features we require into the dataframe and discard all the rest.

In [None]:
# List every column of the loaded data before choosing which ones to keep.
df.columns

In [None]:
# Keep only the columns used in this analysis. `.copy()` makes the result an
# independent dataframe, so the column assignments in later cells do not operate
# on a slice of the loaded frame (the cause of SettingWithCopyWarning).
required_columns = [
    'Complaint Year Number', 'Month Number', 'Record Create Date',
    'Complaint Precinct Code', 'Patrol Borough Name', 'County',
    'Law Code Category Description', 'Bias Motive Description', 'Offense Category',
]
df = df[required_columns].copy()

In [None]:
# Preview the first five rows to check the retained columns.
df.head(n=5)

## **2. Fixing time-stamps**

In [None]:
# Parse the record-creation date (expected as MM/DD/YYYY text) into datetimes.
record_dates = pd.to_datetime(df['Record Create Date'], format='%m/%d/%Y')

# Derive the day of the week (full weekday name) and the day of the month.
# Month and year are not derived here: the dataset's own 'Month Number' and
# 'Complaint Year Number' columns are kept instead.
df = df.assign(
    Day=record_dates.dt.strftime('%A'),
    Date=record_dates.dt.day,
)

# Selecting the final columns both reorders them and leaves out the source
# 'Record Create Date' column, so no separate in-place drop is needed.
# `.copy()` keeps the result independent so later cells can assign columns safely.
df = df[['Date', 'Month Number', 'Complaint Year Number', 'Day', 'Complaint Precinct Code', 'Patrol Borough Name', 'County',
         'Law Code Category Description', 'Bias Motive Description', 'Offense Category']].copy()

df.head()

## **3. Renaming Columns**
We rename the columns to short, lowercase names.

In [None]:
# Map each original column name to its short name explicitly instead of assigning
# a positional list, so a change in column order cannot silently mislabel a column.
# Names that are already short are left untouched, which keeps this cell re-runnable.
column_renames = {
    'Date': 'date',
    'Month Number': 'month',
    'Complaint Year Number': 'year',
    'Day': 'day',
    'Complaint Precinct Code': 'precinct',
    'Patrol Borough Name': 'patrol',
    'County': 'county',
    'Law Code Category Description': 'category',
    'Bias Motive Description': 'motive',
    'Offense Category': 'offense',
}
df = df.rename(columns=column_renames)

In [None]:
# Preview the first five rows under the new column names.
df.head(n=5)

## **4. Reconfiguring Data**
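We make the data easier to read by replacing the long values in the `patrol` and `offense` columns with short labels (using the mappings below) and by normalising the capitalisation of the `motive`, `county`, and `category` columns.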

In [None]:
# Short labels for the patrol boroughs.
patrol_mapping = {
    'PATROL BORO BKLYN NORTH': 'Brooklyn_N',
    'PATROL BORO BKLYN SOUTH': 'Brooklyn_S',
    'PATROL BORO MAN SOUTH': 'Man_S',
    'PATROL BORO MAN NORTH': 'Man_N',
    'PATROL BORO QUEENS SOUTH': 'Queens_S',
    'PATROL BORO QUEENS NORTH': 'Queens_N',
    'PATROL BORO BRONX': 'Bronx',
    'PATROL BORO STATEN ISLAND': 'Staten_Is'
}

# Short labels for the offense categories.
offense_mapping = {
    'Religion/Religious Practice' : 'Religion',
    'Race/Color' : 'Race',
    'Ethnicity/National Origin/Ancestry' : 'Origin',
    'Gender' : 'Gender',
    'Sexual Orientation' : 'Orientation',
    'Disability' : 'Disabled',  # fixed typo: this label was previously written as 'Diabled'
    'Age' : 'Age'
}

# Apply both relabelings and normalise the casing of the remaining text columns
# (str.capitalize: first character upper-case, the rest lower-case).
# Values that are not keys of a mapping are left unchanged by replace().
df = df.assign(
    patrol=df['patrol'].replace(patrol_mapping),
    offense=df['offense'].replace(offense_mapping),
    motive=df['motive'].str.capitalize(),
    county=df['county'].str.capitalize(),
    category=df['category'].str.capitalize(),
)

df.head()

# **Predicting Motive of Hate Related Crimes**
---

In [None]:
# Keep the feature columns plus 'motive' (placed last); the 'day' column is left out.
# `.copy()` makes the result an independent dataframe so the motive encoding that
# follows can assign to it without operating on a slice.
modelling_columns = ['date', 'month', 'year', 'precinct', 'patrol', 'county', 'category', 'offense', 'motive']
df = df[modelling_columns].copy()
df.head()

In [None]:
# Motive labels listed in code order: a label's position in this list is its integer code.
motive_labels = [
    'Anti-jewish',
    'Anti-white',
    'Anti-hispanic',
    'Anti-transgender',
    'Anti-asian',
    'Anti-male homosexual (gay)',
    'Anti-muslim',
    'Anti-female homosexual (lesbian)',
    'Anti-black',
    'Anti-gender non-conforming',
    'Anti-lgbt (mixed group)',
    'Anti-arab',
    'Anti-physical disability',
    'Anti-other ethnicity',
    'Anti-protestant',
    'Anti-catholic',
    'Anti-religious practice generally',
    'Anti-other religion',
    'Anti-female',
    'Anti-jehovahs witness',
    'Anti-eastern orthodox',
    'Anti-buddhist',
    'Anti-multi-racial groups',
    'Anti-hindu',
    'Anti-sikh',
    '60 yrs and older',
]
motive_mapping = {label: code for code, label in enumerate(motive_labels)}

# Swap each motive label for its integer code; labels not in the mapping are left as they are.
df['motive'] = df['motive'].replace(motive_mapping)

In [None]:
# Preview the first five rows with the motive column encoded.
df.head(n=5)

In [None]:
# One-hot encode every listed feature column; 'motive' is not listed, so it is passed through as-is.
feature_columns = ['date', 'month', 'year', 'precinct', 'patrol', 'county', 'category', 'offense']
predict_y = pd.get_dummies(df, columns=feature_columns)

In [None]:
# (rows, columns) of the one-hot encoded frame.
predict_y.shape

---
---
# ***Exploratory Data Analysis (EDA)***
---
---

## **1. Temporal Analysis**

Predicting County: as a first step, the patrol borough labels are converted to integer codes.

In [None]:
# Integer codes for the patrol boroughs. The keys must match the short labels
# already stored in df['patrol'] ('Brooklyn_S', 'Staten_Is', ...). The previous
# keys ('Brooklyn(S)', 'Staten', ...) matched none of those labels except 'Bronx',
# so only that one borough was encoded and the column ended up mixing text and integers.
patrol_mapping = {
    'Brooklyn_S' : 0,
    'Brooklyn_N' : 1,
    'Man_S' : 2,
    'Man_N' : 3,
    'Queens_S' : 4,
    'Queens_N' : 5,
    'Bronx' : 6,
    'Staten_Is' : 7
}

# Replace each patrol label with its integer code.
df['patrol'] = df['patrol'].replace(patrol_mapping)

In [None]:
# Distinct values of the 'category' column (the name was misspelled 'catogory', which raised KeyError).
df['category'].unique()

In [None]:
# Preview the first five rows with the patrol column encoded.
df.head(n=5)