# Exploratory Data Analysis

## Set up

In [1]:
# imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [2]:
# Read the cleaned stop records and let pandas display every column
df = pd.read_csv('terry-clean.csv')
pd.options.display.max_columns = 999

# Keep a handle on the reported-time column before trimming the frame
time = df['reported_time']

# Trim the frame to the columns explored in the cells below
df = df.drop(columns=[
    'go_/_sc_num',
    'terry_stop_id',
    'officer_id',
    'subject_id',
    'reported_date',
    'officer_squad',
    'officer_yob',
    'precinct',
    'sector',
    'beat',
])
df.head()

In [3]:
# Summarize the trimmed frame: column dtypes, non-null counts and memory usage
df.info()

In [4]:
# Turn time into categories

# Parse the reported time and pull out the hour of day
df['reported_time'] = pd.to_datetime(df['reported_time'])
df['hour'] = df['reported_time'].dt.hour

# Six-hour watch boundaries. With right=False every bin is closed on the left:
# [0, 6), [6, 12), [12, 18), [18, 24)
bins = [0, 6, 12, 18, 24]

# Bin the hours and store them with item assignment. Item syntax creates the
# column directly, so no placeholder 'watch' column has to exist beforehand
# (attribute-style `df.watch = ...` only works on an already-existing column).
bins_time = pd.cut(df['hour'], bins, right=False)
df['watch'] = bins_time

# Show a small sample of the derived columns instead of printing the whole Series
df[['reported_time', 'hour', 'watch']].head()



# What's in the data?
Let's look at the individual plots for each feature.

In [7]:

# One horizontal count plot per feature (the two call-type columns and the raw
# timestamp are skipped), bars sorted by frequency and labelled with their share
for col in df.columns.drop(['final_call_type', 'initial_call_type', 'reported_time']):
    # Most frequent category first
    ranked = df[col].value_counts().index
    ax = sns.countplot(
        y=col,
        data=df,
        order=ranked,
        palette='pastel',
        edgecolor=sns.color_palette("dark", 3),
    )

    # Label each bar with its percentage of the total number of rows,
    # placed just past the end of the bar and vertically centred on it
    total = len(df[col])
    for bar in ax.patches:
        width = bar.get_width()
        label = f'{100 * width / total:.1f}%'
        ax.annotate(label, (bar.get_x() + width + 0.02,
                            bar.get_y() + bar.get_height() / 2))

    # Human-readable title derived from the column name
    title = col.replace('_', ' ').title()
    ax.set_title(title, fontsize=18)
    ax.set_ylabel(title, fontsize=16)
    ax.set_xlabel('Count', fontsize=16)
    plt.xticks(fontsize=14)
    plt.yticks(fontsize=14)
    sns.despine(left=False)
    plt.show()


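The reporting hour needs a better plot: the loop above orders the bars by frequency, so the hours do not appear in chronological order. The plot below keeps the hours in their natural order.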

In [6]:
# Count plot of the reporting hour, drawn on its own figure
grid = sns.catplot(y='hour', kind="count", data=df)
# A single-facet catplot exposes its only axes as `grid.ax`
grid.ax.set(title='Reporting Hour', ylabel='Hour', xlabel='Count')
plt.show()

## Data Summary
The largest category within each feature:
* Age group: 26 - 35 (34%)
* Stop Resolution: Field Contact (39.1%)
* Weapon type: None (94.4%)
* Officer gender: Male (88.9%)
* Officer race: White (77.4%)
* Subject perceived race: White (49.1%)
* Subject perceived gender: Male (77.8%)
* Call type: 911 (43.3%)
* Arrest flag: No (96.0%)
* Frisk flag: No (76.9%)
* Watch: 6 pm - Midnight (27.3%)
* Hour: 6 pm (6.3%)