# Default Import

In [1]:
from ris import pysqldb
from IPython.display import Markdown, clear_output
from sqlalchemy import create_engine
import ris
import getpass
import datetime 
import pandas as pd
import numpy as np
import os
import requests

timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
ts = datetime.datetime.now().strftime('%Y-%m-%d')

Markdown('<strong>Notebook run on: {} | by {} | Using ris library version: {} | File Location: {}'.format(
    timestamp, getpass.getuser(), ris.__version__, os.getcwd()
))


<strong>Notebook run on: 2020-07-09 19:16 | by soge | Using ris library version: 1.5.6 | File Location: E:\RIS\Staff Folders\Samuel\Requests\AG\Bicycle_Violation_Disc

# Db connections

In [2]:
try: 
    db.params['user']
except:
    db = pysqldb.DbConnect(type='PG', server='dotdevrhpgsql01', database='ris')


Additional database connection details required:
User name (ris):soge
Password (ris)········
Database connection (PG) to ris on dotdevrhpgsql01 - user: soge 
Connection established 2020-07-09 19:16:40


In [3]:
#sip = pysqldb.DbConnect(server='dotpgsql01', database='sip', type='PG', user = db.params['user'], password = db.params['password'])
#forms = pysqldb.DbConnect(server='dot55sql01', database='forms', type='MS', user = 'arcgis', password = 'arcgis')

In [4]:
data = pd.read_csv('NYPD_Criminal_Court_Summons__Historic.csv')

In [5]:
data['YEAR'] = data['SUMMONS_DATE'].apply(lambda x:x[6::])

# TOTAL STATS

In [6]:
nulls = data.loc[data['RACE'].isnull()].shape[0]
unknown = data.loc[data['RACE']=='UNKNOWN'].shape[0]
knowns = data.loc[(data['RACE'].notnull()) & (data['RACE'] != 'UNKNOWN')].shape[0]
tot = nulls + unknown + knowns

In [7]:
all_data = [nulls,unknown, knowns,tot]

In [8]:
all_data

[2720, 250004, 2162, 254886]

In [9]:
percents = {'nulls': (float(nulls)/float(tot)) * 100, 'unknown': (float(unknown)/float(tot)) * 100, 'knowns' : (float(knowns)/float(tot)) * 100}

In [10]:
percents

{'knowns': 0.8482223425374481,
 'nulls': 1.0671437426928116,
 'unknown': 98.08463391476974}

# STATS BY YEAR

In [11]:
nulls = data.loc[data['RACE'].isnull()].groupby(['YEAR'])['SUMMONS_KEY'].count()
nulls

YEAR
2011       1
2012       1
2014       2
2015    2298
2016     381
2017      36
2018       1
Name: SUMMONS_KEY, dtype: int64

In [12]:
unknowns = data.loc[data['RACE']=='UNKNOWN'].groupby(['YEAR'])['SUMMONS_KEY'].count()
unknowns

YEAR
2006    21407
2007    28069
2008    29083
2009    30333
2010    33217
2011    38663
2012    36853
2013    24964
2014     6041
2015      883
2016      437
2017       47
2018        3
2019        4
Name: SUMMONS_KEY, dtype: int64

In [13]:
knowns = data.loc[(data['RACE'].notnull()) & (data['RACE'] != 'UNKNOWN')].groupby(['YEAR'])['SUMMONS_KEY'].count()
knowns

YEAR
2010      1
2013      1
2016    806
2017    771
2018    187
2019    396
Name: SUMMONS_KEY, dtype: int64

In [14]:
n = pd.DataFrame(nulls).rename(columns={'SUMMONS_KEY':'nulls'})
u = pd.DataFrame(unknowns).rename(columns={'SUMMONS_KEY':'unknowns'})
k = pd.DataFrame(knowns).rename(columns={'SUMMONS_KEY':'knowns'})

In [15]:
tot_stats = pd.merge(pd.merge(n,u, on='YEAR', how= 'outer', sort = True), k, on='YEAR', how= 'outer', sort = True)

In [16]:
tot_stats['TOTAL'] = tot_stats.sum(axis=1)
tot_stats['nulls_%'] = (tot_stats.nulls/tot_stats.TOTAL)*100
tot_stats['unknowns_%'] = (tot_stats.unknowns/tot_stats.TOTAL)*100
tot_stats['knowns%'] = (tot_stats.knowns/tot_stats.TOTAL)*100

In [17]:
tot_stats = tot_stats.reindex(columns=['nulls','nulls_%','unknowns','unknowns_%','knowns','knowns_%','TOTAL'])
tot_stats

Unnamed: 0_level_0,nulls,nulls_%,unknowns,unknowns_%,knowns,knowns_%,TOTAL
YEAR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2006,,,21407,100.0,,,21407.0
2007,,,28069,100.0,,,28069.0
2008,,,29083,100.0,,,29083.0
2009,,,30333,100.0,,,30333.0
2010,,,33217,99.99699,1.0,,33218.0
2011,1.0,0.002586,38663,99.997414,,,38664.0
2012,1.0,0.002713,36853,99.997287,,,36854.0
2013,,,24964,99.995994,1.0,,24965.0
2014,2.0,0.033096,6041,99.966904,,,6043.0
2015,2298.0,72.241434,883,27.758566,,,3181.0


# OFFENSES WITH KNOWN RACE

In [18]:
off_race = data.loc[(data['RACE'].notnull()) & (data['RACE'] != 'UNKNOWN')].reset_index()

In [19]:
off_race

Unnamed: 0,index,SUMMONS_KEY,SUMMONS_DATE,OFFENSE_DESCRIPTION,LAW_SECTION_NUMBER,LAW_DESCRIPTION,SUMMONS_CATEGORY_TYPE,AGE_GROUP,SEX,RACE,JURISDICTION_CODE,BORO,PRECINCT_OF_OCCUR,X_COORDINATE_CD,Y_COORDINATE_CD,Latitude,Longitude,Lon_Lat,YEAR
0,0,157786187,10/17/2016,BIKE IN PARK,1-05I,Park Regs,BIKE,45-64,M,BLACK HISPANIC,0,MANHATTAN,22,994010,224718,40.783478,-73.964758,POINT (-73.96475834099994 40.783478055000046),2016
1,16,187925579,09/21/2018,BICYCLE ON SIDEWALK,19-176,Administrative Code,BIKE,18-24,M,BLACK,2,BRONX,43,1017215,240131,40.825726,-73.880890,POINT (-73.88089007 40.82572638),2018
2,17,191430905,12/21/2018,BICYCLE INFRACTION (COMMERCIAL),10-157,Administrative Code,BIKE,45-64,M,ASIAN / PACIFIC ISLANDER,0,BROOKLYN,70,996508,176285,40.650538,-73.955826,POINT (-73.95582632 40.65053799),2018
3,18,189016603,10/20/2018,BICYCLE ON SIDEWALK,19-176,Administrative Code,BIKE,18-24,M,WHITE HISPANIC,0,BROOKLYN,83,1006517,193323,40.697284,-73.919700,POINT (-73.91969982 40.69728395),2018
4,19,187390989,09/09/2018,BICYCLE ON SIDEWALK,19-176,Administrative Code,BIKE,18-24,M,WHITE HISPANIC,0,BRONX,52,1011443,253629,40.862794,-73.901691,POINT (-73.9016911 40.86279408),2018
5,20,188099418,09/26/2018,BICYCLE INFRACTION (COMMERCIAL),10-157,Administrative Code,BIKE,18-24,M,ASIAN / PACIFIC ISLANDER,0,BROOKLYN,70,992551,173812,40.643755,-73.970090,POINT (-73.97008951 40.64375473),2018
6,21,189582318,11/04/2018,BICYCLE INFRACTION (COMMERCIAL),10-157,Administrative Code,BIKE,18-24,M,OTHER,0,NEW YORK,5,986645,201033,40.718474,-73.991362,POINT (-73.9913616 40.71847377),2018
7,22,191677836,12/30/2018,BICYCLE INFRACTION (COMMERCIAL),10-157,Administrative Code,BIKE,25-44,M,WHITE HISPANIC,0,BRONX,52,1012928,255180,40.867046,-73.896316,POINT (-73.89631578 40.8670464),2018
8,23,187679278,09/17/2018,BICYCLE ON SIDEWALK,19-176,Administrative Code,BIKE,25-44,M,WHITE HISPANIC,0,BRONX,44,1006244,246145,40.842267,-73.920511,POINT (-73.92051149 40.84226721),2018
9,24,187391037,09/09/2018,BICYCLE ON SIDEWALK,19-176,Administrative Code,BIKE,45-64,M,BLACK,0,BRONX,42,1012036,243157,40.834050,-73.899591,POINT (-73.89959064 40.83404969),2018
