In [None]:
import pandas as pd
from ipywidgets import interact
import numpy as np
from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure
from bokeh.core.properties import value
from bokeh.models import ColumnDataSource, NumeralTickFormatter
from bokeh.palettes import Spectral11
from bokeh.transform import factor_cmap
import os
import matplotlib.pylab as plb
output_notebook()

In [None]:
# Load the index-crime extract (one CSV in the working directory) and preview its last rows.
CRIME_INDEX_CSV = 'Index_Crimes_by_County_and_Agency__Beginning_1990.csv'
crimeindex = pd.read_csv(CRIME_INDEX_CSV)
crimeindex.tail()

In [None]:
# Values of the `County` column that are treated as belonging to New York City.
nyc = [
    'New York',
    'New York City',
    'Kings',
    'Queens',
    'Bronx',
    'Richmond',
]

# Boolean row filter: True where the record's county is one of the labels above.
maskci1 = crimeindex['County'].isin(nyc)

In [None]:
# Citywide totals per year: keep the NYC rows, then add up every numeric column
# within each year. `groupby` already returns the years in ascending order, so no
# separate sort is needed.
# numeric_only=True keeps text columns (e.g. `County`) out of the aggregation; on
# pandas >= 2.0 the default would otherwise try to "sum" strings as well.
nyvalues = crimeindex[maskci1].groupby('Year').sum(numeric_only=True)
nyvalues

In [None]:
# Crime categories carried through the rest of the notebook, in display order.
crime_columns = [
    'Murder',
    'Rape',
    'Robbery',
    'Aggravated Assault',
    'Burglary',
    'Larceny',
    'Motor Vehicle Theft',
]
x = nyvalues[crime_columns]

In [None]:
# Category labels (bar-chart x axis) and year labels as strings (widget choices).
crimes = list(x.columns)
year = [str(label) for label in x.index]

# Column-oriented table: a 'crimes' label column followed by one column of counts
# per year, keyed by the year as a string.
data = {'crimes': crimes}
data.update({str(label): x.loc[label].tolist() for label in x.index})
framer = pd.DataFrame.from_dict(data)

# Initial chart values: each category's share of the 2008 total.
summer = framer['2008'].sum()
g = [count / summer for count in data['2008']]

In [None]:
# Data behind the bars: one row per crime category with its share of the yearly total.
source = ColumnDataSource(data=dict(crimes=crimes, counts=g))

p = figure(x_range=crimes, plot_height=600, plot_width=800, toolbar_location=None, title="Crime Counts")

# Draw the bars exactly once and keep the renderer: `update` writes new values
# through `r.data_source`. (The glyph used to be added twice with identical
# arguments, which left a redundant second renderer stacked on the chart.)
r = p.vbar(x='crimes', top='counts', width=0.9, source=source, legend="crimes",
           line_color='white', fill_color=factor_cmap('crimes', palette=Spectral11, factors=crimes))

# Shares are fractions of 1, shown as percentages; the fixed upper bound of 1.2
# leaves room above the bars for the horizontal legend.
p.yaxis[0].formatter = NumeralTickFormatter(format="0.0%")
p.xgrid.grid_line_color = None
p.y_range.start = 0
p.y_range.end = 1.2
p.legend.orientation = "horizontal"
p.legend.location = "top_center"

In [None]:
# Three columns: crime (centred, 20 wide), percentage (centred, 40 wide), total (10 wide).
row_template = "{:^20} | {:^40} | {:10}"


def prettyprint(listofcrime, listofpercentages, listoftotalnumbers):
    """Print a three-column text table to stdout.

    Output is a blank line, a header row, a 20-character line of asterisks,
    then one row per (crime, percentage, total) triple. The three inputs are
    paired positionally with ``zip``, so rows stop at the shortest input.

    Args:
        listofcrime: crime names, one per row.
        listofpercentages: display values for the percentage column.
        listoftotalnumbers: values for the total column.
    """
    rows = tuple(zip(listofcrime, listofpercentages, listoftotalnumbers))
    header = row_template.format("Crime", "Percentage Of All Crimes In NYC", "Total Number")

    print()
    print(header)
    print("*" * 20)
    for row in rows:
        print(row_template.format(*row))

def updateyear(year):
    """Print the crime table for one year and return each crime's share of that year's total.

    Reads the module-level ``framer`` (table with a 'crimes' column plus one
    column per year) and ``data`` (the dict it was built from), and prints the
    table through ``prettyprint``.

    Args:
        year: key of the year column in ``framer`` / ``data``.

    Returns:
        list: one fraction per crime, each count divided by the year's total.
    """
    year_column = framer[year]
    totals = year_column.tolist()
    year_total = year_column.sum()
    labels = framer["crimes"].tolist()

    shares = [count / year_total for count in data[year]]
    # Human-readable form of the same shares, one decimal place, e.g. "12.3%".
    share_labels = [str(round((share * 100), 1)) + "%" for share in shares]

    prettyprint(labels, share_labels, totals)
    return shares

def update(Year):
    """Widget callback: show the selected year's shares on the bar chart.

    Replaces the 'counts' column of the bar renderer's data source with the
    list returned by ``updateyear(Year)`` (which also prints the year's table),
    then calls ``push_notebook()`` so the change is sent to the displayed plot.

    Args:
        Year: the year choice passed in by the widget; forwarded unchanged to
            ``updateyear``.
    """
    # The former `if Year == Year:` guard was always true, so it is dropped.
    r.data_source.data['counts'] = updateyear(Year)
    push_notebook()

In [None]:
# Display the bar chart in the notebook. notebook_handle=True asks Bokeh for a
# handle to the displayed plot, which is what lets push_notebook() (called from
# `update`) refresh it in place.
show(p, notebook_handle=True)

In [None]:
# Build a widget whose `Year` choices are the entries of `year` (year labels as
# strings); each selection calls update(Year=<selected label>).
interact(update, Year=year)

In [None]:
# NOTE(review): `AR` was deprecated in later statsmodels releases in favour of
# `AutoReg` - confirm the installed version still provides it.
from statsmodels.tsa.ar_model import AR

checker = {}

# Fit one autoregressive model per crime column and ask each for the single
# step at position len(x), i.e. the first point after the observed series.
dicter = {
    column: AR(x[column].tolist()).fit().predict(len(x), len(x))
    for column in x.columns
}

# One-row table of forecasts. The labelled copy below is only displayed;
# `dicter` itself keeps its default index.
dicter = pd.DataFrame.from_dict(dicter)
dicter.set_index([pd.Index(["2018 Auto Regression Forecast"])])

In [None]:
# Expose the year index as a regular `Year` column so later cells can filter on it.
# `assign` returns a new frame rather than writing into `x`, which was sliced out
# of `nyvalues`; item assignment on that slice triggers pandas'
# SettingWithCopyWarning.
x = x.assign(Year=x.index.tolist())
x

In [None]:
import matplotlib.pyplot as plt
# NOTE(review): this rebinds the name `figure`, which was imported from
# bokeh.plotting at the top of the notebook. Re-running the Bokeh chart cell after
# this cell would call matplotlib's `figure` instead of Bokeh's.
from matplotlib.pyplot import figure
import matplotlib.style as style

# Load the law-enforcement personnel extract from the working directory.
police = pd.read_csv('Law_Enforcement_Personnel_by_Agency___Beginning_2007.csv')
# List the distinct `County` labels present in the file.
police['County'].unique()

In [None]:
maskp1 = police['County'] == "New York City"
numofficers = police[maskp1][['Grand Total', 'Year']].groupby('Year').sum()
%matplotlib inline
YearP = numofficers.index.tolist()
Officers = numofficers['Grand Total'].tolist()

fig= plt.figure(figsize=(10,4))
plt.rcParams['font.family'] = 'Times New Roman'
plt.plot(YearP, Officers)
plt.title('Number Of NYC Police Officers On Staff by Year')
plt.xlabel('Year')
plt.ylabel("Number of Officers")
plt.xticks(np.arange(2007, 2019))
plt.show()
palette=("Blues_d")

In [None]:
# Load the adult-arrests extract, then print its distinct county labels and its
# column names (one after the other) to see what can be filtered on.
arrests = pd.read_csv('Adult_Arrests_by_County___Beginning_1970.csv')
print(arrests['County'].unique(), arrests.columns, sep="\n")

In [None]:
# Row filters: NYC counties, years 1990 through 2017 inclusive.
maska1 = arrests['County'].isin(nyc)
maska2 = arrests['Year'] >= 1990
maska3 = arrests['Year'] <= 2017

# Yearly totals for the four drug / DWI arrest columns. The columns are selected
# on the groupby *before* summing, so text columns such as `County` are never
# aggregated (pandas >= 2.0 would otherwise try to sum them) and the result is a
# fresh frame that the derived columns below can be added to directly.
crimesa = (
    arrests[maska1 & maska2 & maska3]
    .groupby('Year')[['Drug Felony', 'DWI Felony', 'Drug Misd', 'DWI Misd']]
    .sum()
)

# Combined felony + misdemeanor totals per category.
crimesa['Drug Related Crimes'] = crimesa['Drug Felony'] + crimesa['Drug Misd']
crimesa['DWI Crimes'] = crimesa['DWI Felony'] + crimesa['DWI Misd']
crimesa

In [None]:
wages = pd.read_csv('Quarterly_Census_of_Employment_and_Wages_Annual_Data__Beginning_2000.csv')

# Keep only the all-industries rows for the New York City metropolitan statistical area.
mask1 = wages['Area'].eq('New York City')
mask2 = wages['Area Type'].eq('Metropolitan Statistical Area')
mask3 = wages['NAICS Title'].eq('Total, All Industries')

# Filter and order chronologically in one step.
nycwages = wages.loc[mask1 & mask2 & mask3].sort_values(by=['Year'])
nycwages

In [None]:
# Restrict crime totals to 2000 onward and wages to 2017 and earlier.
maskscatter1 = x['Year'] >= 2000
maskscatter2 = nycwages['Year'] <= 2017

# The salary header in the wages file carries a long run of trailing spaces.
# Resolve the column by its stripped name instead of hard-coding the padding, so
# the lookup works whether or not the header is padded.
salary_matches = [
    name for name in nycwages.columns
    if str(name).strip() == 'Annual Average Salary'
]
if not salary_matches:
    raise KeyError('Annual Average Salary')
salary_column = salary_matches[0]

# NOTE(review): xx and yy are paired by position, not by year - presumably both
# frames have exactly one row per year over the same span, in ascending order;
# confirm before trusting the correlation.
xx = nycwages[maskscatter2][salary_column].tolist()
yy = x[maskscatter1]['Larceny'].tolist()


In [None]:
# Scatter of average salary (x) against larceny cases (y), one point per paired entry.
fig, ax = plt.subplots(figsize=(10, 5))
ax.scatter(xx, yy)

plt.rcParams['font.family'] = 'Times New Roman'
ax.set_title('Average Salary vs Level of Larceny in NYC Between 2000 and 2017')
ax.set_xlabel('Average Salary')
ax.set_ylabel("Cases of Larceny")

plt.show()

# Pearson correlation coefficient: the off-diagonal entry of the 2x2 matrix.
salary_larceny_corr = np.corrcoef(xx, yy)[0, 1]
print("The Correlation Between Cases of Larceny and Average Salary in NYC Between 2000 and 2017 is", salary_larceny_corr)

In [None]:
# Pair officer headcounts with drug-arrest totals by row position, not by year label.
# NOTE(review): presumably the first 11 rows of `numofficers` are 2007-2017 and
# rows 17 onward of `crimesa` (filtered to 1990-2017) are the same years - confirm
# both frames have exactly one row per year with no gaps, otherwise the pairs are
# misaligned or the lists differ in length.
x3 = numofficers['Grand Total'].iloc[0:11].tolist()
y3 = crimesa["Drug Related Crimes"].iloc[17:].tolist()

In [None]:
# Scatter of active officers (x) against drug-related arrests (y), one point per paired entry.
fig = plt.figure(figsize=(10, 5))
plt.scatter(x3, y3)

plt.rcParams['font.family'] = 'Times New Roman'
plt.title('Active Police Force vs Drug Arrests in NYC from 2007 - 2017')
plt.xlabel('Number of Active Officers')
plt.ylabel("Drug Related Arrests")

plt.show()
# Pearson correlation coefficient: the off-diagonal entry of the 2x2 matrix.
# The message now states 2007-2017, matching the chart title; it previously said
# "2000 and 2017", carried over from the salary/larceny cell.
print("The Correlation Between The Number of Active Officers and Drug Related Arrests in NYC Between 2007 and 2017 is", np.corrcoef(x3, y3)[0, 1])