In [29]:
import pandas as pd
import numpy as np
import pip
import os
from tqdm.notebook import tqdm, trange
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
try:
    __import__("xarray")
except ImportError:
    pip.main(['install', "xarray"])

import xarray as xr

pd.options.display.max_columns = None
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, LabelSet
from bokeh.transform import factor_cmap, factor_mark

# Investigating Relationship Between Weather and Criminal Offense Trends in New Orleans

### Hayden Outlaw, Joe Wagner | [Tulane CMPS 6790 Data Science](https://nmattei.github.io/cmps6790/) | Fall 2023

### https://outlawhayden.github.io/weather-crime




## Project Outline

## Collaboration Plan
Lorem Ipsum

## New Orleans Police Department Calls for Service
-------
https://datadriven.nola.gov/home/
From 2011 to Present
*WRITE OUT*

In [4]:
data_folder = '../data/calls_for_service'
csv_files = [f for f in os.listdir(data_folder) if f.endswith('csv')]
if 'calls_master.csv' not in csv_files:
    calls_for_service = pd.DataFrame()

    for f in tqdm(csv_files, desc = "Combining Files"):
        file_path = os.path.join(data_folder, f)
        df = pd.read_csv(file_path)
        calls_for_service = pd.concat([calls_for_service, df], ignore_index = True)
    calls_for_service.to_csv('../data/calls_for_service/calls_master.csv')
else:
    calls_for_service = pd.read_csv(os.path.join(data_folder, 'calls_master.csv'))


  calls_for_service = pd.read_csv(os.path.join(data_folder, 'calls_master.csv'))


In [5]:
calls_for_service.head()

Unnamed: 0.1,Unnamed: 0,NOPD_Item,Type_,TypeText,Priority,MapX,MapY,TimeCreate,TimeDispatch,TimeArrive,TimeClosed,Disposition,DispositionText,BLOCK_ADDRESS,Zip,PoliceDistrict,Location,InitialType,InitialTypeText,InitialPriority,SelfInitiated,Beat,Type,TimeArrival
0,0,A0052411,103M,MENTAL PATIENT,2B,37369000.0,3513814.0,01/01/2011 04:32:24 AM,01/01/2011 04:35:01 AM,01/01/2011 04:44:15 AM,01/01/2011 05:15:45 AM,NAT,NECESSARY ACTION TAKEN,016XX Monroe St,70118.0,0,"(9.235500543E-7, -0.0000022929847665)",,,,,,,
1,1,B1164911,21,COMPLAINT OTHER,1H,37369000.0,3513814.0,02/08/2011 12:00:51 PM,02/08/2011 12:00:51 PM,02/08/2011 12:00:51 PM,02/08/2011 12:19:58 PM,NAT,NECESSARY ACTION TAKEN,001XX Royal Street,,0,"(9.235500543E-7, -0.0000022929847665)",,,,,,,
2,2,A0106111,103,DISTURBANCE (OTHER),1C,37369000.0,3513814.0,01/01/2011 03:41:48 PM,01/01/2011 03:41:48 PM,01/01/2011 03:41:48 PM,01/01/2011 03:43:30 PM,NAT,NECESSARY ACTION TAKEN,040XX S Carrollton,,0,"(9.235500543E-7, -0.0000022929847665)",,,,,,,
3,3,A0144311,18,TRAFFIC INCIDENT,1H,37369000.0,3513814.0,01/01/2011 07:48:40 PM,01/01/2011 07:48:40 PM,01/01/2011 07:48:40 PM,01/01/2011 07:58:44 PM,NAT,NECESSARY ACTION TAKEN,Rampart & Esplanade Ave,,0,"(9.235500543E-7, -0.0000022929847665)",,,,,,,
4,4,C2899911,21L,LOST OR STOLEN,0E,37369000.0,3513814.0,03/19/2011 04:05:57 PM,03/19/2011 04:05:58 PM,03/19/2011 04:05:58 PM,03/19/2011 04:23:30 PM,RTF,REPORT TO FOLLOW,003XX O'Keefe,,0,"(9.235500543E-7, -0.0000022929847665)",,,,,,,


In [6]:
calls_for_service.shape

(5622042, 24)

In [7]:
calls_for_service["TypeText"].unique()

array(['MENTAL PATIENT', 'COMPLAINT OTHER', 'DISTURBANCE (OTHER)',
       'TRAFFIC  INCIDENT', 'LOST OR STOLEN', 'MISSING JUVENILE',
       'SUICIDE', 'SIMPLE RAPE', 'AUTO ACCIDENT', 'FUGITIVE ATTTACHMENT',
       'DISCHARGING FIREARMS', 'FIREWORKS', 'SUSPICIOUS PERSON',
       'SIMPLE CRIMINAL DAMA', 'PROWLER', 'FIRE', 'AMBULANCE REQUEST',
       'FIGHT', 'BURGLAR ALARM, SILEN', 'SIMPLE BATTERY', 'HIT & RUN',
       'AUTO THEFT', 'SIMPLE ROBBERY - PUR', 'ILLEGAL CARRYING OF',
       'SILENT E-911 CALL', 'HIT & RUN  WITH INJU',
       'DOMESTIC DISTURBANCE', 'SIMPLE BATTERY DOMES', 'MISSING ADULT',
       'AUTO ACCIDENT WITH I', 'THEFT', 'RECKLESS DRIVING',
       'AGGRAVATED CRIMINAL', 'AGGRAVATED BATTERY', 'DRUG VIOLATIONS',
       'DRIVING WHILE UNDER', 'AGGRAVATED BATTERY B',
       'THEFT FROM INTERIOR', 'SIMPLE ARSON', 'BURGLAR ALARM, LOCAL',
       'RESIDENCE BURGLARY', 'SIMPLE ROBBERY', 'OBSCENITY ,EXPOSING',
       'AGGRAVATED BATTERY D', 'DAILY WALKING BEAT L', 'DEATH',
     

## NOAA Weather Station Data
-------
NOAA NCEI Data Ordering Rquest
Custom GHCN-Daily CSV File
From https://www.ncei.noaa.gov/cdo-web


In [8]:
weather = pd.read_csv('../data/weather/NCEI_CDO.csv', low_memory = False)

In [9]:
weather.head()

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,AWND,AWND_ATTRIBUTES,DAPR,DAPR_ATTRIBUTES,FMTM,FMTM_ATTRIBUTES,MDPR,MDPR_ATTRIBUTES,PGTM,PGTM_ATTRIBUTES,PRCP,PRCP_ATTRIBUTES,SNOW,SNOW_ATTRIBUTES,SNWD,SNWD_ATTRIBUTES,TAVG,TAVG_ATTRIBUTES,TMAX,TMAX_ATTRIBUTES,TMIN,TMIN_ATTRIBUTES,TOBS,TOBS_ATTRIBUTES,WDF2,WDF2_ATTRIBUTES,WDF5,WDF5_ATTRIBUTES,WSF2,WSF2_ATTRIBUTES,WSF5,WSF5_ATTRIBUTES,WT01,WT01_ATTRIBUTES,WT02,WT02_ATTRIBUTES,WT03,WT03_ATTRIBUTES,WT04,WT04_ATTRIBUTES,WT05,WT05_ATTRIBUTES,WT06,WT06_ATTRIBUTES,WT08,WT08_ATTRIBUTES,WT10,WT10_ATTRIBUTES,WT11,WT11_ATTRIBUTES,WT13,WT13_ATTRIBUTES,WT14,WT14_ATTRIBUTES,WT16,WT16_ATTRIBUTES,WT18,WT18_ATTRIBUTES,WT21,WT21_ATTRIBUTES
0,US1LAOR0006,"NEW ORLEANS 2.1 ENE, LA US",29.961679,-90.038803,2.4,2015-02-01,,,,,,,,,,,0.03,",,N",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,US1LAOR0006,"NEW ORLEANS 2.1 ENE, LA US",29.961679,-90.038803,2.4,2015-02-02,,,,,,,,,,,0.04,",,N",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,US1LAOR0006,"NEW ORLEANS 2.1 ENE, LA US",29.961679,-90.038803,2.4,2015-02-03,,,,,,,,,,,0.0,"T,,N",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,US1LAOR0006,"NEW ORLEANS 2.1 ENE, LA US",29.961679,-90.038803,2.4,2015-02-04,,,,,,,,,,,0.5,",,N",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,US1LAOR0006,"NEW ORLEANS 2.1 ENE, LA US",29.961679,-90.038803,2.4,2015-02-05,,,,,,,,,,,0.59,",,N",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [10]:
weather["PRCP_ATTRIBUTES"].unique()

array([',,N', 'T,,N', nan, ',L,N', ',,X,2400', 'T,,X,2400', 'T,,Z,2400',
       ',,W,2400', 'T,,W,2400', ',,K,0800', ',,7,0800', ',L,K,0800',
       ',,K,2400', 'T,,K,2400', ',,7,2400', 'T,,7,2400', ',,A,2400',
       ',,Z,2400', ',,K,', 'T,,K,', ',,7,', ',,Z,', 'T,,7,', ',,H,'],
      dtype=object)

In [11]:
weather["STATION"].unique()

array(['US1LAOR0006', 'US1LAOR0016', 'USW00012916', 'US1LAOR0003',
       'US1LAOR0014', 'USC00166666', 'US1LAOR0012', 'USW00053917',
       'USW00012930', 'US1LAOR0009', 'US1LAOR0019'], dtype=object)

## Tidying Calls for Service Dataframe

First, we drop some unneccessary columns and then make sure each variable has the correct data type

In [24]:
calls_for_service.dtypes

Unnamed: 0                  int64
NOPD_Item                  object
Type_                      object
TypeText                   object
Priority                   object
MapX                      float64
MapY                      float64
TimeCreate         datetime64[ns]
TimeDispatch       datetime64[ns]
TimeArrive         datetime64[ns]
TimeClosed         datetime64[ns]
Disposition                object
DispositionText            object
BLOCK_ADDRESS              object
Zip                       float64
PoliceDistrict              int64
Location                   object
InitialType                object
InitialTypeText            object
InitialPriority            object
SelfInitiated              object
Beat                       object
Type                       object
TimeArrival                object
dtype: object

In [25]:
calls_for_service['Zip'] = calls_for_service['Zip'].astype(str)

In [23]:
calls_for_service['TimeCreate'] = pd.to_datetime(calls_for_service['TimeCreate'])
calls_for_service['TimeDispatch'] = pd.to_datetime(calls_for_service['TimeCreate'])
calls_for_service['TimeArrive'] = pd.to_datetime(calls_for_service['TimeCreate'])
calls_for_service['TimeClosed'] = pd.to_datetime(calls_for_service['TimeCreate'])

In [19]:
calls_for_service.drop(['Unnamed: 0'], axis =1);

Unnamed: 0,NOPD_Item,Type_,TypeText,Priority,MapX,MapY,TimeCreate,TimeDispatch,TimeArrive,TimeClosed,Disposition,DispositionText,BLOCK_ADDRESS,Zip,PoliceDistrict,Location,InitialType,InitialTypeText,InitialPriority,SelfInitiated,Beat,Type,TimeArrival
0,A0052411,103M,MENTAL PATIENT,2B,37369000.0,3513814.0,01/01/2011 04:32:24 AM,01/01/2011 04:35:01 AM,01/01/2011 04:44:15 AM,01/01/2011 05:15:45 AM,NAT,NECESSARY ACTION TAKEN,016XX Monroe St,70118.0,0,"(9.235500543E-7, -0.0000022929847665)",,,,,,,
1,B1164911,21,COMPLAINT OTHER,1H,37369000.0,3513814.0,02/08/2011 12:00:51 PM,02/08/2011 12:00:51 PM,02/08/2011 12:00:51 PM,02/08/2011 12:19:58 PM,NAT,NECESSARY ACTION TAKEN,001XX Royal Street,,0,"(9.235500543E-7, -0.0000022929847665)",,,,,,,
2,A0106111,103,DISTURBANCE (OTHER),1C,37369000.0,3513814.0,01/01/2011 03:41:48 PM,01/01/2011 03:41:48 PM,01/01/2011 03:41:48 PM,01/01/2011 03:43:30 PM,NAT,NECESSARY ACTION TAKEN,040XX S Carrollton,,0,"(9.235500543E-7, -0.0000022929847665)",,,,,,,
3,A0144311,18,TRAFFIC INCIDENT,1H,37369000.0,3513814.0,01/01/2011 07:48:40 PM,01/01/2011 07:48:40 PM,01/01/2011 07:48:40 PM,01/01/2011 07:58:44 PM,NAT,NECESSARY ACTION TAKEN,Rampart & Esplanade Ave,,0,"(9.235500543E-7, -0.0000022929847665)",,,,,,,
4,C2899911,21L,LOST OR STOLEN,0E,37369000.0,3513814.0,03/19/2011 04:05:57 PM,03/19/2011 04:05:58 PM,03/19/2011 04:05:58 PM,03/19/2011 04:23:30 PM,RTF,REPORT TO FOLLOW,003XX O'Keefe,,0,"(9.235500543E-7, -0.0000022929847665)",,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5622037,L3712720,,AREA CHECK,1K,3668242.0,545430.0,12/31/2020 03:19:51 AM,12/31/2020 03:19:51 AM,12/31/2020 03:19:51 AM,12/31/2020 03:55:26 AM,NAT,Necessary Action Taken,Canal Blvd & Florida Blvd,70124.0,3,POINT (-90.10946298 29.99414031),22A,AREA CHECK,1K,Y,3E01,22A,
5622038,L3787720,,FIREWORKS,2J,3669009.0,537961.0,12/31/2020 06:48:06 PM,12/31/2020 06:48:47 PM,,12/31/2020 06:48:54 PM,NAT,Necessary Action Taken,044XX Baudin St,70119.0,3,POINT (-90.10729416 29.97357998),94F,FIREWORKS,2J,N,3D02,94F,
5622039,L3698820,,COMPLAINT OTHER,1A,3669800.0,525664.0,12/31/2020 12:10:51 AM,12/31/2020 07:43:14 AM,12/31/2020 07:52:27 AM,12/31/2020 08:46:59 AM,GOA,GONE ON ARRIVAL,045XX S Claiborne Ave,70115.0,2,POINT (-90.10521219 29.93974579),21,COMPLAINT OTHER,1J,N,2N03,21,
5622040,L3802620,,PUBLIC GATHERING,1E,3668192.0,553134.0,12/31/2020 08:40:46 PM,12/31/2020 09:24:31 PM,12/31/2020 09:58:44 PM,12/31/2020 10:12:08 PM,NAT,Necessary Action Taken,068XX Louisville St,70124.0,3,POINT (-90.10936061 30.01532499),GATHER,PUBLIC GATHERING,1E,N,3F02,GATHER,


In [28]:
calls_for_service.head()

Unnamed: 0.1,Unnamed: 0,NOPD_Item,Type_,TypeText,Priority,MapX,MapY,TimeCreate,TimeDispatch,TimeArrive,TimeClosed,Disposition,DispositionText,BLOCK_ADDRESS,Zip,PoliceDistrict,Location,InitialType,InitialTypeText,InitialPriority,SelfInitiated,Beat,Type,TimeArrival
0,0,A0052411,103M,MENTAL PATIENT,2B,37369000.0,3513814.0,2011-01-01 04:32:24,2011-01-01 04:32:24,2011-01-01 04:32:24,2011-01-01 04:32:24,NAT,NECESSARY ACTION TAKEN,016XX Monroe St,70118.0,0,"(9.235500543E-7, -0.0000022929847665)",,,,,,,
1,1,B1164911,21,COMPLAINT OTHER,1H,37369000.0,3513814.0,2011-02-08 12:00:51,2011-02-08 12:00:51,2011-02-08 12:00:51,2011-02-08 12:00:51,NAT,NECESSARY ACTION TAKEN,001XX Royal Street,,0,"(9.235500543E-7, -0.0000022929847665)",,,,,,,
2,2,A0106111,103,DISTURBANCE (OTHER),1C,37369000.0,3513814.0,2011-01-01 15:41:48,2011-01-01 15:41:48,2011-01-01 15:41:48,2011-01-01 15:41:48,NAT,NECESSARY ACTION TAKEN,040XX S Carrollton,,0,"(9.235500543E-7, -0.0000022929847665)",,,,,,,
3,3,A0144311,18,TRAFFIC INCIDENT,1H,37369000.0,3513814.0,2011-01-01 19:48:40,2011-01-01 19:48:40,2011-01-01 19:48:40,2011-01-01 19:48:40,NAT,NECESSARY ACTION TAKEN,Rampart & Esplanade Ave,,0,"(9.235500543E-7, -0.0000022929847665)",,,,,,,
4,4,C2899911,21L,LOST OR STOLEN,0E,37369000.0,3513814.0,2011-03-19 16:05:57,2011-03-19 16:05:57,2011-03-19 16:05:57,2011-03-19 16:05:57,RTF,REPORT TO FOLLOW,003XX O'Keefe,,0,"(9.235500543E-7, -0.0000022929847665)",,,,,,,


## EDA

Let's take a look at some firework related 911 calls. Before plotting, we would expect there to be an influx on certain days of the year (NYE, July 4).

In [35]:
filtered_df = calls_for_service[calls_for_service["TypeText"].str.contains('FIRE BOMB|EXPLOSION|FIREWORKS|ILLEGAL FIREWORKS')]

In [40]:
filtered_df["TypeText"].value_counts()

TypeText
FIREWORKS            5908
ILLEGAL FIREWORKS     169
EXPLOSION              43
FIRE BOMB               1
Name: count, dtype: int64

Woah, looks like a lot of calls for fireworks. We included some other related categories just to see them in the plot. The bokeh labels will distinguish which incident corresponds to each data point. I will graph it compared to the Police District of the call. 

In [42]:
palette = ["#053061", "#67001f"]
TypeText = sorted(filtered_df.TypeText.unique())
MARKERS = ['hex', 'circle_x', 'triangle','star']

police_district = filtered_df['PoliceDistrict']

TITLE = "Police District vs Call Time"
TOOLS = "pan,wheel_zoom,box_zoom,reset,save"

u = figure(tools=TOOLS, toolbar_location="above", width=1200, title=TITLE, x_axis_type="datetime")
u.toolbar.logo = "grey"
u.background_fill_color = "#efefef"
u.xaxis.axis_label = "Police District"
u.yaxis.axis_label = "Call Time"
u.grid.grid_line_color = "white"

source = ColumnDataSource(filtered_df)

u.scatter("TimeCreate", "PoliceDistrict", source=source,
          legend_group="TypeText", fill_alpha=0.4, size=12,
          marker=factor_mark('TypeText', MARKERS, TypeText),
          color=factor_cmap('TypeText', 'Category10_4', TypeText))

show(u)

## Closing Thoughts and Final Goals
------
Lorem Ipsum