## This script loads the Calls_for_Service data files and merges them with the categories in the MAX_CFS_UCR_Categories file.
The following constraints are applied:
- Record year range = 2012 - 2016
- Record Disposition = RTF (Report To Follow)
- Record CrimeType = Violent Crime

In [1]:
import os
import csv
import zipfile
import string
import pandas as pd

## Data Loading

### Load Calls-for-Service Data

In [2]:
# Set location of the folder holding the yearly Calls-for-Service archives.
# The components are joined with os.path.join so the separator matches the
# host OS; the trailing "" keeps the trailing separator, so on Windows the
# value is identical to the previous hard-coded string.
path = os.path.join("..", "Datasets", "Raw_Data", "Calls_for_Service", "")
path

'..\\Datasets\\Raw_Data\\Calls_for_Service\\'

In [3]:
# Get filenames. os.listdir returns entries in arbitrary order, so sort them
# to make the order in which the files are loaded (and therefore the row
# order of the combined data) deterministic.
filenames = sorted(os.listdir(path))
filenames

['Calls_for_Service_2012.zip',
 'Calls_for_Service_2013.zip',
 'Calls_for_Service_2014.zip',
 'Calls_for_Service_2015.zip',
 'Calls_for_Service_2016.zip']

In [4]:
# Load data from files in list: each zip archive is expected to contain one
# CSV that shares the archive's base name.
dfs = []
for f in filenames:
    base, ext = os.path.splitext(f)
    if ext.lower() != '.zip':
        # Ignore anything in the folder that is not a zip archive
        continue
    # Build the member name from the base name instead of string.replace(),
    # which no longer exists in Python 3 and would also rewrite a 'zip'
    # substring occurring elsewhere in the filename.
    # The context managers close the archive and its member once the CSV
    # has been read into memory.
    with zipfile.ZipFile(os.path.join(path, f)) as zf:
        with zf.open(base + '.csv') as csv_file:
            dfs.append(pd.read_csv(csv_file))

# Merge all df in list into one frame with a fresh 0..n-1 index
cfs_df = pd.concat(dfs, ignore_index=True)

# Change datatype of column Type_ to String so it can be matched against
# string codes later on
cfs_df['Type_'] = cfs_df['Type_'].apply(str)

In [5]:
# Preview the first five rows of the combined calls-for-service data
cfs_df.head(n=5)

Unnamed: 0,NOPD_Item,Type_,TypeText,Priority,InitialType,InitialTypeText,InitialPriority,MapX,MapY,TimeCreate,...,TimeArrive,TimeClosed,Disposition,DispositionText,SelfInitiated,Beat,BLOCK_ADDRESS,Zip,PoliceDistrict,Location
0,A0000112,62A,"BURGLAR ALARM, SILEN",2C,,,,3683627,532625,1/1/2012 0:00,...,,1/1/2012 0:33,NAT,NECESSARY ACTION TAKEN,,,009XX Decatur St,70116.0,8,"(29.958469303316875, -90.0613152964016)"
1,A0000412,94,DISCHARGING FIREARMS,2B,,,,3732996,562418,1/1/2012 0:00,...,1/1/2012 0:16,1/1/2012 0:30,UNF,UNFOUNDED,,,147XX Chef Menteur Hwy,70129.0,7,"(30.038788769111676, -89.90425047516077)"
2,A0000212,103,DISTURBANCE (OTHER),1C,,,,3687688,548824,1/1/2012 0:01,...,1/1/2012 0:01,1/1/2012 0:19,NAT,NECESSARY ACTION TAKEN,,,038XX Gentilly Blvd,70122.0,3,"(30.002886229898206, -90.04791794333323)"
3,A0000712,21,COMPLAINT OTHER,1H,,,,3670776,521242,1/1/2012 0:01,...,,1/1/2012 0:20,NAT,NECESSARY ACTION TAKEN,,,Carondelet St & Napoleon Ave,70115.0,2,"(29.927555772946167, -90.10228161624175)"
4,A0000512,62A,"BURGLAR ALARM, SILEN",2C,,,,3665739,549621,1/1/2012 0:01,...,1/1/2012 0:09,1/1/2012 1:55,NAT,NECESSARY ACTION TAKEN,,,002XX W Harrison Ave,70124.0,3,"(30.005736477457617, -90.11723146931276)"


In [6]:
# Row count of the combined calls-for-service data
cfs_df.shape[0]

2252907

In [7]:
# Column labels of the calls-for-service data
cfs_df.keys()

Index([u'NOPD_Item', u'Type_', u'TypeText', u'Priority', u'InitialType',
       u'InitialTypeText', u'InitialPriority', u'MapX', u'MapY', u'TimeCreate',
       u'TimeDispatch', u'TimeArrive', u'TimeClosed', u'Disposition',
       u'DispositionText', u'SelfInitiated', u'Beat', u'BLOCK_ADDRESS', u'Zip',
       u'PoliceDistrict', u'Location'],
      dtype='object')

### Filter Records with Disposition == 'RTF' 

In [8]:
# Keep only the calls whose Disposition code is exactly 'RTF'
cfs_df = cfs_df.loc[cfs_df['Disposition'] == 'RTF']

In [9]:
# Row count after the Disposition filter
cfs_df.shape[0]

428570

In [10]:
# Preview the first five rows that remain after the Disposition filter
cfs_df.head(n=5)

Unnamed: 0,NOPD_Item,Type_,TypeText,Priority,InitialType,InitialTypeText,InitialPriority,MapX,MapY,TimeCreate,...,TimeArrive,TimeClosed,Disposition,DispositionText,SelfInitiated,Beat,BLOCK_ADDRESS,Zip,PoliceDistrict,Location
64,A0006912,29S,SUICIDE,2B,,,,37369000,3513814,1/1/2012 0:16,...,1/1/2012 0:32,1/1/2012 0:59,RTF,REPORT TO FOLLOW,,,049XX Major Dr,70128.0,0,"(9.235500542976004E-7, -0.000002292984766499941)"
79,A0008112,966,DRUG VIOLATIONS,1G,,,,3680920,540222,1/1/2012 0:22,...,1/1/2012 0:23,1/1/2012 1:13,RTF,REPORT TO FOLLOW,,,St Bernard Ave & N Tonti St,70119.0,1,"(29.979440793120016, -90.06959870234981)"
80,A0008212,21U,UNDER AGE DRINKING V,1F,,,,3681403,531510,1/1/2012 0:22,...,1/1/2012 0:22,1/1/2012 1:33,RTF,REPORT TO FOLLOW,,,003XX Bourbon St,70112.0,8,"(29.95547113098435, -90.06837696099961)"
83,A0008312,66,EXTORTION (THREATS),1F,,,,3691753,536508,1/1/2012 0:23,...,1/1/2012 0:25,1/1/2012 1:19,RTF,REPORT TO FOLLOW,,,014XX Alvar St,70117.0,5,"(29.96889564722802, -90.03551677167968)"
92,A0009412,94,DISCHARGING FIREARMS,2B,,,,3696950,533169,1/1/2012 0:28,...,1/1/2012 0:28,1/1/2012 1:36,RTF,REPORT TO FOLLOW,,,054XX Burgundy St,70117.0,5,"(29.959551708547103, -90.01922533580768)"


### Load Categories Data

In [11]:
# Set location of file. Built with os.path.join so the separators match the
# host OS; on Windows this yields the same path as the previous literal.
fname = os.path.join("..", "Datasets", "Raw_Data", "MAX_CFS_UCR_Categories.xlsx")

# Load file. The sheet name is passed positionally because the keyword was
# renamed from `sheetname` to `sheet_name` between pandas releases; the
# positional form works with both.
crime_types = pd.read_excel(fname, 'Sheet1')

# Select required columns (label-based .loc replaces the removed .ix indexer)
crime_types = crime_types.loc[:, ['Code', 'UCR MAIN']]

# Rename columns so the code column carries the same name ('Type_') as the
# corresponding column in the calls-for-service data
crime_types = crime_types.rename(columns={'Code': 'Type_', 'UCR MAIN': 'CrimeType'})

# Change datatype of column Type_ to String
crime_types['Type_'] = crime_types['Type_'].apply(str)

### Filter Records with CrimeType == 'VIOLENT CRIME'

In [12]:
# Keep only the codes categorised as 'VIOLENT CRIME'
crime_types = crime_types.loc[crime_types['CrimeType'] == 'VIOLENT CRIME']

In [13]:
# Preview the first five violent-crime codes
crime_types.head(n=5)

Unnamed: 0,Type_,CrimeType
71,30,VIOLENT CRIME
72,30C,VIOLENT CRIME
73,30D,VIOLENT CRIME
74,30S,VIOLENT CRIME
75,34,VIOLENT CRIME


### Merge Calls-for-Service with Categories data

In [14]:
# Left-join the categories onto the calls by Type_: every call row is kept,
# and CrimeType is left empty where the code has no match in crime_types
merged_df = cfs_df.merge(crime_types, how='left', on='Type_')

In [15]:
# Preview the first five merged rows
merged_df.head(n=5)

Unnamed: 0,NOPD_Item,Type_,TypeText,Priority,InitialType,InitialTypeText,InitialPriority,MapX,MapY,TimeCreate,...,TimeClosed,Disposition,DispositionText,SelfInitiated,Beat,BLOCK_ADDRESS,Zip,PoliceDistrict,Location,CrimeType
0,A0006912,29S,SUICIDE,2B,,,,37369000,3513814,1/1/2012 0:16,...,1/1/2012 0:59,RTF,REPORT TO FOLLOW,,,049XX Major Dr,70128.0,0,"(9.235500542976004E-7, -0.000002292984766499941)",
1,A0008112,966,DRUG VIOLATIONS,1G,,,,3680920,540222,1/1/2012 0:22,...,1/1/2012 1:13,RTF,REPORT TO FOLLOW,,,St Bernard Ave & N Tonti St,70119.0,1,"(29.979440793120016, -90.06959870234981)",
2,A0008212,21U,UNDER AGE DRINKING V,1F,,,,3681403,531510,1/1/2012 0:22,...,1/1/2012 1:33,RTF,REPORT TO FOLLOW,,,003XX Bourbon St,70112.0,8,"(29.95547113098435, -90.06837696099961)",
3,A0008312,66,EXTORTION (THREATS),1F,,,,3691753,536508,1/1/2012 0:23,...,1/1/2012 1:19,RTF,REPORT TO FOLLOW,,,014XX Alvar St,70117.0,5,"(29.96889564722802, -90.03551677167968)",
4,A0009412,94,DISCHARGING FIREARMS,2B,,,,3696950,533169,1/1/2012 0:28,...,1/1/2012 1:36,RTF,REPORT TO FOLLOW,,,054XX Burgundy St,70117.0,5,"(29.959551708547103, -90.01922533580768)",


In [16]:
# Row count after the merge
merged_df.shape[0]

428570

### Remove Records with CrimeType = NaN or Null

In [17]:
# Count the records whose CrimeType is null (no matching category code)
int(merged_df['CrimeType'].isnull().sum())

414616

In [18]:
# Count the records whose CrimeType is populated
int(merged_df['CrimeType'].notnull().sum())

13954

In [19]:
# Drop every record that has no CrimeType, keeping only the matched ones
cfs_final = merged_df.dropna(subset=['CrimeType'])

In [20]:
# Preview the first five rows of the final dataset
cfs_final.head(n=5)

Unnamed: 0,NOPD_Item,Type_,TypeText,Priority,InitialType,InitialTypeText,InitialPriority,MapX,MapY,TimeCreate,...,TimeClosed,Disposition,DispositionText,SelfInitiated,Beat,BLOCK_ADDRESS,Zip,PoliceDistrict,Location,CrimeType
31,A0026112,65,SIMPLE ROBBERY,1B,,,,3680346,531912,1/1/2012 1:44,...,1/1/2012 3:37,RTF,REPORT TO FOLLOW,,,002XX N Rampart St,70112.0,8,"(29.956608537919635, -90.07170060793311)",VIOLENT CRIME
41,A0031612,64G,ARMED ROBBERY WITH G,2A,,,,3702905,557853,1/1/2012 2:10,...,1/1/2012 3:06,RTF,REPORT TO FOLLOW,,,068XX Parc Brittany,70126.0,7,"(30.027234376749696, -89.9995136313823)",VIOLENT CRIME
42,A0031412,64G,ARMED ROBBERY WITH G,2A,,,,3709037,557274,1/1/2012 2:10,...,1/1/2012 3:10,RTF,REPORT TO FOLLOW,,,051XX Bundy Rd,70127.0,7,"(30.025444660587013, -89.98015884238222)",VIOLENT CRIME
43,A0031912,64G,ARMED ROBBERY WITH G,2A,,,,3704889,559236,1/1/2012 2:12,...,1/1/2012 3:40,RTF,REPORT TO FOLLOW,,,085XX N I-10 Service Rd,70127.0,7,"(30.030973436384716, -89.99319330558717)",VIOLENT CRIME
93,A0060312,34C,AGGRAVATED BATTERY B,2B,,,,3680929,530923,1/1/2012 6:14,...,1/1/2012 7:25,RTF,REPORT TO FOLLOW,,,Bourbon St & Canal St,70112.0,8,"(29.95387145933587, -90.06989411880384)",VIOLENT CRIME


In [21]:
# Distinct Type_ codes present in the final dataset
cfs_final['Type_'].unique()

array(['65', '64G', '34C', '64', '55', '30S', '37', '42', '34S', '43',
       '65J', '30', '37D', '34', '64J', '64K', '34D', '43M', '30C', '42M',
       '30D', '42B'], dtype=object)

In [22]:
# Row count of the final dataset
cfs_final.shape[0]

13954

In [None]:
# Save the final dataset as a comma-separated file without the index column.
# The path is built with os.path.join so the separators match the host OS;
# on Windows it is the same path as the previous hard-coded string.
fullpath = os.path.join("..", "Datasets", "Final_Data", "Calls_for_Service.csv")
cfs_final.to_csv(fullpath, sep=',', index=False)