### Chicago Crime Dataset

In [21]:
from sklearn.model_selection import train_test_split
import pandas as pd
import data_prep_utils as utils
import numpy as np

In [2]:
chicago_crime = pd.read_csv('data/chicago.csv')

In [3]:
chicago_crime.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 211346 entries, 0 to 211345
Data columns (total 22 columns):
 #   Column                Non-Null Count   Dtype  
---  ------                --------------   -----  
 0   ID                    211346 non-null  int64  
 1   Case Number           211346 non-null  object 
 2   Date                  211346 non-null  object 
 3   Block                 211346 non-null  object 
 4   IUCR                  211346 non-null  object 
 5   Primary Type          211346 non-null  object 
 6   Description           211346 non-null  object 
 7   Location Description  210130 non-null  object 
 8   Arrest                211346 non-null  bool   
 9   Domestic              211346 non-null  bool   
 10  Beat                  211346 non-null  int64  
 11  District              211346 non-null  int64  
 12  Ward                  211337 non-null  float64
 13  Community Area        211345 non-null  float64
 14  FBI Code              211346 non-null  object 
 15  

In [4]:
chicago_crime.Date = pd.to_datetime(chicago_crime.Date)

In [5]:
chicago_crime = chicago_crime[chicago_crime['Date'].dt.year >= 2008]

In [6]:
chicago_crime.shape

(211346, 22)

In [7]:
chicago_crime['Primary Type'].unique()

array(['THEFT', 'MOTOR VEHICLE THEFT', 'ASSAULT', 'DECEPTIVE PRACTICE',
       'NARCOTICS', 'CRIMINAL DAMAGE', 'ROBBERY', 'BATTERY',
       'OTHER OFFENSE', 'BURGLARY', 'INTERFERENCE WITH PUBLIC OFFICER',
       'SEX OFFENSE', 'WEAPONS VIOLATION', 'OFFENSE INVOLVING CHILDREN',
       'STALKING', 'PUBLIC PEACE VIOLATION', 'ARSON', 'KIDNAPPING',
       'CRIMINAL TRESPASS', 'INTIMIDATION', 'CRIMINAL SEXUAL ASSAULT',
       'CONCEALED CARRY LICENSE VIOLATION', 'HOMICIDE',
       'LIQUOR LAW VIOLATION', 'GAMBLING', 'OBSCENITY', 'PROSTITUTION',
       'PUBLIC INDECENCY', 'HUMAN TRAFFICKING',
       'OTHER NARCOTIC VIOLATION', 'CRIM SEXUAL ASSAULT', 'NON-CRIMINAL',
       'RITUALISM'], dtype=object)

### Theft crimes

In [8]:
chicago_theft_crimes = chicago_crime.copy()

In [9]:
chicago_theft = chicago_theft_crimes['Primary Type'].apply(lambda x: 1 if x ==  'THEFT' else 0)

In [10]:
chicago_theft

0         1
1         0
2         0
3         0
4         0
         ..
211341    0
211342    0
211343    0
211344    1
211345    0
Name: Primary Type, Length: 211346, dtype: int64

In [11]:
chicago_theft_crimes['Primary Type'] = chicago_theft

In [12]:
chicago_theft_crimes = utils.coord_to_grid(chicago_theft_crimes, lat_col='Latitude', lon_col='Longitude')

In [13]:
chicago_theft_crimes = utils.feature_reduce(
    chicago_theft_crimes,
    features=['Date', 'binned_latitude', 'binned_longitude', 'Primary Type'],
    sort_by='Primary Type')

In [14]:
chicago_theft_crimes.head()

Unnamed: 0,Date,binned_latitude,binned_longitude,Primary Type
0,2020-03-17 21:30:00,13,7,1
60006,2020-05-23 15:14:00,9,11,1
163156,2020-03-12 17:00:00,11,7,1
60104,2020-01-13 21:00:00,15,9,1
163149,2020-03-07 23:00:00,14,8,1


In [15]:
chicago_theft_crimes = utils.convert_to_image_data(chicago_theft_crimes.head(1000), crime_col='Primary Type')

Converting...: 34it [00:00, 1707.43it/s]

Time Elapsed: 0 min





In [16]:
chicago_theft_crimes['batches']

34

In [17]:
chicago_theft_X_train, chicago_theft_X_test, chicago_theft_y_train, chicago_theft_y_test = train_test_split(
    chicago_theft_crimes['inputs'],
    chicago_theft_crimes['outputs'],
    test_size=0.33,
    random_state=42)

In [20]:
chicago_theft_X_train.ndim

5

In [23]:
with open('data/binary-classification/chicago/chicago_theft_X_train.npy', 'wb') as f:
    np.save(f, chicago_theft_X_train)

with open('data/binary-classification/chicago/chicago_theft_X_test.npy', 'wb') as f:
    np.save(f, chicago_theft_X_test)
    
with open('data/binary-classification/chicago/chicago_theft_y_train.npy', 'wb') as f:
    np.save(f, chicago_theft_y_train)

with open('data/binary-classification/chicago/chicago_theft_y_test.npy', 'wb') as f:
    np.save(f, chicago_theft_y_test)

### Battery crimes

In [10]:
chicago_battery_crimes = chicago_crime['Primary Type'].apply(lambda x: 1 if x ==  'BATTERY' else 0)

### Criminal damage crimes

In [11]:
chicago_criminal_damage_crimes = chicago_crime['Primary Type'].apply(lambda x: 1 if x ==  'CRIMINAL DAMAGE' else 0)

In [12]:
chicago_theft_crimes.info()

AttributeError: 'Series' object has no attribute 'info'

### Seattle Crime Dataset

In [16]:
seattle_crime = pd.read_csv('seattle.csv')

In [17]:
seattle_crime.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 961128 entries, 0 to 961127
Data columns (total 17 columns):
 #   Column                  Non-Null Count   Dtype  
---  ------                  --------------   -----  
 0   Report Number           961128 non-null  object 
 1   Offense ID              961128 non-null  int64  
 2   Offense Start DateTime  960173 non-null  object 
 3   Offense End DateTime    530226 non-null  object 
 4   Report DateTime         961128 non-null  object 
 5   Group A B               961128 non-null  object 
 6   Crime Against Category  961128 non-null  object 
 7   Offense Parent Group    961128 non-null  object 
 8   Offense                 961128 non-null  object 
 9   Offense Code            961128 non-null  object 
 10  Precinct                961124 non-null  object 
 11  Sector                  961126 non-null  object 
 12  Beat                    961126 non-null  object 
 13  MCPP                    961125 non-null  object 
 14  100 Block Address   

In [18]:
seattle_crime.Date = pd.to_datetime(seattle_crime['Report DateTime'])

  if __name__ == '__main__':


In [30]:
seattle_crime_report_dates = seattle_crime['Report DateTime'].tolist()

In [32]:
seattle_crime_report_dates.sort(reverse=True)
seattle_crime_report_range = [seattle_crime_report_dates[0], seattle_crime_report_dates[-1]]
seattle_crime_report_range

['12/31/2021 12:56:55 AM', '01/01/2008 01:00:00 AM']

In [33]:
len(seattle_crime_report_dates)

961128

In [54]:
!echo *.csv >> .gitignore



In [55]:
!cat .gitignore

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

In [56]:
!git status

On branch main
Your branch is up to date with 'origin/main'.

Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git checkout -- <file>..." to discard changes in working directory)

	[31mmodified:   .gitignore[m

Untracked files:
  (use "git add <file>..." to include in what will be committed)

	[31m3D CNN for Crime Classification (Binary Classification).ipynb[m
	[31mBinary Classification Data Preparation.ipynb[m
	[31mchicago.csv[m
	[31mseattle.csv[m

no changes added to commit (use "git add" and/or "git commit -a")


In [62]:
!git status

On branch main
Your branch is ahead of 'origin/main' by 1 commit.
  (use "git push" to publish your local commits)

Changes to be committed:
  (use "git reset HEAD <file>..." to unstage)

	[32mnew file:   3D CNN for Crime Classification (Binary Classification).ipynb[m
	[32mnew file:   Binary Classification Data Preparation.ipynb[m
	[32mnew file:   chicago.csv[m
	[32mnew file:   seattle.csv[m



In [58]:
!git config --global user.name "Lloyd"

In [59]:
!git config --global user.email lloydmatereke23@gmail.com