## Florida Department of Corrections Inmate Crime and Tattoo Database
#### Joey, Matt, Sean

### 1, 2. The Dataset, Downloaded

In [1]:
import pandas
import numpy as np
import pandas_access
# Path to the FDOC database file (.mdb), relative to the notebook's working directory.
# NOTE(review): the filename suggests a January 2017 snapshot — confirm provenance.
DATASET = "./FDOC_January_2017.mdb"

### 3. Loading into a DF

In [2]:
# Read every table found in the database file into its own DataFrame,
# keyed by table name. low_memory=False is forwarded to read_table.
dfs = {
    table: pandas_access.read_table(DATASET, table, low_memory=False)
    for table in pandas_access.list_tables(DATASET)
}

### 4. Identifying Variables of Interest

In [3]:
# Show the names of the tables that were loaded from the database.
dfs.keys()

[u'INMATE_RELEASE_INCARHIST',
 u'INMATE_ACTIVE_ROOT',
 u'INMATE_RELEASE_DETAINERS',
 u'OFFENDER_OFFENSES_CCS',
 u'INMATE_RELEASE_OFFENSES_CPS',
 u'INMATE_RELEASE_RESIDENCE',
 u'INMATE_RELEASE_SCARSMARKS',
 u'INMATE_ACTIVE_ALIASES',
 u'INMATE_ACTIVE_DETAINERS',
 u'INMATE_RELEASE_OFFENSES_prpr',
 u'INMATE_ACTIVE_SCARSMARKS',
 u'OFFENDER_ROOT',
 u'OFFENDER_RESIDENCE',
 u'INMATE_ACTIVE_OFFENSES_prpr',
 u'OFFENDER_ALIASES',
 u'INMATE_RELEASE_ROOT',
 u'INMATE_ACTIVE_INCARHIST',
 u'INMATE_ACTIVE_OFFENSES_CPS',
 u'CONTENTS',
 u'INMATE_RELEASE_ALIASES']

### 5. Validation and Plotting

In [4]:
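# Dataset request page (Florida Department of Corrections OBIS): http://www.dc.state.fl.us/pub/obis_request.html
# Chart from The Economist (print edition dated 2016-12-24, per the URL): http://cdn.static-economist.com/sites/default/files/images/print-edition/20161224_XMC337_0.png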

In [5]:
# Stack the four offense tables (active and released inmates) into one frame.
offense_tables = (
    "INMATE_ACTIVE_OFFENSES_prpr",
    "INMATE_ACTIVE_OFFENSES_CPS",
    "INMATE_RELEASE_OFFENSES_prpr",
    "INMATE_RELEASE_OFFENSES_CPS",
)
crimes = pandas.concat([dfs[name] for name in offense_tables])

# Keep every charge description except the literal placeholder "UNKNOWN".
charge_descr = crimes.adjudicationcharge_descr
crime_descriptions = charge_descr[charge_descr != "UNKNOWN"]

# Tip: crime_descriptions.value_counts() lists the distinct descriptions by frequency.
# Number of charge descriptions that remain after the filter.
len(crime_descriptions)

2385210

In [6]:
# Map each crime category to the substrings that flag it in a charge description.
# A description can match several categories, so the counts are not mutually exclusive.
crime_types = {
    "drugs": ["DRUG", "COCAINE", "SUBS", "MARIJUANA"],
    "theft": ["BURG", "THEFT", "THFT"],
    "robbery": ["ROBB"],
    "assault": ["ASLT", "ASSLT", "ASSAULT", "BATT"],
    "murder": ["MURD", "MRDR"]
}

# Count the descriptions that mention at least one keyword of each category.
for category, keywords in crime_types.items():
    # The keywords are OR-ed together into a single regular expression.
    pattern = "|".join(keywords)
    # na=False treats missing descriptions as non-matches, and summing the boolean
    # mask yields 0 for a category with no hits (value_counts()[True] would raise
    # KeyError in that case).
    count = crime_descriptions.str.contains(pattern, na=False).sum()
    print("{0}: {1}".format(category, count))

drugs: 427856
assault: 206298
murder: 16334
robbery: 140578
theft: 720649


In [7]:
# Stack the scars/marks tables for active and released inmates into one frame.
scarsmarks_tables = ("INMATE_ACTIVE_SCARSMARKS", "INMATE_RELEASE_SCARSMARKS")
tattoos = pandas.concat([dfs[name] for name in scarsmarks_tables])

# The Description column holds the text that the keyword search runs against.
tattoo_descriptions = tattoos["Description"]

# Tip: tattoo_descriptions.value_counts() lists the distinct descriptions by frequency.
# Total number of scar/mark description rows.
len(tattoo_descriptions)

1266840

In [8]:
# Map each tattoo category to the substrings that flag it in a description.
# A description can match several categories, so the counts are not mutually exclusive.
# NOTE(review): "14" and "88" are matched as bare substrings, so any description
# containing those digits (e.g. a year such as 1988 or 2014) is counted as
# white supremacist — consider tightening these patterns and re-running.
tattoo_types = {
    "face": ["FACE", "HEAD", "CHEEK", "CHIN"],
    "white supremacist": ["WHITE", "ARYAN", "NAZI", "SWASTIKA", "14", "88"],
    "three dots": ["3 DOTS", "THREE DOTS", "THREEDOTS"],
    "tear drop": ["TEAR DROP", "TEARDROP"],
    "guns": ["GUN", "RIFLE", "BULLET"],
    "laugh now, cry later": ["LAUGH NOW CRY LATER", "LAUGH NOW, CRY LATER", "LAUGHNOWCRYLATER"],
    "christian": ["CROSS", "HOLY", "ANGEL", "JESUS", "PRAY"],
    "satanic": ["PENTA", "DEVIL", "DEMON", "BAPHOMET"]
}

# Count the descriptions that mention at least one keyword of each category.
for category, keywords in tattoo_types.items():
    # The keywords are OR-ed together into a single regular expression.
    pattern = "|".join(keywords)
    # na=False treats missing descriptions as non-matches, and summing the boolean
    # mask yields 0 for a category with no hits (value_counts()[True] would raise
    # KeyError in that case).
    count = tattoo_descriptions.str.contains(pattern, na=False).sum()
    print("{0}: {1}".format(category, count))

christian: 138926
three dots: 2753
satanic: 18522
tear drop: 7946
laugh now, cry later: 1750
guns: 21472
white supremacist: 6745
face: 71814


### 6. Cleaning Issues