## Florida Department of Corrections Inmate Crime and Tattoo Database
#### Joey, Matt, Sean

### 1, 2. The Dataset, Downloaded

In [1]:
import pandas
import numpy as np
import pandas_access
# Path to the Florida Department of Corrections database file (~1.3 GB;
# publicly available data).
# Source: http://www.dc.state.fl.us/pub/obis_request.html
DATASET = "./FDOC_January_2017.mdb"

### 3. Loading into a DF

In [2]:
# Load every table of the database into its own DataFrame, keyed by table name.
# low_memory=False is presumably forwarded to pandas' parser so column dtypes
# are inferred from whole columns -- TODO confirm against pandas_access.
dfs = {
    table: pandas_access.read_table(DATASET, table, low_memory=False)
    for table in pandas_access.list_tables(DATASET)
}

### 4. Identifying Variables of Interest

In [3]:
# Show the names of all tables that were loaded, to pick the ones of interest.
dfs.keys()

[u'INMATE_RELEASE_INCARHIST',
 u'INMATE_ACTIVE_ROOT',
 u'INMATE_RELEASE_DETAINERS',
 u'OFFENDER_OFFENSES_CCS',
 u'INMATE_RELEASE_OFFENSES_CPS',
 u'INMATE_RELEASE_RESIDENCE',
 u'INMATE_RELEASE_SCARSMARKS',
 u'INMATE_ACTIVE_ALIASES',
 u'INMATE_ACTIVE_DETAINERS',
 u'INMATE_RELEASE_OFFENSES_prpr',
 u'INMATE_ACTIVE_SCARSMARKS',
 u'OFFENDER_ROOT',
 u'OFFENDER_RESIDENCE',
 u'INMATE_ACTIVE_OFFENSES_prpr',
 u'OFFENDER_ALIASES',
 u'INMATE_RELEASE_ROOT',
 u'INMATE_ACTIVE_INCARHIST',
 u'INMATE_ACTIVE_OFFENSES_CPS',
 u'CONTENTS',
 u'INMATE_RELEASE_ALIASES']

### 5. Validation and Plotting

In [4]:
# Data source: http://www.dc.state.fl.us/pub/obis_request.html
# Reference chart from The Economist (validation target): http://cdn.static-economist.com/sites/default/files/images/print-edition/20161224_XMC337_0.png

First, let's look at the number and types of crimes:

In [5]:
# Stack the four offense tables (active and released inmates) into one frame.
offense_tables = [
    "INMATE_ACTIVE_OFFENSES_prpr",
    "INMATE_ACTIVE_OFFENSES_CPS",
    "INMATE_RELEASE_OFFENSES_prpr",
    "INMATE_RELEASE_OFFENSES_CPS",
]
crimes = pandas.concat([dfs[table_name] for table_name in offense_tables])

# Keep only charge descriptions that are not the literal placeholder "UNKNOWN".
# (crime_descriptions.value_counts() gives the full per-charge breakdown.)
is_known_charge = crimes.adjudicationcharge_descr != "UNKNOWN"
crime_descriptions = crimes.adjudicationcharge_descr[is_known_charge]
len(crime_descriptions)

2385210

In [6]:
# Keyword fragments used to bucket charge descriptions into broad crime
# categories. A description counts toward a category when it contains any of
# that category's keywords (the keywords are OR-ed into a single regex, so this
# is a substring match and one description may fall into several categories).
# NOTE(review): matching is case-sensitive; assumes descriptions are stored in
# upper case -- confirm against the data.
crime_types = {
    "drugs": ["DRUG", "COCAINE", "SUBS", "MARIJUANA"],
    "theft": ["BURG", "THEFT", "THFT"],
    "robbery": ["ROBB"],
    "assault": ["ASLT", "ASSLT", "ASSAULT", "BATT"],
    "murder": ["MURD", "MRDR"]
}
for category, keywords in crime_types.items():
    pattern = "|".join(keywords)
    # na=False treats missing descriptions as non-matches. Summing the boolean
    # mask yields 0 for a category with no hits, whereas value_counts()[True]
    # raises a KeyError in that case.
    count = int(crime_descriptions.str.contains(pattern, na=False).sum())
    print("{0}: {1}".format(category, count))

drugs: 427856
assault: 206298
murder: 16334
robbery: 140578
theft: 720649


Next, let's look at the number and types of tattoos:

In [7]:
# Stack the scars/marks tables for active and released inmates into one frame.
# NOTE(review): the table names suggest these may also hold non-tattoo entries
# (scars, marks); every row is treated as a tattoo here -- confirm.
scarsmarks_tables = ["INMATE_ACTIVE_SCARSMARKS", "INMATE_RELEASE_SCARSMARKS"]
tattoos = pandas.concat([dfs[table_name] for table_name in scarsmarks_tables])

# Free-text description column that the keyword matching runs against.
# (tattoo_descriptions.value_counts() gives the full per-description breakdown.)
tattoo_descriptions = tattoos["Description"]
len(tattoo_descriptions)

1266840

In [8]:
# Keyword fragments used to bucket tattoo descriptions into categories. A
# description counts toward a category when it contains any of that category's
# keywords (the keywords are OR-ed into a single regex, so this is a substring
# match and one description may fall into several categories).
# NOTE(review): "14" and "88" are matched as bare substrings, so any description
# containing those digits (e.g. inside a longer number) is counted as
# "white supremacist"; consider tightening these patterns.
tattoo_types = {
    "face": ["FACE", "HEAD", "CHEEK", "CHIN"],
    "white supremacist": ["WHITE", "ARYAN", "NAZI", "SWASTIKA", "14", "88"],
    "three dots": ["3 DOTS", "THREE DOTS", "THREEDOTS"],
    "tear drop": ["TEAR DROP", "TEARDROP"],
    "guns": ["GUN", "RIFLE", "BULLET"],
    "laugh now, cry later": ["LAUGH NOW CRY LATER", "LAUGH NOW, CRY LATER", "LAUGHNOWCRYLATER"],
    "christian": ["CROSS", "HOLY", "ANGEL", "JESUS", "PRAY"],
    "satanic": ["PENTA", "DEVIL", "DEMON", "BAPHOMET"]
}
for category, keywords in tattoo_types.items():
    pattern = "|".join(keywords)
    # na=False treats missing descriptions as non-matches. Summing the boolean
    # mask yields 0 for a category with no hits, whereas value_counts()[True]
    # raises a KeyError in that case.
    count = int(tattoo_descriptions.str.contains(pattern, na=False).sum())
    print("{0}: {1}".format(category, count))

christian: 138926
three dots: 2753
satanic: 18522
tear drop: 7946
laugh now, cry later: 1750
guns: 21472
white supremacist: 6745
face: 71814


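Now, let's join crimes to tattoos by inmate (`DCNumber`) and count how often each tattoo category co-occurs with each crime category: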

In [9]:
# Inner-join offense rows to tattoo rows on DCNumber.
# The join produces one row per (offense, tattoo) combination for each
# DCNumber, so a person with tattoo entries for several body locations has
# every offense repeated once per entry.
# TODO: find a way to combine crime/tattoo descriptions for rows with the same
# DCNumber.
crimes_and_tattoos = crimes.merge(tattoos, how="inner", on="DCNumber")

In [10]:
# Count, for every (tattoo category, crime category) pair, the rows of the
# merged table whose tattoo description AND charge description both match the
# category keywords. Results are stored in `groupings[(tattoo, crime)]`.

# Each crime mask is needed once per tattoo category, so build them up front
# instead of re-running the same regex inside the nested loop.
# na=False makes missing descriptions count as non-matches, keeping the masks
# strictly boolean.
crime_masks = {}
for crime, crimewords in crime_types.items():
    crime_masks[crime] = crimes_and_tattoos.adjudicationcharge_descr.str.contains(
        "|".join(crimewords), na=False)

groupings = {}
for tattoo, tattoowords in tattoo_types.items():
    # The tattoo mask does not depend on the crime, so compute it once per tattoo.
    tattoo_mask = crimes_and_tattoos.Description.str.contains(
        "|".join(tattoowords), na=False)
    for crime in crime_types.keys():
        # Summing the combined mask counts matching rows without materialising
        # a filtered copy of the (large) merged frame.
        count = int((tattoo_mask & crime_masks[crime]).sum())
        groupings[(tattoo, crime)] = count
        print("{0} tattoos -> {1} crimes: {2} instances".format(tattoo, crime, count))
    print("~")

christian tattoos -> drugs crimes: 137554 instances
christian tattoos -> assault crimes: 66975 instances
christian tattoos -> murder crimes: 4685 instances
christian tattoos -> robbery crimes: 44783 instances
christian tattoos -> theft crimes: 262135 instances
~
three dots tattoos -> drugs crimes: 1306 instances
three dots tattoos -> assault crimes: 1384 instances
three dots tattoos -> murder crimes: 142 instances
three dots tattoos -> robbery crimes: 1008 instances
three dots tattoos -> theft crimes: 5765 instances
~
satanic tattoos -> drugs crimes: 15281 instances
satanic tattoos -> assault crimes: 10874 instances
satanic tattoos -> murder crimes: 708 instances
satanic tattoos -> robbery crimes: 6562 instances
satanic tattoos -> theft crimes: 49704 instances
~
tear drop tattoos -> drugs crimes: 8059 instances
tear drop tattoos -> assault crimes: 4578 instances
tear drop tattoos -> murder crimes: 344 instances
tear drop tattoos -> robbery crimes: 3468 instances
tear drop tattoos -> th

In [11]:
# relative to crimes committed by same tattoo-havers
# For each tattoo category, compare every crime count with the mean count
# across that tattoo's crime categories and report "below" or "above".
# NOTE(review): these are raw co-occurrence counts, not normalised by how
# common each crime category is overall, so frequent categories tend to land
# "above average" for every tattoo.
n_crime_types = len(crime_types)
for tattoo in tattoo_types.keys():
    total = sum(groupings[(tattoo, crime)] for crime in crime_types.keys())
    # float() forces true division: with integer operands `/` floors on
    # Python 2, which mislabels a count equal to the floored mean as "above".
    avg = float(total) / n_crime_types
    for crime in crime_types.keys():
        relation = "below" if groupings[(tattoo, crime)] < avg else "above"
        print("{0}, {1}: {2} average".format(tattoo, crime, relation))
    print("~")

christian, drugs: above average
christian, assault: below average
christian, murder: below average
christian, robbery: below average
christian, theft: above average
~
three dots, drugs: below average
three dots, assault: below average
three dots, murder: below average
three dots, robbery: below average
three dots, theft: above average
~
satanic, drugs: below average
satanic, assault: below average
satanic, murder: below average
satanic, robbery: below average
satanic, theft: above average
~
tear drop, drugs: above average
tear drop, assault: below average
tear drop, murder: below average
tear drop, robbery: below average
tear drop, theft: above average
~
laugh now, cry later, drugs: above average
laugh now, cry later, assault: below average
laugh now, cry later, murder: below average
laugh now, cry later, robbery: below average
laugh now, cry later, theft: above average
~
guns, drugs: above average
guns, assault: below average
guns, murder: below average
guns, robbery: below average
gu

In [12]:
# relative to tattoos had by same crime-committers
# For each crime category, compare every tattoo count with the mean count
# across that crime's tattoo categories and report "below" or "above".
# NOTE(review): these are raw co-occurrence counts, not normalised by how
# common each tattoo category is overall, so frequent categories tend to land
# "above average" for every crime.
n_tattoo_types = len(tattoo_types)
for crime in crime_types.keys():
    total = sum(groupings[(tattoo, crime)] for tattoo in tattoo_types.keys())
    # float() forces true division: with integer operands `/` floors on
    # Python 2, which mislabels a count equal to the floored mean as "above".
    avg = float(total) / n_tattoo_types
    for tattoo in tattoo_types.keys():
        relation = "below" if groupings[(tattoo, crime)] < avg else "above"
        print("{0}, {1}: {2} average".format(tattoo, crime, relation))
    print("~")

christian, drugs: above average
three dots, drugs: below average
satanic, drugs: below average
tear drop, drugs: below average
laugh now, cry later, drugs: below average
guns, drugs: below average
white supremacist, drugs: below average
face, drugs: above average
~
christian, assault: above average
three dots, assault: below average
satanic, assault: below average
tear drop, assault: below average
laugh now, cry later, assault: below average
guns, assault: below average
white supremacist, assault: below average
face, assault: above average
~
christian, murder: above average
three dots, murder: below average
satanic, murder: below average
tear drop, murder: below average
laugh now, cry later, murder: below average
guns, murder: below average
white supremacist, murder: below average
face, murder: above average
~
christian, robbery: above average
three dots, robbery: below average
satanic, robbery: below average
tear drop, robbery: below average
laugh now, cry later, robbery: below averag

### 6. Cleaning Issues

* The same person (and thus, the same crimes) can be counted multiple times if they have multiple tattoo entries for different parts of their bodies.
* For validation, it's hard to know which baseline The Economist used: are their relationships based on the typical crimes for someone with certain tattoos, on the typical tattoos of someone who commits certain crimes, or on something else entirely?