In [2]:
import pandas as pd

df = pd.read_excel('../data/DU LAW REQUEST.XLSX')

In [4]:
df.head()

Unnamed: 0,COURT_CASE,VIOLATION_DATE,CURRENT_STREET_ADDRESS,STATUTE_DESC,STATUE_NUMBER,LOCATION
0,CR-2015-0000038-GE,2015-01-04 04:50:00,YES,Trespassing,5-4-3,2900 IRIS
1,CR-2015-0000042-GE,2015-01-04 15:40:00,NONE,Trespassing,5-4-3,1800 28TH ST (MCDONALDS)
2,CR-2015-0000046-GE,2015-01-03 23:58:00,YES,Trespassing,5-4-3,1905 15TH ST
3,CR-2015-0000047-GE,2015-01-01 15:43:00,2960 VALMONT RD,Trespassing,5-4-3,3255 28TH ST
4,CR-2015-0000061-GE,2015-01-01 20:05:00,NONE,Trespassing,5-4-3,1650 30TH ST


In [14]:
print('''There are {} rows. The date range is {:%Y-%m-%d} to {:%Y-%m-%d}.

There are {} unique "court cases".
'''.format(
        df.shape[0],
        df.VIOLATION_DATE.min(),
        df.VIOLATION_DATE.max(),
        df.COURT_CASE.nunique(),
))

There are 4568 rows. The date range is 2015-01-01 to 2017-12-30.

There are 3854 unique "court cases".



In [30]:
all_statute_counts = df.STATUTE_DESC.value_counts().to_frame()

1. Can we find the number of charges that aren't bundled with other ones?
2. Which of sets of citations tend to go together?

In [28]:
# create a dataframe with only the citations that were given independently

onesies = df.groupby('COURT_CASE').first()[df.groupby('COURT_CASE').count().VIOLATION_DATE == 1].reset_index()

In [66]:
onesie_charges_merged = pd.merge(onesies.STATUTE_DESC.value_counts().to_frame(), all_statute_counts, how='inner', left_index=True, right_index=True, suffixes=('_onesies', '_all')).fillna(0).rename(columns={
        'STATUTE_DESC_onesies': 'count_onesies',
        'STATUTE_DESC_all': 'count_all',
    })
onesie_charges_merged['proportion_onesies'] = onesie_charges_merged['count_onesies'] / onesie_charges_merged['count_all']

In [67]:
onesie_charges_merged

Unnamed: 0,count_onesies,count_all,proportion_onesies
Trespassing,1252,1564,0.800512
Smoking Prohibited in Public Areas,901,969,0.929825
Camping or Lodging on Property Without Consent,746,916,0.81441
Urinating in Public,251,347,0.723343
Tents And Nets Prohibited,82,104,0.788462
Trespassing on Public Property,20,28,0.714286
Staying on Medians Prohibited,11,12,0.916667
Using a Vehicle as a Residence,9,11,0.818182
Aggressive Begging Prohibited,6,8,0.75
"Obstructing Public Streets, Places - Buildings",3,6,0.5


In [53]:
antihomeless_statutes = onesie_charges_merged.index.tolist()

In [60]:
only_antihomeless_encounters = df.groupby('COURT_CASE').filter(lambda group: group.STATUTE_DESC.isin(antihomeless_statutes).all())

print('''If we ask for encounters where someone was only given ONE citation (no matter what it is), we get {} records.

But if we ask for encounters where all the citations were antihomeless, we get {} records.

(Out of {} total unique encounters).
'''.format(
        onesies.shape[0],
        only_antihomeless_encounters.shape[0],
        df.COURT_CASE.nunique()
    ))



If we ask for encounters where someone was only given ONE citation (no matter what it is), we get 3281 records.

But if we ask for encounters where all the citations were antihomeless, we get 3451 records.

(Out of 3854 total unique encounters).



In [68]:
onesie_charges_merged

Unnamed: 0,count_onesies,count_all,proportion_onesies
Trespassing,1252,1564,0.800512
Smoking Prohibited in Public Areas,901,969,0.929825
Camping or Lodging on Property Without Consent,746,916,0.81441
Urinating in Public,251,347,0.723343
Tents And Nets Prohibited,82,104,0.788462
Trespassing on Public Property,20,28,0.714286
Staying on Medians Prohibited,11,12,0.916667
Using a Vehicle as a Residence,9,11,0.818182
Aggressive Begging Prohibited,6,8,0.75
"Obstructing Public Streets, Places - Buildings",3,6,0.5


In [73]:
antihomeless_charges_merged = pd.merge(all_statute_counts, only_antihomeless_encounters.STATUTE_DESC.value_counts().to_frame(), how='inner', left_index=True, right_index=True, suffixes=('_all', '_antihomeless')).fillna(0).rename(columns={
        'STATUTE_DESC_antihomeless': 'count_antihomeless',
        'STATUTE_DESC_all': 'count_all',
    })
antihomeless_charges_merged['count_onesies'] = onesie_charges_merged['count_onesies']
antihomeless_charges_merged['proportion_antihomeless'] = antihomeless_charges_merged['count_antihomeless'] / antihomeless_charges_merged['count_all']
antihomeless_charges_merged['proportion_onesies'] = onesie_charges_merged['proportion_onesies']

antihomeless_charges_merged

Unnamed: 0,count_all,count_antihomeless,count_onesies,proportion_antihomeless,proportion_onesies
Trespassing,1564,1324,1252,0.846547,0.800512
Smoking Prohibited in Public Areas,969,908,901,0.937049,0.929825
Camping or Lodging on Property Without Consent,916,815,746,0.889738,0.81441
Urinating in Public,347,260,251,0.74928,0.723343
Tents And Nets Prohibited,104,91,82,0.875,0.788462
Trespassing on Public Property,28,23,20,0.821429,0.714286
Staying on Medians Prohibited,12,12,11,1.0,0.916667
Using a Vehicle as a Residence,11,9,9,0.818182,0.818182
Aggressive Begging Prohibited,8,6,6,0.75,0.75
"Obstructing Public Streets, Places - Buildings",6,3,3,0.5,0.5
