In [1]:
import pandas as pd
import numpy as np

In [3]:
arrest_data = pd.read_csv("arrest-data.csv")
census_data = pd.read_excel("census-data.xlsx")

In [4]:
arrest_data.sample(15)

Unnamed: 0,PK,CCR,AGE,GENDER,RACE,ARRESTTIME,ARRESTLOCATION,OFFENSES,INCIDENTLOCATION,INCIDENTNEIGHBORHOOD,INCIDENTZONE,INCIDENTTRACT,COUNCIL_DISTRICT,PUBLIC_WORKS_DIVISION,X,Y
31393,2014419,19103332,27.0,F,W,2019-05-29T12:45:00,"5100 Block Liberty AV Pittsburgh, PA 15224",3746 Immediate Notice of Accident to Police De...,"5100 Block Liberty AV Pittsburgh, PA 15224",Bloomfield,5.0,804.0,7.0,2.0,-79.943776,40.458509
37614,2022404,19255908,58.0,M,B,2019-12-17T18:35:00,"1400 Block 5th AV Pittsburgh, PA 15219",13(a)(16) Possession of Controlled Substance /...,"1400 Block 5th AV Pittsburgh, PA 15219",Bluff,2.0,103.0,6.0,3.0,-79.986313,40.438378
23259,2004038,18126409,53.0,M,B,2018-07-04T16:50:00,"1200 Block Paulson AV Pittsburgh, PA 15206",2701 Simple Assault. / 2702 Aggravated Assault...,",",,,,,,0.0,0.0
29759,2012320,18241776,31.0,M,B,2019-04-08T17:25:00,"900 Block Second AV Pittsburgh, PA 15219",2702 Aggravated Assault. / 3701 Robbery.,"2600 Block S 18th ST Pittsburgh, PA 15210",South Side Slopes,3.0,1706.0,3.0,3.0,-79.985287,40.421072
46175,2034302,21011343,55.0,F,W,2021-01-20T12:25:00,"2000 Block Wharton ST Pittsburgh, PA 15203","3929(a)(1) Retail Theft; takes possession of, ...","2000 Block Wharton ST Pittsburgh, PA 15203",South Side Flats,3.0,1702.0,3.0,3.0,-79.976433,40.430341
22165,2002641,18132247,22.0,M,B,2018-07-12T11:22:00,"5500 Block Margaretta ST Pittsburgh, PA 15206",903 Criminal Conspiracy. / 6105(a)(1) Persons ...,"5500 Block Margaretta ST Pittsburgh, PA 15206",Garfield,5.0,1114.0,9.0,2.0,-79.930164,40.468379
23671,2004589,18158392,58.0,M,B,2018-08-17T09:56:00,"7200 Block Frankstown AV Pittsburgh, PA 15208",13(a)(32) Paraphernalia - Use or Possession,"7200 Block Frankstown AV Pittsburgh, PA 15208",Homewood South,5.0,1303.0,9.0,2.0,-79.89627,40.457389
44900,2032462,20206433,24.0,M,W,2020-11-04T20:12:00,"4TH ST & Cherry WY Pittsburgh, PA 15219",2707 Propulsion of Missiles / 5507 Obstructin...,"FIFTH AV & Grant ST Pittsburgh, PA 15219",Central Business District,2.0,201.0,6.0,6.0,-79.996811,40.439395
8127,1984765,17064631,14.0,M,B,2017-05-03T15:45:00,"600 Block 1st AV Pittsburgh, PA 15219",903 Criminal Conspiracy. / 2701 Simple Assault...,"Beechview AV & Broadway AV Pittsburgh, PA 15216",Beechview,6.0,1916.0,4.0,5.0,-80.02468,40.41062
40179,2025825,20052200,27.0,M,B,2020-03-15T04:07:00,"Perrysville AV & Kennedy AV Pittsburgh, PA 15214",3334 Turning Movements and Required Signals. /...,"Perrysville AV & Kennedy AV Pittsburgh, PA 15214",Perry South,1.0,2615.0,1.0,1.0,-80.010431,40.474311


In [15]:
# CRIME TYPES AND WEIGHTS
# Theft 4
# Burglary 4
# Simple Assault 2
# Aggravated Assault 4
# Homicide 10
# Robbery 4
# Kidnapping 8

# Idea: Get total offenses by neighborhood
#       Get number of different types of crimes by neighborhood
#       Multiply the crime types by (weight - 1) (so we can add their values to the total offenses by neighborhood)
#       Add the crime types by neighborhood value to total offenses
#       Divide this number by the population * some constant (maybe weighted crime per 10k or something)
#       Graph total offenses by neighborhood, different crimes by neighborhood, crimes per capita, weighted crimes per capita

# Creates a series for each crime in 'crimeTypes', containing the number of instances of that crime
# Each crime series is added to the dictionary 'crimeList'
def addCrimes(crimeTypes, crimeList):
    otherMask = offenses.str.contains("ABCDEFGHIJKLMNOP") # Should be false for everything
    
    for crime in crimeTypes:
        mask = offenses.str.contains(crime, na=False)
        a = arrest_data[mask].groupby("INCIDENTNEIGHBORHOOD")["OFFENSES"].count()
        crimeList[crime] = a
        otherMask = mask | otherMask # Sets any rows we used to true
    
    # All rows we DIDN'T use are added as "Other" (note the ~)
    a = arrest_data[~otherMask].groupby("INCIDENTNEIGHBORHOOD")["OFFENSES"].count()
    crimeList["Other"] = a
    

offenses = arrest_data["OFFENSES"]

# Group crimes by neighborhood into a dictionary
crimeList = {}
addCrimes(["Theft", "Burglary", "Simple Assault", "Aggravated Assault", "Homicide", "Robbery", "Kidnapping"], crimeList)


# Putt all crime types into one DataFrame
crimeInstances = pd.DataFrame(crimeList)

# merge crime types with population
cd = census_data.set_index("Neighborhood")["Pop. 2010"]
crimeInstances = crimeInstances.merge(cd, how='outer', left_index=True, right_index=True)
crimeInstances.fillna(0)

Unnamed: 0,Theft,Burglary,Simple Assault,Aggravated Assault,Homicide,Robbery,Kidnapping,Other,Pop. 2010
Allegheny Center,36.0,7.0,70.0,62.0,1.0,23.0,4.0,690.0,933.0
Allegheny West,10.0,6.0,11.0,3.0,0.0,0.0,0.0,63.0,462.0
Allentown,36.0,16.0,169.0,37.0,1.0,16.0,1.0,442.0,2500.0
Arlington,18.0,7.0,65.0,25.0,0.0,10.0,0.0,100.0,1869.0
Arlington Heights,3.0,0.0,36.0,29.0,0.0,7.0,0.0,43.0,244.0
...,...,...,...,...,...,...,...,...,...
Upper Lawrenceville,19.0,11.0,43.0,6.0,0.0,4.0,1.0,77.0,2669.0
West End,8.0,4.0,21.0,7.0,2.0,6.0,4.0,156.0,254.0
West Oakland,24.0,11.0,59.0,17.0,2.0,9.0,2.0,126.0,2604.0
Westwood,33.0,3.0,50.0,11.0,0.0,10.0,0.0,68.0,3066.0


In [16]:
# Theft 4
# Burglary 4
# Simple Assault 2
# Aggravated Assault 4
# Homicide 10
# Robbery 4
# Kidnapping 8
weighted = crimeInstances
weighted["Theft"] = crimeInstances["Theft"]*4
weighted["Burglary"] = crimeInstances["Burglary"]*4
weighted["Simple Assault"] = crimeInstances["Simple Assault"]*2
weighted["Aggravated Assault"] = crimeInstances["Aggravated Assault"]*4
weighted["Homicide"] = crimeInstances["Homicide"]*10
weighted["Robbery"] = crimeInstances["Robbery"]*4
weighted["Kidnapping"] = crimeInstances["Kidnapping"]*4

In [18]:
weighted = weighted.fillna(0)
weighted

Unnamed: 0,Theft,Burglary,Simple Assault,Aggravated Assault,Homicide,Robbery,Kidnapping,Other,Pop. 2010
Allegheny Center,144.0,28.0,140.0,248.0,10.0,92.0,16.0,690.0,933.0
Allegheny West,40.0,24.0,22.0,12.0,0.0,0.0,0.0,63.0,462.0
Allentown,144.0,64.0,338.0,148.0,10.0,64.0,4.0,442.0,2500.0
Arlington,72.0,28.0,130.0,100.0,0.0,40.0,0.0,100.0,1869.0
Arlington Heights,12.0,0.0,72.0,116.0,0.0,28.0,0.0,43.0,244.0
...,...,...,...,...,...,...,...,...,...
Upper Lawrenceville,76.0,44.0,86.0,24.0,0.0,16.0,4.0,77.0,2669.0
West End,32.0,16.0,42.0,28.0,20.0,24.0,16.0,156.0,254.0
West Oakland,96.0,44.0,118.0,68.0,20.0,36.0,8.0,126.0,2604.0
Westwood,132.0,12.0,100.0,44.0,0.0,40.0,0.0,68.0,3066.0


In [20]:
weighted["Total"] = weighted["Theft"]+weighted["Burglary"]+weighted["Simple Assault"]+weighted["Aggravated Assault"]+weighted["Homicide"]+weighted["Robbery"]+weighted["Kidnapping"]+weighted["Other"]
weighted

Unnamed: 0,Theft,Burglary,Simple Assault,Aggravated Assault,Homicide,Robbery,Kidnapping,Other,Pop. 2010,Total
Allegheny Center,144.0,28.0,140.0,248.0,10.0,92.0,16.0,690.0,933.0,1368.0
Allegheny West,40.0,24.0,22.0,12.0,0.0,0.0,0.0,63.0,462.0,161.0
Allentown,144.0,64.0,338.0,148.0,10.0,64.0,4.0,442.0,2500.0,1214.0
Arlington,72.0,28.0,130.0,100.0,0.0,40.0,0.0,100.0,1869.0,470.0
Arlington Heights,12.0,0.0,72.0,116.0,0.0,28.0,0.0,43.0,244.0,271.0
...,...,...,...,...,...,...,...,...,...,...
Upper Lawrenceville,76.0,44.0,86.0,24.0,0.0,16.0,4.0,77.0,2669.0,327.0
West End,32.0,16.0,42.0,28.0,20.0,24.0,16.0,156.0,254.0,334.0
West Oakland,96.0,44.0,118.0,68.0,20.0,36.0,8.0,126.0,2604.0,516.0
Westwood,132.0,12.0,100.0,44.0,0.0,40.0,0.0,68.0,3066.0,396.0


In [23]:
weighted["Total/Pop"]=weighted["Total"]/weighted["Pop. 2010"]
weighted

Unnamed: 0,Theft,Burglary,Simple Assault,Aggravated Assault,Homicide,Robbery,Kidnapping,Other,Pop. 2010,Total,Total/Pop
Allegheny Center,144.0,28.0,140.0,248.0,10.0,92.0,16.0,690.0,933.0,1368.0,1.466238
Allegheny West,40.0,24.0,22.0,12.0,0.0,0.0,0.0,63.0,462.0,161.0,0.348485
Allentown,144.0,64.0,338.0,148.0,10.0,64.0,4.0,442.0,2500.0,1214.0,0.485600
Arlington,72.0,28.0,130.0,100.0,0.0,40.0,0.0,100.0,1869.0,470.0,0.251471
Arlington Heights,12.0,0.0,72.0,116.0,0.0,28.0,0.0,43.0,244.0,271.0,1.110656
...,...,...,...,...,...,...,...,...,...,...,...
Upper Lawrenceville,76.0,44.0,86.0,24.0,0.0,16.0,4.0,77.0,2669.0,327.0,0.122518
West End,32.0,16.0,42.0,28.0,20.0,24.0,16.0,156.0,254.0,334.0,1.314961
West Oakland,96.0,44.0,118.0,68.0,20.0,36.0,8.0,126.0,2604.0,516.0,0.198157
Westwood,132.0,12.0,100.0,44.0,0.0,40.0,0.0,68.0,3066.0,396.0,0.129159
