In [2]:
import pandas as pd
import numpy as np

In [3]:
arrest_data = pd.read_csv("arrest-data.csv")
census_data = pd.read_excel("census-data.xlsx")

In [4]:
arrest_data.sample(15)

Unnamed: 0,PK,CCR,AGE,GENDER,RACE,ARRESTTIME,ARRESTLOCATION,OFFENSES,INCIDENTLOCATION,INCIDENTNEIGHBORHOOD,INCIDENTZONE,INCIDENTTRACT,COUNCIL_DISTRICT,PUBLIC_WORKS_DIVISION,X,Y
18759,1998319,18055598,25.0,M,W,2018-03-26T03:00:00,"300 Block Halket ST Pittsburgh, PA 15213",2701(a)(3) Simple Assault - Attempts by Physic...,"300 Block Halket ST Pittsburgh, PA 15213",Central Oakland,4,405.0,3.0,3.0,-79.96154,40.437078
34627,2018658,19180036,54.0,M,W,2019-09-07T16:40:00,"S 22nd ST & E Carson ST Pittsburgh, PA 15203",1301 Registration and Certificate of Title Req...,"S 22nd ST & E Carson ST Pittsburgh, PA 15203",South Side Flats,3,1609.0,3.0,3.0,-79.974642,40.428421
36115,2020461,19218424,48.0,F,B,2019-10-26T11:30:00,"N Euclid AV & East Liberty BL Pittsburgh, PA 1...",907 Possessing Instruments of Crime. / 4910 Ta...,"N Euclid AV & East Liberty BL Pittsburgh, PA 1...",East Liberty,5,1113.0,9.0,2.0,-79.925313,40.466725
31953,2015136,19113008,23.0,M,B,2019-06-10T18:27:00,"Vera Cruz WY & La Place ST Pittsburgh, PA 15219",2702 Aggravated Assault. / 2705 Recklessy Enda...,"400 Block Kirkpatrick ST Pittsburgh, PA 15219",Middle Hill,2,501.0,6.0,3.0,-79.975627,40.444473
33805,2017636,19138840,28.0,F,B,2019-08-21T13:50:00,"600 Block 1st AV Pittsburgh, PA 15219",2701 Simple Assault. / 2706 Terroristic Threat...,"S 17th ST & Bingham ST Pittsburgh, PA 15203",South Side Flats,3,1702.0,3.0,3.0,-79.981986,40.429469
21009,2001154,17073603,50.0,F,B,2018-06-09T21:25:00,"900 Block 2nd AV Pittsburgh, PA 15219",3929 Retail Theft.,"900 Block Freeport RD Pittsburgh, PA 15238",Lincoln-Lemington-Belmar,5,1201.0,9.0,2.0,-79.892353,40.486118
45181,2032844,20216258,20.0,F,W,2020-11-20T13:20:00,"N St Clair ST & Callowhill ST Pittsburgh, PA 1...",1310 Temporary Registration Cards. / 13(a)(32)...,"N St Clair ST & Callowhill ST Pittsburgh, PA 1...",Highland Park,5,1102.0,7.0,2.0,-79.91987,40.477567
24060,2005082,18173839,40.0,M,W,2018-09-06T02:00:00,"600 Block Boggs AV Pittsburgh, PA 15211",907 Possessing Instruments of Crime. / 4952 In...,"600 Block Boggs AV Pittsburgh, PA 15211",Mount Washington,3,1915.0,,,0.0,0.0
22093,2002560,18129436,39.0,M,W,2018-07-08T17:17:00,"Millbridge ST & Ceres WY Pittsburgh, PA 15210",908 Prohibited Offensive Weapons. / 1311 Regis...,"Millbridge ST & Ceres WY Pittsburgh, PA 15210",Allentown,3,1803.0,3.0,5.0,-79.995852,40.423252
37105,2021730,19243623,30.0,M,B,2019-12-04T11:50:00,"1400 Block Locust ST Pittsburgh, PA 15219",2701 Simple Assault.,"200 Block Heinz ST Pittsburgh, PA 15212",Troy Hill,1,2406.0,1.0,1.0,-79.990706,40.453728


In [5]:
# CRIME TYPES AND WEIGHTS
# Theft 4
# Burglary 4
# Simple Assault 2
# Aggravated Assault 4
# Homicide 10
# Robbery 4
# Kidnapping 8

# Idea: Get total offenses by neighborhood
#       Get number of different types of crimes by neighborhood
#       Multiply the crime types by (weight - 1) (so we can add their values to the total offenses by neighborhood)
#       Add the crime types by neighborhood value to total offenses
#       Divide this number by the population * some constant (maybe weighted crime per 10k or something)
#       Graph total offenses by neighborhood, different crimes by neighborhood, crimes per capita, weighted crimes per capita

# Creates a series for each crime in 'crimeTypes', containing the number of instances of that crime
# Each crime series is added to the dictionary 'crimeList'
def addCrimes(crimeTypes, crimeList):
    otherMask = offenses.str.contains("ABCDEFGHIJKLMNOP") # Should be false for everything
    
    for crime in crimeTypes:
        mask = offenses.str.contains(crime, na=False)
        a = arrest_data[mask].groupby("INCIDENTNEIGHBORHOOD")["OFFENSES"].count()
        crimeList[crime] = a
        otherMask = mask | otherMask # Sets any rows we used to true
    
    # All rows we DIDN'T use are added as "Other" (note the ~)
    a = arrest_data[~otherMask].groupby("INCIDENTNEIGHBORHOOD")["OFFENSES"].count()
    crimeList["Other"] = a
    

offenses = arrest_data["OFFENSES"]

# Group crimes by neighborhood into a dictionary
crimeList = {}
addCrimes(["Theft", "Burglary", "Simple Assault", "Aggravated Assault", "Homicide", "Robbery", "Kidnapping"], crimeList)


# Putt all crime types into one DataFrame
crimeInstances = pd.DataFrame(crimeList)

# merge crime types with population
cd = census_data.set_index("Neighborhood")["Pop. 2010"]
crimeInstances = crimeInstances.merge(cd, how='outer', left_index=True, right_index=True)
crimeInstances.fillna(0)

Unnamed: 0,Theft,Burglary,Simple Assault,Aggravated Assault,Homicide,Robbery,Kidnapping,Other,Pop. 2010
Allegheny Center,36.0,7.0,70.0,62.0,1.0,23.0,4.0,690.0,933.0
Allegheny West,10.0,6.0,11.0,3.0,0.0,0.0,0.0,63.0,462.0
Allentown,36.0,16.0,169.0,37.0,1.0,16.0,1.0,442.0,2500.0
Arlington,18.0,7.0,65.0,25.0,0.0,10.0,0.0,100.0,1869.0
Arlington Heights,3.0,0.0,36.0,29.0,0.0,7.0,0.0,43.0,244.0
...,...,...,...,...,...,...,...,...,...
Upper Lawrenceville,19.0,11.0,43.0,6.0,0.0,4.0,1.0,77.0,2669.0
West End,8.0,4.0,21.0,7.0,2.0,6.0,4.0,156.0,254.0
West Oakland,24.0,11.0,59.0,17.0,2.0,9.0,2.0,126.0,2604.0
Westwood,33.0,3.0,50.0,11.0,0.0,10.0,0.0,68.0,3066.0


In [6]:
# Theft 4
# Burglary 4
# Simple Assault 2
# Aggravated Assault 4
# Homicide 10
# Robbery 4
# Kidnapping 8
weighted = crimeInstances
weighted["Theft"] = crimeInstances["Theft"]*4
weighted["Burglary"] = crimeInstances["Burglary"]*4
weighted["Simple Assault"] = crimeInstances["Simple Assault"]*2
weighted["Aggravated Assault"] = crimeInstances["Aggravated Assault"]*4
weighted["Homicide"] = crimeInstances["Homicide"]*10
weighted["Robbery"] = crimeInstances["Robbery"]*4
weighted["Kidnapping"] = crimeInstances["Kidnapping"]*4

In [7]:
weighted = weighted.fillna(0)
weighted

Unnamed: 0,Theft,Burglary,Simple Assault,Aggravated Assault,Homicide,Robbery,Kidnapping,Other,Pop. 2010
Allegheny Center,144.0,28.0,140.0,248.0,10.0,92.0,16.0,690.0,933.0
Allegheny West,40.0,24.0,22.0,12.0,0.0,0.0,0.0,63.0,462.0
Allentown,144.0,64.0,338.0,148.0,10.0,64.0,4.0,442.0,2500.0
Arlington,72.0,28.0,130.0,100.0,0.0,40.0,0.0,100.0,1869.0
Arlington Heights,12.0,0.0,72.0,116.0,0.0,28.0,0.0,43.0,244.0
...,...,...,...,...,...,...,...,...,...
Upper Lawrenceville,76.0,44.0,86.0,24.0,0.0,16.0,4.0,77.0,2669.0
West End,32.0,16.0,42.0,28.0,20.0,24.0,16.0,156.0,254.0
West Oakland,96.0,44.0,118.0,68.0,20.0,36.0,8.0,126.0,2604.0
Westwood,132.0,12.0,100.0,44.0,0.0,40.0,0.0,68.0,3066.0


In [8]:
weighted["Total"] = weighted["Theft"]+weighted["Burglary"]+weighted["Simple Assault"]+weighted["Aggravated Assault"]+weighted["Homicide"]+weighted["Robbery"]+weighted["Kidnapping"]+weighted["Other"]
weighted

Unnamed: 0,Theft,Burglary,Simple Assault,Aggravated Assault,Homicide,Robbery,Kidnapping,Other,Pop. 2010,Total
Allegheny Center,144.0,28.0,140.0,248.0,10.0,92.0,16.0,690.0,933.0,1368.0
Allegheny West,40.0,24.0,22.0,12.0,0.0,0.0,0.0,63.0,462.0,161.0
Allentown,144.0,64.0,338.0,148.0,10.0,64.0,4.0,442.0,2500.0,1214.0
Arlington,72.0,28.0,130.0,100.0,0.0,40.0,0.0,100.0,1869.0,470.0
Arlington Heights,12.0,0.0,72.0,116.0,0.0,28.0,0.0,43.0,244.0,271.0
...,...,...,...,...,...,...,...,...,...,...
Upper Lawrenceville,76.0,44.0,86.0,24.0,0.0,16.0,4.0,77.0,2669.0,327.0
West End,32.0,16.0,42.0,28.0,20.0,24.0,16.0,156.0,254.0,334.0
West Oakland,96.0,44.0,118.0,68.0,20.0,36.0,8.0,126.0,2604.0,516.0
Westwood,132.0,12.0,100.0,44.0,0.0,40.0,0.0,68.0,3066.0,396.0


In [15]:
weighted["Total/Pop"]=weighted["Total"]/weighted["Pop. 2010"]
weighted
weighted.sort_values("Total/Pop").tail(10)


Unnamed: 0,Theft,Burglary,Simple Assault,Aggravated Assault,Homicide,Robbery,Kidnapping,Other,Pop. 2010,Total,Total/Pop
Chateau,88.0,20.0,68.0,64.0,0.0,16.0,12.0,251.0,11.0,519.0,47.181818
Outside State,8.0,0.0,8.0,0.0,0.0,0.0,0.0,43.0,0.0,59.0,inf
Outside City,24.0,0.0,10.0,0.0,0.0,4.0,0.0,209.0,0.0,247.0,inf
Central North Side,244.0,80.0,140.0,100.0,0.0,88.0,8.0,485.0,0.0,1145.0,inf
Troy Hill-Herrs Island,0.0,8.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,12.0,inf
Mt. Oliver Neighborhood,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,3.0,inf
Mt. Oliver Boro,4.0,0.0,8.0,0.0,0.0,0.0,0.0,12.0,0.0,24.0,inf
Mount Oliver,24.0,20.0,68.0,72.0,0.0,4.0,0.0,41.0,0.0,229.0,inf
Golden Triangle/Civic Arena,52.0,16.0,20.0,24.0,10.0,40.0,0.0,42.0,0.0,204.0,inf
Outside County,8.0,0.0,2.0,0.0,0.0,0.0,0.0,39.0,0.0,49.0,inf


My conclusion is that the best neighborhoods to live in are Mt. oliver Central Northside and Squirrel HIll NOrth as they have the least crimes by population. The data also shows places with ctimes but no pop but they most likely didnt have census data and should be excluded. SO the worst neighborhood in the group is chateau due to its very high crime rate.