# The best neighborhood in Pittsburgh by 3 metrics: Housing, Arrests, and Playgrounds

Playground Data

In [1]:
#Playground Data

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Builds playground_scores: one entry per neighborhood, combining equipment
# variety, ADA-accessible equipment, equipment count and playground count,
# then scaled so the best neighborhood scores 1.
playground_equipment = pd.read_csv("playground_data.csv")
playground_equipment.head()

grouped_equipment = playground_equipment.groupby('neighborhood')

# creating the dictionary with every neighborhood in the dataset and assigning it a score of 0 to start
playground_scores = dict.fromkeys(playground_equipment["neighborhood"].unique(), 0)

# setting up the constants
num_playgrounds_weight = 2
ada_accessible_weight = 4
quality_weight = 10

# Every aggregate below is computed once and walked with .items(), so each
# value is paired with its own neighborhood label instead of being fetched
# by integer position from a label-indexed Series.

# adding the score for number of unique equipment pieces (quality)
for neighborhood, unique_types in grouped_equipment["equipment_type"].nunique().items():
    playground_scores[neighborhood] += unique_types

# adding the score for the number of ADA accessible equipment pieces
# (the grouped value_counts() is keyed by (neighborhood, flag) pairs; only
# rows flagged "t" contribute)
for (neighborhood, flag), pieces in grouped_equipment["ada_accessible"].value_counts().items():
    if flag == "t":
        playground_scores[neighborhood] += pieces * ada_accessible_weight

# weight the quality score
# (this must happen before the quantity terms are added so they stay unweighted)
for neighborhood in playground_scores:
    playground_scores[neighborhood] *= quality_weight

# adding the score for number of pieces of equipment (quantity)
for neighborhood, pieces in grouped_equipment["id"].count().items():
    playground_scores[neighborhood] += pieces

# adding the score for number of playgrounds (quantity)
for neighborhood, playgrounds in grouped_equipment["name"].nunique().items():
    playground_scores[neighborhood] += playgrounds * num_playgrounds_weight

# normalizing the score to a (0, 1) range
# default=0 and the guard keep an empty or all-zero table from raising
max_score = max(playground_scores.values(), default=0)
if max_score > 0:
    for neighborhood in playground_scores:
        playground_scores[neighborhood] = round(playground_scores[neighborhood] / max_score, 3)

Arrests Data

In [16]:
# Load the arrest records from the CSV dump hosted on data.wprdc.org
arrests_csv_url = "https://data.wprdc.org/datastore/dump/e03a89dd-134a-4ee8-a2bd-62c40aeebc6f"
arrests = pd.read_csv(arrests_csv_url)
# Import RegEx for Python
import re

# Offence codes that carry a special point value, grouped by that value
plus_2 = [3929, 2705, 3304, 3921, 3736]
plus_3 = [4304, 2701, 3701, 3502, 2709, 3702, 3124, 3126]
plus_4 = [2702]
plus_5 = [3121, 2504, 2718]
plus_7 = [2901]
plus_10 = [2501, 3732, 3735]


def get_value(code):
    """Return the point value for an offence code.

    A code found in one of the plus_N lists is worth N points; anything
    else (including None) is worth 1 point.
    """
    # Tiers are checked in the same order as the point values increase;
    # the lists are looked up at call time, so later edits to them apply.
    tiers = (
        (2, plus_2),
        (3, plus_3),
        (4, plus_4),
        (5, plus_5),
        (7, plus_7),
        (10, plus_10),
    )
    for points, codes in tiers:
        if code in codes:
            return points
    return 1


# Builds scores: total offence points per neighborhood, scaled so the
# neighborhood with the most points scores 1.
scores = {}

# Compiled once; the first run of digits in an offence is taken as its code
offence_code_pattern = re.compile(r'\d+')

# Walk the two columns in lockstep rather than indexing the frame cell by cell
for neighborhood, offences_str in zip(arrests['INCIDENTNEIGHBORHOOD'], arrests["OFFENSES"]):

    # Make sure a valid entry is made in each column
    if not (isinstance(neighborhood, str) and isinstance(offences_str, str)):
        continue

    # Filter out bad input (checked once per row, not once per offence)
    if neighborhood[:3] == "Mt." or "-Herrs" in neighborhood:
        continue

    # Individual offences are separated by '/'
    for offence in offences_str.split('/'):

        # Get offence code by regex; an offence without digits keeps the
        # code None, which get_value scores with its default of 1 point
        code_match = offence_code_pattern.search(offence)
        offence_code = int(code_match.group()) if code_match is not None else None

        # Add the point value for that code to the scores dictionary
        scores[neighborhood] = scores.get(neighborhood, 0) + get_value(offence_code)

# Normalize the score
# default=0 keeps an empty table from raising; the loop then has nothing to do
max_score = max(scores.values(), default=0)
for neighborhood in scores:
    scores[neighborhood] = round(scores[neighborhood] / max_score, 3)

Combining all of them:

In [32]:
# Combines the per-metric tables into final_scores and prints the ten
# highest-scoring neighborhoods.
final_scores = dict()

playground_weighting = 1
arrest_weighting = 1

#adding scores for playgrounds
for neighborhood, playground_score in playground_scores.items():
    final_scores[neighborhood] = playground_score * playground_weighting

#adding scores for arrests
# the arrest score is inverted (1 - score) so fewer arrest points rank higher;
# neighborhoods missing from the playground table start from 0
for neighborhood, arrest_score in scores.items():
    final_scores[neighborhood] = (
        final_scores.get(neighborhood, 0) + (1 - arrest_score) * arrest_weighting
    )

#sorting total
final_sorted = sorted(final_scores.items(), key=lambda x: x[1], reverse=True)
# slicing instead of indexing 0..9 copes with fewer than ten neighborhoods
for rank, (neighborhood, total) in enumerate(final_sorted[:10], start=1):
    print(f"{rank}. {neighborhood} - {total}")

1. Squirrel Hill South - 1.78
2. Elliott - 1.312
3. Beechview - 1.267
4. South Side Slopes - 1.256
5. Highland Park - 1.241
6. Regent Square - 1.229
7. Morningside - 1.225
8. Swisshelm Park - 1.22
9. Beltzhoover - 1.212
10. New Homestead - 1.173
