In [14]:
# This file processes the data in the "Public Life" category of the ranking system. This category looks at
# two different data sets: one that counts the public city facilities in a neighborhood, and one that
# counts the pieces of public art in a neighborhood. The more a neighborhood has of either of these two
# things, the higher that neighborhood will rank.
# Author: Paolo Hidalgo (phidalgodiblasio)

# Import libraries
import pandas as pd

def main():
    """Rank neighborhoods by city facilities and by public art, then save and print both rankings.

    Reads "City Facilities.csv" and "City of Pittsburgh Public Art.csv" from the
    working directory, writes "facilities_ranked.csv" and "art_ranked.csv"
    (without the index column), and prints both ranked data frames.
    """
    # Load the two raw data sets before doing any ranking.
    facilities_raw = pd.read_csv("City Facilities.csv")
    art_raw = pd.read_csv("City of Pittsburgh Public Art.csv")

    # A neighborhood ranks better the more facilities / pieces of public art
    # it has in the corresponding data set.
    ranked_facilities = rank_data(facilities_raw)
    ranked_art = rank_data(art_raw)

    # Persist both rankings as CSV files.
    for ranking, out_path in (
        (ranked_facilities, "facilities_ranked.csv"),
        (ranked_art, "art_ranked.csv"),
    ):
        ranking.to_csv(out_path, index=False)

    # Show both rankings, separated by a divider line.
    print(
        "City Facilities:",
        ranked_facilities,
        "\n==================================\n",
        "Public Art:",
        ranked_art,
        sep="\n",
    )

def rank_data(df):
    """Rank every neighborhood by how often it appears in ``df``.

    Neighborhoods that occur in ``df["neighborhood"]`` are ordered by their
    number of rows (most rows first) and receive the consecutive ranks
    1, 2, 3, ...  Every neighborhood listed in ``neighborhoods.txt`` (one name
    per line, read from the current working directory) that does not occur in
    ``df`` is appended afterwards; all of those share a single rank, one past
    the last rank handed out to the counted neighborhoods.

    Args:
        df: Data frame with a ``neighborhood`` column, one row per counted item.

    Returns:
        A data frame with the columns ``neighborhood`` and ``rank``, one row
        per neighborhood, indexed 0..n-1 in ranking order.

    Raises:
        KeyError: If ``df`` has no ``neighborhood`` column.
        FileNotFoundError: If ``neighborhoods.txt`` does not exist.
    """
    # Count the rows per neighborhood, most frequent first. Series.items()
    # is used because Series.iteritems() no longer exists in pandas 2.x.
    counts = df["neighborhood"].value_counts().sort_values(ascending=False)
    names = [name for name, _ in counts.items()]
    ranks = list(range(1, len(names) + 1))

    # All neighborhoods missing from df tie for the next rank.
    shared_rank = len(names) + 1

    # Exact-match lookup: a substring/regex test would wrongly treat
    # "Point Breeze" as present when only "Point Breeze North" is, and would
    # interpret the "." in "St. Clair" as a wildcard.
    seen = set(names)
    with open('neighborhoods.txt', 'r') as listing:
        for line in listing.read().splitlines():
            name = line.strip()
            # Skip blank lines and names that are already ranked (including
            # duplicates within the file itself).
            if not name or name in seen:
                continue
            seen.add(name)
            names.append(name)
            ranks.append(shared_rank)

    return pd.DataFrame({"neighborhood": names, "rank": ranks})

# Run the ranking only when this file is executed directly (as a script or in
# a notebook cell), not as a side effect of importing it.
if __name__ == "__main__":
    main()

City Facilities:
           neighborhood rank
0         Highland Park    1
1   Squirrel Hill South    2
2           Perry North    3
3             Brookline    4
4        Strip District    5
..                  ...  ...
85            Ridgemont   76
86          South Shore   76
87        Spring Garden   76
88            St. Clair   76
89          Summer Hill   76

[90 rows x 2 columns]


Public Art:
                 neighborhood rank
0   Central Business District    1
1         Squirrel Hill South    2
2            Allegheny Center    3
3               Highland Park    4
4            South Side Flats    5
..                        ...  ...
85                Summer Hill   56
86            Terrace Village   56
87               West Oakland   56
88                   Westwood   56
89                    Windgap   56

[90 rows x 2 columns]
