11/18/2023

Pruning all other datasets to (potentially) use for the SPD project.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import re

In [2]:
def remove_unwanted(dataset, arr_keep): # helper function for getting rid of unwanted columns
    """Return a copy of ``dataset`` that keeps only the columns named in ``arr_keep``.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame to prune. It is not modified; a new frame is returned.
    arr_keep : sequence of column labels
        Labels to retain. Labels that do not exist in ``dataset`` are ignored,
        so a misspelled label silently causes the real column to be dropped --
        check the printed list of removed columns.

    Returns
    -------
    pandas.DataFrame
        ``dataset`` without the removed columns.
    """
    # set difference: every column label of dataset that is not in arr_keep (sorted, unique)
    arr_remove = np.setdiff1d(list(dataset), arr_keep)
    print(arr_remove)
    # drop everything in one call instead of copying the whole frame once per column
    return dataset.drop(columns=list(arr_remove))

In [3]:
# raw inputs, read once; the "og*" frames are copied before any pruning
ogcrime = pd.read_csv(filepath_or_buffer="SPD_Crime_Data__2008-Present.csv")
ogdemo = pd.read_csv(filepath_or_buffer="Census_Tract_Top_50_American_Communiy_Survey_Data.csv")
ogemploy = pd.read_csv(filepath_or_buffer="1980-2022 FTLEE Total.csv")

In [4]:
# work on a copy so the raw frame in ogcrime stays untouched
crime = ogcrime.copy()

In [5]:
crime.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1086921 entries, 0 to 1086920
Data columns (total 17 columns):
 #   Column                  Non-Null Count    Dtype  
---  ------                  --------------    -----  
 0   Report Number           1086921 non-null  object 
 1   Offense ID              1086921 non-null  int64  
 2   Offense Start DateTime  1085392 non-null  object 
 3   Offense End DateTime    618352 non-null   object 
 4   Report DateTime         1086921 non-null  object 
 5   Group A B               1086921 non-null  object 
 6   Crime Against Category  1086921 non-null  object 
 7   Offense Parent Group    1086921 non-null  object 
 8   Offense                 1086921 non-null  object 
 9   Offense Code            1086921 non-null  object 
 10  Precinct                1086902 non-null  object 
 11  Sector                  1086904 non-null  object 
 12  Beat                    1086904 non-null  object 
 13  MCPP                    1086906 non-null  object 
 14  10

In [74]:
crime.head()

Unnamed: 0,Report Number,Offense ID,Offense Start DateTime,Offense End DateTime,Report DateTime,Group A B,Crime Against Category,Offense Parent Group,Offense,Offense Code,Precinct,Sector,Beat,MCPP,100 Block Address,Longitude,Latitude
0,2020-044620,12605873663,02/05/2020 10:10:00 AM,,02/05/2020 11:24:31 AM,A,SOCIETY,DRUG/NARCOTIC OFFENSES,Drug/Narcotic Violations,35A,W,Q,Q1,MAGNOLIA,32XX BLOCK OF 23RD AVE W,-122.385974,47.649387
1,2020-044452,12605598696,02/03/2020 08:00:00 AM,02/04/2020 08:00:00 AM,02/05/2020 10:06:28 AM,A,PROPERTY,LARCENY-THEFT,Theft of Motor Vehicle Parts or Accessories,23G,N,J,J3,ROOSEVELT/RAVENNA,63XX BLOCK OF 5TH AVE NE,-122.323399,47.675118
2,2020-044465,12605567653,02/02/2020 08:30:00 PM,02/02/2020 09:30:00 PM,02/05/2020 09:39:33 AM,A,PROPERTY,ROBBERY,Robbery,120,N,U,U3,ROOSEVELT/RAVENNA,26TH AVE NE / NE BLAKELEY ST,-122.299552,47.666384
3,2020-044225,12605174036,02/05/2020 01:17:00 AM,02/05/2020 02:21:00 AM,02/05/2020 03:30:55 AM,A,PROPERTY,DESTRUCTION/DAMAGE/VANDALISM OF PROPERTY,Destruction/Damage/Vandalism of Property,290,W,Q,Q1,MAGNOLIA,22XX BLOCK OF W RAYE ST,-122.384865,47.642927
4,2020-044076,12605081469,02/05/2020 12:51:21 AM,,02/05/2020 12:51:31 AM,B,SOCIETY,DRIVING UNDER THE INFLUENCE,Driving Under the Influence,90D,N,B,B2,BALLARD SOUTH,NW 46TH ST / 8TH AVE NW,-122.366195,47.662193


In [6]:
# remove columns we determined were useless
# (labels must match the CSV header exactly: a misspelled label is silently dropped)
crime = remove_unwanted(crime, ["Report Number", "Offense Start DateTime", "Beat"]) # list of attributes to keep

['100 Block Address' 'Crime Against Category' 'Group A B' 'Latitude'
 'Longitude' 'MCPP' 'Offense' 'Offense Code' 'Offense End DateTime'
 'Offense ID' 'Offense Parent Group' 'Precinct' 'Report DateTime' 'Sector']


In [7]:
# parse the start timestamp (12-hour clock with AM/PM) so the year can be extracted
crime["Offense Start DateTime"] = pd.to_datetime(crime["Offense Start DateTime"], format='%m/%d/%Y %I:%M:%S %p')
# prune years to match those of the main dataset (2010-2022 inclusive);
# rows with a missing start time compare False on both sides and are dropped here too
start_year = crime["Offense Start DateTime"].dt.year
crime = crime[(start_year > 2009) & (start_year < 2023)].reset_index(drop=True)
crime["Og_Year"] = crime["Offense Start DateTime"].dt.year # extract years (integer: no missing dates remain)
crime = crime.drop("Offense Start DateTime", axis=1)

In [8]:
crime.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 894140 entries, 0 to 894139
Data columns (total 3 columns):
 #   Column         Non-Null Count   Dtype 
---  ------         --------------   ----- 
 0   Report Number  894140 non-null  object
 1   Beat           894136 non-null  object
 2   Og_Year        894140 non-null  int64 
dtypes: int64(1), object(2)
memory usage: 20.5+ MB


In [11]:
crime = crime[(crime["Beat"] != "UNKNOWN")] # get rid of anomalous beats
# drop any row that still has a missing value, then renumber the rows from 0
crime = crime.dropna(how='any', axis=0).reset_index(drop=True)

In [12]:
crime.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 888465 entries, 0 to 888464
Data columns (total 3 columns):
 #   Column         Non-Null Count   Dtype 
---  ------         --------------   ----- 
 0   Report Number  888465 non-null  object
 1   Beat           888465 non-null  object
 2   Og_Year        888465 non-null  int64 
dtypes: int64(1), object(2)
memory usage: 20.3+ MB


In [13]:
crime.head()

Unnamed: 0,Report Number,Beat,Og_Year
0,2020-044620,Q1,2020
1,2020-044452,J3,2020
2,2020-044465,U3,2020
3,2020-044225,Q1,2020
4,2020-044076,B2,2020


In [14]:
#crime.to_csv('crime_pruned_better.csv', index=False)

In [15]:
# work on a copy so the raw frame in ogemploy stays untouched
employ = ogemploy.copy()

In [16]:
employ.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11538 entries, 0 to 11537
Data columns (total 13 columns):
 #   Column                              Non-Null Count  Dtype  
---  ------                              --------------  -----  
 0   Year                                11538 non-null  int64  
 1   County                              11538 non-null  object 
 2   Agency                              11538 non-null  object 
 3   Population                          11538 non-null  object 
 4   Commissioned Male                   11537 non-null  object 
 5   Commissioned Female                 11537 non-null  float64
 6   Commissioned Total                  11538 non-null  object 
 7   Commissioned Rate per 1,000 in Pop  11538 non-null  float64
 8   Civilian Male                       11537 non-null  float64
 9   Civilian Female                     11536 non-null  float64
 10  Civilian Total                      11538 non-null  object 
 11  Civilian Rate per 1,000 in Pop      11538

In [17]:
# remove columns we determined were useless
# ("Agency" must be kept: the next step filters on it)
employ = remove_unwanted(employ,["Year", "Agency", "Population", "Commissioned Total", "Commissioned Rate per 1,000 in Pop"])

['Civilian Female' 'Civilian Male' 'Civilian Rate per 1,000 in Pop'
 'Civilian Total' 'Commissioned Female' 'Commissioned Male' 'County'
 'Total Full-Time Employees']


In [18]:
# keep only Seattle Police Department rows from 2010 onward, renumbered from 0
is_spd = employ["Agency"] == "Seattle Police Department"
is_recent = employ["Year"] > 2009
employ = employ[is_spd & is_recent].reset_index(drop=True)

In [19]:
employ.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13 entries, 0 to 12
Data columns (total 5 columns):
 #   Column                              Non-Null Count  Dtype  
---  ------                              --------------  -----  
 0   Year                                13 non-null     int64  
 1   Agency                              13 non-null     object 
 2   Population                          13 non-null     object 
 3   Commissioned Total                  13 non-null     object 
 4   Commissioned Rate per 1,000 in Pop  13 non-null     float64
dtypes: float64(1), int64(1), object(3)
memory usage: 648.0+ bytes


In [101]:
print(employ)

    Year                     Agency Population Commissioned Total  \
0   2010  Seattle Police Department    612,000              1,344   
1   2011  Seattle Police Department    612,100              1,305   
2   2012  Seattle Police Department    616,500              1,289   
3   2013  Seattle Police Department    626,600              1,294   
4   2014  Seattle Police Department    640,500              1,323   
5   2015  Seattle Police Department    662,400              1,350   
6   2016  Seattle Police Department    686,800              1,384   
7   2017  Seattle Police Department    713,700              1,448   
8   2018  Seattle Police Department    730,400              1,420   
9   2019  Seattle Police Department    747,300              1,416   
10  2020  Seattle Police Department    761,100              1,341   
11  2021  Seattle Police Department    769,500               1178   
12  2022  Seattle Police Department    762,500               1077   

    Commissioned Rate per 1,000 i

In [102]:
# the counts are stored as text such as "1,344"; strip the thousands separators and convert to ints
for column in ("Commissioned Total", "Population"):
    employ[column] = [int(value.replace(',', '')) for value in employ[column]]

In [103]:
#employ.to_csv('employ_pruned.csv', index=False)

In [115]:
# work on a copy so the raw frame in ogdemo stays untouched
demo = ogdemo.copy()

In [116]:
demo.head(5)

Unnamed: 0,OBJECTID,GEOID,NAME,TRACT_LABEL,ACRES_LAND,JURISDICTION,CRA_NO,CRA_GRP,GEN_ALIAS,DETL_NAMES,...,PCT_POPULATION_UNDER_POVERTY,All_Families_whom_poverty_det,All_Families_under_Poverty,PCT_ALL_FAMILY_UNDER_POVERTY,POP_INCPOV_RATIO_2_00_AND_OVER,POP_INCPOV_RATIO_BELOW_2_00,PCT_POP_BELOW_200_POVERTY,ACS_VINTAGE,SHAPE_Length,SHAPE_Area
0,795,53033001500,Census Tract 15,15.0,283.174324,Seattle,10.1,10.0,North Beach/Blue Ridge,"Crown Hill, North Beach, Blue Ridge",...,4.7,608,6,1.0,2203,195,8.1,5Y15,0.046785,0.000137
1,796,53033001600,Census Tract 16,16.0,476.750292,Seattle,10.1,10.0,North Beach/Blue Ridge,"Crown Hill, North Beach, Blue Ridge",...,6.4,1099,60,5.5,3572,531,12.9,5Y15,0.079382,0.000231
2,797,53033020100,Census Tract 201,201.0,429.128356,King County Balance,,,,,...,5.6,842,9,1.1,2663,437,14.1,5Y15,0.060516,0.000208
3,798,53033003000,Census Tract 30,30.0,368.968923,Seattle,10.2,10.0,Whittier Heights,"Whittier Heights, Loyal Heights, Crown Hill",...,6.0,1320,30,2.3,4945,764,13.4,5Y15,0.054577,0.000179
4,799,53033001702,Census Tract 17.02,17.02,316.330411,Seattle,9.3,9.0,Greenwood/Phinney Ridge,"Greenwood, Phinney Ridge, Woodland Park, Crown...",...,10.3,838,32,3.8,3217,1058,24.7,5Y15,0.059178,0.000153


In [117]:
# keep only tracts whose jurisdiction is Seattle, renumbered from 0
demo = demo.loc[demo["JURISDICTION"] == "Seattle"].reset_index(drop=True)

In [118]:
# remove columns we determined were useless
demo_keep = [  # list of attributes to keep
    "GEN_ALIAS",
    'TOTAL_POPULATION',
    'PCT_NOTHISP_WHITE_ONE',
    'PCT_NOTHISP_BLACK_ONE',
    'PCT_NOTHISP_AMIAK_ONE',
    "PCT_NOTHISP_ASIAN_ONE",
    "PCT_NOTHISP_PI_ONE",
    "PCT_HISP_ANY_RACE",
    'PCT_POP_UNDER_18',
    "PCT_POP_IN_HH",
    "PCT_POPULATION_UNDER_POVERTY",
    "ACS_VINTAGE",
]
demo = remove_unwanted(demo, demo_keep)

# originally left these in, but person of color summarizes all of them:
# 'DETL_NAMES', 'PCT_POP_65_OVER', 'MEDIAN_AGE', 'PCT_NOTHISP_WHITE_ONE', 'PCT_NOTHISP_BLACK_ONE', 'PCT_NOTHISP_AMIAK_ONE', "PCT_NOTHISP_ASIAN_ONE", "PCT_NOTHISP_PI_ONE", "PCT_NOTHISP_OTHER_ONE", "PCT_NOTHISP_TWO_ONE", "PCT_HISP_ANY_RACE", "MEDIAN_HH_INC_PAST_12MO_DOLLAR"

['ACRES_LAND' 'AGE_18_AND_OVER' 'AVERAGE_FAMILY_SIZE'
 'AVERAGE_HOUSEHOLD_SIZE' 'Age_65_and_over' 'Age_Under_18'
 'All_Families_under_Poverty' 'All_Families_whom_poverty_det'
 'Bachelor_degree_or_higher' 'CIVILIAN_LABOR_FORCE'
 'CIVILIAN_LABOR_FORCE_EMPLOYED' 'CIVILIAN_LABOR_FORCE_UNEMPLOYD'
 'CRA_GRP' 'CRA_NO' 'DETL_NAMES' 'FAMILY_HOUSEHOLDS' 'GEOID'
 'GRAPI_15_0_TO_19_9' 'GRAPI_20_0_TO_24_9' 'GRAPI_25_0_TO_29_9'
 'GRAPI_30_0_TO_34_9' 'GRAPI_35_0_OR_MORE' 'GRAPI_LESS_THAN_15_0'
 'GRAPI_NOT_COMPUTED' 'HH_INCOME_100_000_TO_149_999'
 'HH_INCOME_10_000_TO_14_999' 'HH_INCOME_150_000_TO_199_999'
 'HH_INCOME_15_000_TO_24_999' 'HH_INCOME_200_000_OR_MORE'
 'HH_INCOME_25_000_TO_34_999' 'HH_INCOME_35_000_TO_49_999'
 'HH_INCOME_50_000_TO_74_999' 'HH_INCOME_75_000_TO_99_999'
 'HH_INCOME_LESS_THAN_10_000' 'HISPANIC_OR_LATINO_OF_ANY_RACE'
 'HOUSEHOLDER_LIVING_ALONE' 'HOUSEHOLDS' 'HU_VALUE_100_000_TO_149_999'
 'HU_VALUE_150_000_TO_199_999' 'HU_VALUE_1_000_000_OR_MORE'
 'HU_VALUE_200_000_TO_299_999' '

In [119]:
demo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 618 entries, 0 to 617
Data columns (total 12 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   GEN_ALIAS                     618 non-null    object 
 1   TOTAL_POPULATION              618 non-null    int64  
 2   PCT_POP_UNDER_18              618 non-null    float64
 3   PCT_NOTHISP_WHITE_ONE         618 non-null    float64
 4   PCT_NOTHISP_BLACK_ONE         618 non-null    float64
 5   PCT_NOTHISP_AMIAK_ONE         618 non-null    float64
 6   PCT_NOTHISP_ASIAN_ONE         618 non-null    float64
 7   PCT_NOTHISP_PI_ONE            618 non-null    float64
 8   PCT_HISP_ANY_RACE             618 non-null    float64
 9   PCT_POP_IN_HH                 618 non-null    float64
 10  PCT_POPULATION_UNDER_POVERTY  618 non-null    float64
 11  ACS_VINTAGE                   618 non-null    object 
dtypes: float64(9), int64(1), object(2)
memory usage: 58.1+ KB


In [12]:
#demo.to_csv('demo_pruned_full.csv', index=False)

In [120]:
# discard the "5Y21" vintage rows and renumber the remaining rows from 0
demo = demo.loc[demo["ACS_VINTAGE"].ne("5Y21")].reset_index(drop=True)

In [121]:
# turn the census estimate vintage labels into plain year labels;
# any vintage not listed here would map to NaN
vintage_to_year = {"5Y10": 2010, "5Y15": 2015, "5Y20": 2020}
demo["Year"] = demo["ACS_VINTAGE"].map(vintage_to_year)
demo = demo.drop(columns="ACS_VINTAGE")

In [122]:
demo.head()

Unnamed: 0,GEN_ALIAS,TOTAL_POPULATION,PCT_POP_UNDER_18,PCT_NOTHISP_WHITE_ONE,PCT_NOTHISP_BLACK_ONE,PCT_NOTHISP_AMIAK_ONE,PCT_NOTHISP_ASIAN_ONE,PCT_NOTHISP_PI_ONE,PCT_HISP_ANY_RACE,PCT_POP_IN_HH,PCT_POPULATION_UNDER_POVERTY,Year
0,North Beach/Blue Ridge,2398,19.3,92.0,0.0,0.2,2.4,0.0,3.9,100.0,4.7,2015
1,North Beach/Blue Ridge,4111,18.4,82.3,0.6,0.3,6.2,0.0,4.9,99.1,6.4,2015
2,Whittier Heights,5719,23.0,85.1,0.6,0.0,4.1,0.2,5.1,99.8,6.0,2015
3,Greenwood/Phinney Ridge,4408,11.8,74.5,1.4,0.7,9.9,0.0,7.0,96.8,10.3,2015
4,Greenwood/Phinney Ridge,4303,20.3,89.1,0.0,0.1,4.2,0.2,1.9,99.8,4.1,2015


In [123]:
#demo.to_csv('demo_pruned_better2.csv', index=False)

From here, I grouped the rows by year and neighborhood name and averaged every numeric column, in order to make later data processing easier.

In [124]:
# NOTE(review): no cell visible in this notebook writes "demo_pruned_better4.csv"; presumably it holds
# the per-year, per-neighborhood averages described above and was produced outside this notebook -- confirm
demo2 = pd.read_csv("demo_pruned_better4.csv")

In [125]:
demo2.head()

Unnamed: 0,Year,Name,totpop,young,white,black,native,asian,island,hisp,housed,poverty
0,2010,Alki/Admiral,5668.0,14.65,86.65,1.3,0.4,4.3,0.4,3.5,97.55,6.15
1,2010,Arbor Heights,3098.5,18.85,78.85,1.4,2.0,6.7,0.0,5.6,100.0,5.1
2,2010,Ballard,5764.0,7.0,79.6,1.7,0.0,6.4,0.4,9.0,100.0,9.4
3,2010,Beacon Hill,4485.0,22.2,22.85,13.0,0.0,55.5,3.65,1.55,99.8,6.45
4,2010,Belltown,3867.0,1.95,71.95,6.65,0.85,11.05,0.0,6.15,100.0,19.1


In [126]:
# turned neighborhood names into beats in order to match with main dataset
# turned neighborhood names into beats in order to match with main dataset
# Each neighborhood name maps to a list of one or more beat codes; several neighborhoods share a beat.
# Series.map returns NaN for any name missing from this dict, so every value of "Name" must appear here.
demo2["Beat"] = demo2["Name"].map({"Alki/Admiral":["W1"],"Arbor Heights":["W3"],"Ballard":["B1", "J1", "J2"],"Beacon Hill":["R1"],"Belltown":["D1"],
                                      "Broadview/Bitter Lake":["N1"],"Capitol Hill":["E1", "E2"],"Cascade/Eastlake":["D3"],"Cedar Park/Meadowbrook":["L1"],
                                      "Central Area/Squire Park":["C3"],"Columbia City":["R3"],"Downtown Commercial Core":["M1", "M2", "M3", "K1"],"Duwamish/SODO":["O1"],
                                      "Fauntleroy/Seaview":["W3"],"First Hill":["E3", "G1"],"Fremont":["B2"],"Georgetown":["O2", "O3"],"Green Lake":["J3"],
                                      "Greenwood/Phinney Ridge":["J1"],"Haller Lake":["N2"],"High Point":["F1"],"Highland Park":["F3"],"Interbay":["Q1"],"Judkins Park":["G3"],
                                      "Laurelhurst/Sand Point":["U3"],"Licton Springs":["N3"],"Madison Park":["C2"],"Madrona/Leschi":["G2"],"Magnolia":["Q1"],"Miller Park":["C2"],
                                      "Montlake/Portage Bay":["C2"],"Mt. Baker/North Rainier":["R2"],"North Beach/Blue Ridge":["N1"],"North Beacon Hill/Jefferson Park":["R1"],
                                      "North Capitol Hill":["C1"],"North Delridge":["F1"],"Northgate/Maple Leaf":["L2"],"Olympic Hills/Victory Heights":["L1"],
                                      "Pioneer Square/International District":["K2", "K3"],"Queen Anne":["Q2", "Q3", "D2"],"Rainier Beach":["S2", "S3"],"Ravenna/Bryant":["U1"],
                                      "Riverview":["F1"],"Roxhill/Westwood":["F2"],"Seward Park":["R3"],"South Beacon Hill/NewHolly":["S1"],"South Park":["F3"],
                                      "Sunset Hill/Loyal Heights":["J1", "J2"],"University District":["U2"],"Wallingford":["B3"],"Wedgwood/View Ridge":["L3"],
                                      "West Seattle Junction/Genesee Hill":["W2"],"Whittier Heights":["J2"]})

In [127]:
demo2.head()

Unnamed: 0,Year,Name,totpop,young,white,black,native,asian,island,hisp,housed,poverty,Beat
0,2010,Alki/Admiral,5668.0,14.65,86.65,1.3,0.4,4.3,0.4,3.5,97.55,6.15,[W1]
1,2010,Arbor Heights,3098.5,18.85,78.85,1.4,2.0,6.7,0.0,5.6,100.0,5.1,[W3]
2,2010,Ballard,5764.0,7.0,79.6,1.7,0.0,6.4,0.4,9.0,100.0,9.4,"[B1, J1, J2]"
3,2010,Beacon Hill,4485.0,22.2,22.85,13.0,0.0,55.5,3.65,1.55,99.8,6.45,[R1]
4,2010,Belltown,3867.0,1.95,71.95,6.65,0.85,11.05,0.0,6.15,100.0,19.1,[D1]


In [128]:
demo2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 159 entries, 0 to 158
Data columns (total 13 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Year     159 non-null    int64  
 1   Name     159 non-null    object 
 2   totpop   159 non-null    float64
 3   young    159 non-null    float64
 4   white    159 non-null    float64
 5   black    159 non-null    float64
 6   native   159 non-null    float64
 7   asian    159 non-null    float64
 8   island   159 non-null    float64
 9   hisp     159 non-null    float64
 10  housed   159 non-null    float64
 11  poverty  159 non-null    float64
 12  Beat     159 non-null    object 
dtypes: float64(10), int64(1), object(2)
memory usage: 16.3+ KB


In [133]:
# expand beats on a copy so demo2 keeps its list-valued "Beat" column
demo2_new = demo2.copy()

In [134]:
# some rows have multiple beats due to the sizes of their respective neighborhoods
# this code expands those rows, making one row for each beat in their original beat array
for index, row in demo2_new.iterrows():
    #print(index)
    beats = np.array(row['Beat'])
    if len(beats) != 1:
        temp = beats[1:]
        for k in range(len(temp)):
            demo2_new.loc[len(demo2_new.index)] = [row["Year"], row['Name'], row["totpop"], row["young"], row["white"], row["black"], row["native"], row["asian"], row["island"],
                                                   row["hisp"], row["housed"], row["poverty"], temp[k]]
    demo2_new.loc[index, ['Beat']] = beats[0]

In [135]:
demo2_new.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 198 entries, 0 to 197
Data columns (total 13 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Year     198 non-null    int64  
 1   Name     198 non-null    object 
 2   totpop   198 non-null    float64
 3   young    198 non-null    float64
 4   white    198 non-null    float64
 5   black    198 non-null    float64
 6   native   198 non-null    float64
 7   asian    198 non-null    float64
 8   island   198 non-null    float64
 9   hisp     198 non-null    float64
 10  housed   198 non-null    float64
 11  poverty  198 non-null    float64
 12  Beat     198 non-null    object 
dtypes: float64(10), int64(1), object(2)
memory usage: 29.8+ KB


In [136]:
# used for debugging beat expansion: print each neighborhood name followed by its beat
for name, beat in zip(demo2_new['Name'], demo2_new['Beat']):
    print(name)
    print(beat)

Alki/Admiral
W1
Arbor Heights
W3
Ballard
B1
Beacon Hill
R1
Belltown
D1
Broadview/Bitter Lake
N1
Capitol Hill
E1
Cascade/Eastlake
D3
Cedar Park/Meadowbrook
L1
Central Area/Squire Park
C3
Columbia City
R3
Downtown Commercial Core
M1
Duwamish/SODO
O1
Fauntleroy/Seaview
W3
First Hill
E3
Fremont
B2
Georgetown
O2
Green Lake
J3
Greenwood/Phinney Ridge
J1
Haller Lake
N2
High Point
F1
Highland Park
F3
Interbay
Q1
Judkins Park
G3
Laurelhurst/Sand Point
U3
Licton Springs
N3
Madison Park
C2
Madrona/Leschi
G2
Magnolia
Q1
Miller Park
C2
Montlake/Portage Bay
C2
Mt. Baker/North Rainier
R2
North Beach/Blue Ridge
N1
North Beacon Hill/Jefferson Park
R1
North Capitol Hill
C1
North Delridge
F1
Northgate/Maple Leaf
L2
Olympic Hills/Victory Heights
L1
Pioneer Square/International District
K2
Queen Anne
Q2
Rainier Beach
S2
Ravenna/Bryant
U1
Riverview
F1
Roxhill/Westwood
F2
Seward Park
R3
South Beacon Hill/NewHolly
S1
South Park
F3
Sunset Hill/Loyal Heights
J1
University District
U2
Wallingford
B3
Wedgwood/Vie

In [137]:
#demo2_new.to_csv("demo_pruned_final2.csv", index=False)