In [794]:
# This script will clean and analyze data collected on the Seattle rental housing market as of October 2021. 
# All records are anonymized and randomized. The goal is to find correlations between income and opinions of 
# Seattle neighborhoods in an effort to predict gentrification. The data is an accurate sample of the greater 
# Seattle population, in terms of demographic distribution.

# Credit: Mills Selkregg
# Colorado State University Global Campus
# Masters Thesis, November 6th, 2021
# Contact: selkreggmills@gmail.com

# Import all necessary packages. 
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib as plt
import collections 
%matplotlib inline

In [815]:
# Upload CSV file from your respective directory.
df = pd.read_csv('~/Desktop/responses.csv')

In [795]:
# Check the shape of the dataframe. 
df.shape

(23, 72)

In [796]:
df.head()

Unnamed: 0,#,Do you currently live in Seattle?,Have you previously lived in Seattle?,When did you live in Seattle?,How long did you live in Seattle?,Where did you move to after Seattle?,How long have you lived in Seattle?,How old are you?,What is your gender?,How would you describe yourself?,In which country were you born?,Where did you move to Seattle from?,Why did you move to Seattle?,"How many people, including yourself, live at home with you?",What is your yearly income?,Renting in Seattle is easy.,I am confident in my choice of neighborhood.,I knew a lot about Seattle before I moved here.,I visited Seattle before moving here.,I like living in Seattle.,Living in Seattle is worth the rent I pay.,I pay a fair amount for rent.,I am regularly looking for other places to rent.,I need to have a roommate to afford Seattle.,I would rather live in a different neighborhood in Seattle.,I would have chosen a different neighborhood in Seattle to live in if I had more knowledge.,Did you use any of the following tools to find housing?,Other,What alternative helped you find housing?,Broadview,Northgate,Lake City,Greenwood,Wedgwood,Ballard,Phinney,Greenlake,University District,Ravenna,Laurelhurst,Fremont,Wallingford,Magnolia,Queen Anne,Lower Queen Anne,Eastlake,Montlake,Madison Park,South Lake Union,Capitol Hill,Madrona,Belltown,Downtown,First Hill,Central District,Leschi,Pioneer Square,Chinatown / ID,Beacon Hill,Mt. Baker,West Seattle,Delridge,Columbia City,Seward Park,Southpark,Georgetown,Rainier Valley,Rank the following elements from most important to least important when deciding on housing.,What is your Seattle zip code?,Start Date (UTC),Submit Date (UTC),Network ID
0,3pfcp99mw6vlcpx3p5df30y9rnx3uj1k,0,Yes,2012.0,8 years,Everett,,25 to 34,Male,No Answer,United States of America,AZ,Youthful dream.,1,85000.0,Disagree,Agree,Strongly Disagree,Strongly Disagree,Agree,Disagree,Disagree,Agree,Strongly Agree,Disagree,Disagree,Apartments.com,,,Unsure,Yes,No,Yes,Yes,Yes,Yes,Yes,No,Yes,Unsure,Yes,Yes,Unsure,No,No,No,Yes,Unsure,Yes,No,No,No,No,Yes,Yes,No,No,No,Yes,Yes,No,Unsure,Unsure,Unsure,Unsure,Yes,Unsure,"Price of rent,Size of housing (i.e. square fee...",98101,2021-10-27 21:19:14,2021-10-27 21:25:21,58afdd1baa
1,mtxds7z4m41nr21dob5mtxds7gmj13a3,1,,,,,< 6 months,18 to 24,Female,White,United States of America,MD,Job offer,1,85000.0,Disagree,Agree,Strongly Disagree,Strongly Disagree,Agree,Neutral,Strongly Agree,Disagree,Disagree,Disagree,Disagree,Apartments.com,,,Unsure,Unsure,Unsure,Unsure,Unsure,Yes,Unsure,Yes,Unsure,No,No,Yes,Unsure,No,Yes,Yes,No,No,No,Yes,Yes,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,"Safety / lack of crime,Price of rent,Proximity...",98107,2021-10-27 20:27:32,2021-10-27 20:32:10,20a93f8158
2,o0ukw3ytii8monf5ggm61do0ukw3ytml,0,Yes,2020.0,3.5 years,Denver,,25 to 34,Female,White,United States of America,IL,"Lifestyle change, friends",2,78000.0,Disagree,Agree,Disagree,Agree,Neutral,Disagree,Disagree,Neutral,Strongly Disagree,Neutral,Neutral,Zillow,,,No,No,No,No,No,Yes,Yes,Yes,Yes,No,No,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Unsure,Yes,Yes,No,Yes,Yes,No,Yes,Unsure,"Safety / lack of crime,High quality of roommat...",98144,2021-10-27 19:48:54,2021-10-27 19:57:32,4c31e07c55
3,y817p55q9luvy7ol009odey817p55qnh,1,,,,,< 5 years,25 to 34,Male,White,United States of America,MA,For a job!,1,40000.0,Agree,Agree,Disagree,Agree,Strongly Agree,Agree,Agree,Neutral,Strongly Agree,Agree,Agree,Facebook,,,Unsure,No,No,No,No,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,Yes,No,Yes,Unsure,Yes,Yes,No,Yes,No,"Proximity to bars / social scene,Proximity to ...",98121,2021-10-27 05:07:40,2021-10-27 05:11:22,6027b5d710
4,oqlmdnmi4oqm11gmfaoqlmryi5wjfvnn,1,,,,,< 6 months,18 to 24,Female,Native American,United States of America,MN,Grad School,2,0.0,Disagree,Disagree,Disagree,Agree,Agree,Neutral,Strongly Agree,Neutral,Strongly Agree,Agree,Strongly Agree,Zillow,,,Unsure,Unsure,Unsure,Unsure,Unsure,Yes,Unsure,Yes,Yes,Yes,Unsure,Yes,Yes,Yes,Yes,Yes,Unsure,Unsure,Yes,,Yes,,Yes,No,Yes,Yes,Unsure,No,Unsure,Unsure,Unsure,No,Unsure,Unsure,Unsure,Unsure,Unsure,No,"Price of rent,Proximity to nature,Proximity to...",98121,2021-10-27 02:41:48,2021-10-27 02:49:18,2d731df42c


In [797]:
# Drop irrelevant columns. 
df = df.drop(columns = ['#','Start Date (UTC)','Submit Date (UTC)','Network ID'])

In [798]:
# Clean the data for uniformity, making yes/no/unsure binary and removing some outliers. 
df = df.replace(to_replace = ['Yes','No','Unsure','United States of America','NaN','Yes,No'],value = ['1','0','2','USA','0','0'])

In [799]:
# Move the yearly income question to a more functional location to make splicing easier.
first_column = df.pop('What is your yearly income?')
df.insert(27, 'What is your yearly income?', first_column)

In [800]:
# Create new dataframe of just income and neighborhood choices.
incomesums = df.iloc[:, 27:66]
incomesums = incomesums.drop(11, axis = 0)
incomesums = incomesums.fillna(0)
incomesums = incomesums.astype(int)

In [801]:
# Iterate through to collect yes, no, and unsure values.
i = 0
noes = []
unsures = []
yesses = []

while i < len(incomesums):
    counter = collections.Counter(incomesums.iloc[i, ])
    noes.append(counter[0])
    unsures.append(counter[1])
    yesses.append(counter[2])
    i += 1

# Add the counts to the dataframe.
incomesums['No'] = noes
incomesums['Yes'] = yesses
incomesums['Unsure'] = unsures

In [802]:
# Explore the effect that gender has on the neighborhood choices. 
incomesums['Gender'] = df['What is your gender?']
incomesums = incomesums.fillna("-")
moving = incomesums.pop('Gender')
incomesums.insert(1, 'Gender', moving)
incomesums['Gender'] = incomesums['Gender'].str.lower()
incomesums = incomesums.replace(to_replace = ['male','man','female'],value = ['m','m','f'])


In [803]:
counter = collections.Counter(incomesums.iloc[1])
print(counter)

Counter({0: 23, 2: 8, 1: 7, 85000: 1, 'f': 1, 23: 1, 8: 1, 7: 1})


In [804]:
# High Income vs. Low Income Yes/No/Unsure
highincomes = incomesums.loc[incomesums['What is your yearly income?']>=90000,:]
lowincomes = incomesums.loc[incomesums['What is your yearly income?']<90000,:]

highincomes.append(highincomes.sum(numeric_only = True), ignore_index = True)
lowincomes.append(lowincomes.sum(numeric_only = True), ignore_index = True)

Unnamed: 0,What is your yearly income?,Gender,Broadview,Northgate,Lake City,Greenwood,Wedgwood,Ballard,Phinney,Greenlake,University District,Ravenna,Laurelhurst,Fremont,Wallingford,Magnolia,Queen Anne,Lower Queen Anne,Eastlake,Montlake,Madison Park,South Lake Union,Capitol Hill,Madrona,Belltown,Downtown,First Hill,Central District,Leschi,Pioneer Square,Chinatown / ID,Beacon Hill,Mt. Baker,West Seattle,Delridge,Columbia City,Seward Park,Southpark,Georgetown,Rainier Valley,No,Yes,Unsure
0,85000.0,m,2.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,2.0,1.0,1.0,2.0,0.0,0.0,0.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,2.0,2.0,2.0,2.0,1.0,2.0,13.0,9.0,16.0
1,85000.0,f,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,0.0,0.0,1.0,2.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,23.0,8.0,7.0
2,78000.0,f,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,2.0,9.0,2.0,27.0
3,40000.0,m,2.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,2.0,1.0,1.0,0.0,1.0,0.0,7.0,2.0,29.0
4,0.0,f,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,2.0,0.0,2.0,2.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0,0.0,7.0,18.0,14.0
5,28000.0,f,0.0,0.0,0.0,2.0,2.0,1.0,0.0,1.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,21.0,15.0,2.0
6,60000.0,m,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,20.0,0.0,18.0
7,70000.0,m,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,0.0,2.0,2.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,0.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,29.0,6.0
8,0.0,f,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,24.0,0.0,15.0
9,88000.0,m,0.0,2.0,0.0,2.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.0,5.0,17.0


In [805]:
#Males vs. Females Yes/No/Unsure
males = incomesums.loc[incomesums['Gender']=="m",:]
females = incomesums.loc[incomesums['Gender']=="f",:]

males.append(males.sum(numeric_only = True), ignore_index = True)
females.append(females.sum(numeric_only = True), ignore_index = True)

Unnamed: 0,What is your yearly income?,Gender,Broadview,Northgate,Lake City,Greenwood,Wedgwood,Ballard,Phinney,Greenlake,University District,Ravenna,Laurelhurst,Fremont,Wallingford,Magnolia,Queen Anne,Lower Queen Anne,Eastlake,Montlake,Madison Park,South Lake Union,Capitol Hill,Madrona,Belltown,Downtown,First Hill,Central District,Leschi,Pioneer Square,Chinatown / ID,Beacon Hill,Mt. Baker,West Seattle,Delridge,Columbia City,Seward Park,Southpark,Georgetown,Rainier Valley,No,Yes,Unsure
0,85000.0,f,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,2.0,0.0,0.0,1.0,2.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,23.0,8.0,7.0
1,78000.0,f,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,2.0,9.0,2.0,27.0
2,0.0,f,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,2.0,0.0,2.0,2.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0,0.0,7.0,18.0,14.0
3,28000.0,f,0.0,0.0,0.0,2.0,2.0,1.0,0.0,1.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,21.0,15.0,2.0
4,90000.0,f,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,28.0,0.0,10.0
5,110000.0,f,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,27.0,0.0,11.0
6,120000.0,f,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,26.0,0.0,12.0
7,0.0,f,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,24.0,0.0,15.0
8,97000.0,f,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,21.0,0.0,17.0
9,608000.0,,4.0,4.0,4.0,6.0,6.0,8.0,6.0,9.0,5.0,5.0,4.0,7.0,6.0,6.0,6.0,7.0,7.0,6.0,7.0,7.0,8.0,3.0,3.0,2.0,6.0,5.0,8.0,1.0,3.0,4.0,5.0,3.0,4.0,5.0,7.0,4.0,6.0,4.0,186.0,43.0,115.0


In [806]:
males.append(males.sum(numeric_only = True), ignore_index = True)

Unnamed: 0,What is your yearly income?,Gender,Broadview,Northgate,Lake City,Greenwood,Wedgwood,Ballard,Phinney,Greenlake,University District,Ravenna,Laurelhurst,Fremont,Wallingford,Magnolia,Queen Anne,Lower Queen Anne,Eastlake,Montlake,Madison Park,South Lake Union,Capitol Hill,Madrona,Belltown,Downtown,First Hill,Central District,Leschi,Pioneer Square,Chinatown / ID,Beacon Hill,Mt. Baker,West Seattle,Delridge,Columbia City,Seward Park,Southpark,Georgetown,Rainier Valley,No,Yes,Unsure
0,85000.0,m,2.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,2.0,1.0,1.0,2.0,0.0,0.0,0.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,2.0,2.0,2.0,2.0,1.0,2.0,13.0,9.0,16.0
1,40000.0,m,2.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,2.0,1.0,1.0,0.0,1.0,0.0,7.0,2.0,29.0
2,167000.0,m,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,2.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,1.0,2.0,6.0,4.0,28.0
3,125000.0,m,1.0,0.0,0.0,1.0,2.0,1.0,2.0,1.0,0.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,2.0,1.0,2.0,10.0,6.0,22.0
4,60000.0,m,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,20.0,0.0,18.0
5,70000.0,m,2.0,2.0,2.0,2.0,2.0,1.0,2.0,1.0,0.0,2.0,2.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,0.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,29.0,6.0
6,88000.0,m,0.0,2.0,0.0,2.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.0,5.0,17.0
7,140000.0,m,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,34.0,0.0,4.0
8,110000.0,m,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,2.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,2.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0,3.0,10.0
9,885000.0,,8.0,6.0,3.0,8.0,9.0,9.0,10.0,7.0,4.0,11.0,10.0,9.0,8.0,8.0,6.0,7.0,8.0,8.0,9.0,7.0,7.0,8.0,3.0,3.0,7.0,9.0,5.0,3.0,3.0,7.0,5.0,6.0,9.0,7.0,7.0,8.0,6.0,8.0,134.0,58.0,150.0


In [807]:
highincomes.loc[highincomes['Gender']=='f',:]

Unnamed: 0,What is your yearly income?,Gender,Broadview,Northgate,Lake City,Greenwood,Wedgwood,Ballard,Phinney,Greenlake,University District,Ravenna,Laurelhurst,Fremont,Wallingford,Magnolia,Queen Anne,Lower Queen Anne,Eastlake,Montlake,Madison Park,South Lake Union,Capitol Hill,Madrona,Belltown,Downtown,First Hill,Central District,Leschi,Pioneer Square,Chinatown / ID,Beacon Hill,Mt. Baker,West Seattle,Delridge,Columbia City,Seward Park,Southpark,Georgetown,Rainier Valley,No,Yes,Unsure
7,90000,f,0,0,0,0,0,1,1,1,0,0,0,1,1,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,28,0,10
10,110000,f,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,27,0,11
12,120000,f,0,0,0,0,0,1,0,1,1,0,0,1,0,1,1,1,1,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,26,0,12
18,97000,f,0,0,0,0,0,1,0,1,0,1,0,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,21,0,17


In [808]:
# Create a simple view using percentages of the males' and females' yes/no/unsure values.
gendercomps = pd.DataFrame([["Males", (136/342)*100, (59/342)*100, (150/342)*100],
                     ["Females", (191/344)*100, (45/344)*100, (115/344)*100]],
                     columns = ["Gender","% No","% Yes","% Unsure"])
gendercomps

Unnamed: 0,Gender,% No,% Yes,% Unsure
0,Males,39.766082,17.251462,43.859649
1,Females,55.523256,13.081395,33.430233


In [809]:
# Create a simple view of the percentages of the high and low incomes yes/no/unsure values.
incomecomps = pd.DataFrame([[">= $90,000", (232/380)*100, (17/380)*100, (137/380)*100],
                     ["< $90,000", (168/458)*100, (104/458)*100, (192/458)*100]],
                     columns = ["Income Level","% No","% Yes","% Unsure"])
incomecomps

Unnamed: 0,Income Level,% No,% Yes,% Unsure
0,">= $90,000",61.052632,4.473684,36.052632
1,"< $90,000",36.681223,22.707424,41.921397


In [810]:
# Iterate through the yes/no/unsures for each neighborhood.
j = 0
nbnoes = ["No"]
nbunsures = ["Unsure"]
nbyesses = ["Yes"]
while j < 43:
    counter = collections.Counter(incomesums.iloc[:,j])
    nbnoes.append(counter[1])
    nbunsures.append(counter[0])
    nbyesses.append(counter[2])
    j += 1
    
columnnames = list(incomesums.columns)
columnnames = ["Answers"] + columnnames

byneighborhood = pd.DataFrame([nbnoes, 
                               nbyesses, 
                               nbunsures], 
                               columns = [columnnames])

In [811]:
counter = collections.Counter(incomesums.iloc[:,10])
counter

Counter({0: 12, 2: 2, 1: 8})

In [812]:
byneighborhood = byneighborhood.drop('Gender', axis = 1)

  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


In [813]:
byneighborhood = byneighborhood.drop('What is your yearly income?', axis = 1)

In [814]:
# View just the neighborhoods and their respective yes/no/unsure values.
byneighborhood = byneighborhood.iloc[:,0:39]
byneighborhood

Unnamed: 0,Answers,Broadview,Northgate,Lake City,Greenwood,Wedgwood,Ballard,Phinney,Greenlake,University District,Ravenna,Laurelhurst,Fremont,Wallingford,Magnolia,Queen Anne,Lower Queen Anne,Eastlake,Montlake,Madison Park,South Lake Union,Capitol Hill,Madrona,Belltown,Downtown,First Hill,Central District,Leschi,Pioneer Square,Chinatown / ID,Beacon Hill,Mt. Baker,West Seattle,Delridge,Columbia City,Seward Park,Southpark,Georgetown,Rainier Valley
0,No,2,2,1,6,4,21,9,19,8,10,5,19,13,13,16,14,12,12,13,11,16,9,8,3,10,15,10,4,4,7,5,6,1,5,7,0,8,0
1,Yes,6,4,3,5,6,0,6,0,2,5,6,1,2,2,0,1,2,3,3,2,1,3,0,2,2,1,2,1,2,3,4,3,7,5,5,7,4,7
2,Unsure,14,16,18,11,12,1,7,3,12,7,11,2,7,7,6,7,8,7,6,9,5,10,14,17,10,6,10,17,16,12,13,13,14,12,10,15,10,15
