In [None]:
# import our libraries
import pandas as pd
%pylab inline

In [None]:
# tell python where the file is
# you can download this dataset here: https://data.cityofnewyork.us/Housing-Development/DOB-Complaints-Received/eabe-havv
path = 'DOB_Complaints_Received.csv'
# read the csv file and convert to a pandas DataFrame, store the DataFrame in
# variable complaints
# the columns below hold codes and address text, not quantities, so we ask for
# them as strings; if pandas is left to infer the type, a value like 81 can
# come back as a number, which never equals the string '81' and has no strip()
complaints = pd.read_csv(
    path,
    dtype={
        'Complaint Category': str,
        'House Number': str,
        'House Street': str,
    },
)

In [None]:
# show the DataFrame we just read in from the csv file
complaints

In [None]:
# we can use indexing with DataFrames
# a [start:stop] slice picks out rows, so 0:10 asks for the first ten rows
complaints[0:10]

In [None]:
# print out one column from our DataFrame - this is called a Series
# pandas allows us to use dot notation: the DataFrame, a dot, then the
# column name (here the Status column)
complaints.Status

In [None]:
# dot notation does not work for column names with spaces
# if there are spaces in the column name, we need to use [] square brackets
# with the column name in quotes
complaints['House Number']

In [None]:
# list each distinct value found in the Status column (a Series),
# with the repeats removed
complaints['Status'].unique()

In [None]:
# making a sub dataset of all active complaints
# my sub dataset is still a pandas DataFrame because it will
# still have rows and columns

# comparing the Status column to 'ACTIVE' gives a Series of True/False values,
# one per row; putting that Series inside complaints[...] keeps only the rows
# marked True (no list comprehension needed - pandas does the looping for us)
active_complaints = complaints[complaints.Status == 'ACTIVE']

In [None]:
# preview the DataFrame of rows whose Status is 'ACTIVE'
active_complaints

In [None]:
# active_complaints is my DataFrame of active complaints
# find more Complaint Categories here: https://www1.nyc.gov/assets/buildings/pdf/complaint_category.pdf
# keep only the rows whose Complaint Category is '81'; the comparison builds a
# True/False Series that pandas uses directly as a row filter
active_elevator_accidents = active_complaints[active_complaints['Complaint Category'] == '81']

In [None]:
# preview our elevator dataset (the active complaints in category '81')
active_elevator_accidents

In [None]:
# making a list of streets i don't want to visit
# .loc[:, 'House Street'] selects every row of that one column (a Series),
# and tolist() turns the Series into a plain python list
addresses = active_elevator_accidents.loc[:, 'House Street'].tolist()

In [None]:
# look at the python list of streets we just made
addresses

In [None]:
# all elevator accidents regardless of status (ie ACTIVE or CLOSED)
# I looked up DOB Complaint Categories and discovered category 81 was
# elevator accidents
# the comparison gives one True/False per row, and pandas keeps the True rows
elevator_accidents = complaints[complaints['Complaint Category'] == '81']

In [None]:
# preview every category '81' complaint, whatever its Status
elevator_accidents

In [None]:
# re-doing addresses with our newest DataFrame:
# take every row of the 'House Street' column and convert it to a python list
all_addresses = elevator_accidents.loc[:, 'House Street'].tolist()

In [None]:
# look at the street list built from all of the elevator accidents
all_addresses

In [None]:
# strip() gets rid of excess whitespace
# a blank cell in the csv file arrives here as a missing value (NaN), which is
# not a string and has no strip(); we turn those into '' instead of dropping
# them so this list keeps the same length and order as all_addresses
new_addresses = ['' if pd.isnull(address) else str(address).strip()
                 for address in all_addresses]

In [None]:
# check the streets after stripping the whitespace
new_addresses

In [None]:
# do the same for house numbers
house_numbers = elevator_accidents['House Number'].tolist()
# a missing house number (NaN) has no strip(), so it becomes '' here; keeping
# a placeholder (rather than skipping it) keeps this list the same length and
# order as house_numbers
new_house_numbers = ['' if pd.isnull(num) else str(num).strip()
                     for num in house_numbers]

In [None]:
# check the house numbers after stripping the whitespace
new_house_numbers

In [None]:
# a list of places not to visit

# zip() walks both lists in step, handing back one (house number, street)
# pair at a time; we glue each pair together with a single space in between
bad_places = [" ".join((number, street))
              for number, street in zip(new_house_numbers, new_addresses)]
print(bad_places)

In [None]:
from collections import Counter

# tally how many times each address shows up in bad_places, then keep the
# five addresses with the most elevator accidents as (address, count) pairs
place_counts = Counter(bad_places)
worst_places = place_counts.most_common(5)
print(worst_places)