## Import the database from MongoDB

In [2]:
from pymongo import MongoClient
import pandas as pd
from json import loads, dumps

In [3]:
mongo = MongoClient(port=27017)

In [4]:
print(mongo.list_database_names())

['admin', 'classDB', 'config', 'epa', 'fruits_db', 'listings_db', 'local', 'travel_db', 'uk_food']


In [5]:
listings = mongo['listings_db'].nyc_listings
print(listings)

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'listings_db'), 'nyc_listings')


In [6]:
listings_arr = listings.find()
listings_df = pd.DataFrame(listings_arr)
listings_df.head()

Unnamed: 0,_id,id,listing_url,name,neighborhood_overview,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,property_type,...,bathrooms_text,bedrooms,beds,amenities,price,number_of_reviews,first_review,last_review,review_scores_rating,reviews_per_month
0,666c99367177588cd31b62d7,2595,https://www.airbnb.com/rooms/2595,Rental unit in New York · ★4.68 · Studio · 1 b...,Centrally located in the heart of Manhattan ju...,Midtown,Manhattan,40.75356,-73.98559,Entire rental unit,...,1 bath,,1.0,"[Long term stays allowed, Cooking basics, Ethe...",$240.00,49,2009-11-21,2022-06-21,4.68,0.29
1,666c99367177588cd31b62d8,5121,https://www.airbnb.com/rooms/5121,Rental unit in Brooklyn · ★4.52 · 1 bedroom · ...,,Bedford-Stuyvesant,Brooklyn,40.68535,-73.95512,Private room in rental unit,...,,,1.0,"[Air conditioning, Wifi, Kitchen, Heating]",$66.00,50,2009-05-28,2019-12-02,4.52,0.28
2,666c99367177588cd31b62d9,6848,https://www.airbnb.com/rooms/6848,Rental unit in Brooklyn · ★4.58 · 2 bedrooms ·...,,Williamsburg,Brooklyn,40.70935,-73.95342,Entire rental unit,...,1 bath,,1.0,"[Cooking basics, Microwave, Extra pillows and ...",$81.00,191,2009-05-25,2023-08-14,4.58,1.08
3,666c99367177588cd31b62da,6990,https://www.airbnb.com/rooms/6990,Rental unit in New York · ★4.88 · 1 bedroom · ...,"Location: Five minutes to Central Park, Museum...",East Harlem,Manhattan,40.78778,-73.94759,Private room in rental unit,...,1 shared bath,,1.0,"[Hair dryer, Air conditioning, Fire extinguish...",$70.00,246,2009-10-28,2023-08-14,4.88,1.43
4,666c99367177588cd31b62db,6872,https://www.airbnb.com/rooms/6872,Condo in New York · 1 bedroom · 1 bed · 1 shar...,This sweet Harlem sanctuary is a 10-20 minute ...,East Harlem,Manhattan,40.80107,-73.94255,Private room in condo,...,1 shared bath,,1.0,"[Fire extinguisher, Long term stays allowed, H...",$65.00,1,2022-06-05,2022-06-05,5.0,0.05


## Clean the data

In [7]:
listings_df.dtypes

_id                                     object
id                                       int64
listing_url                             object
name                                    object
neighborhood_overview                   object
neighbourhood_cleansed                  object
neighbourhood_group_cleansed            object
latitude                               float64
longitude                              float64
property_type                           object
room_type                               object
accommodates                             int64
bathrooms                               object
bathrooms_text                          object
bedrooms                               float64
beds                                   float64
amenities                               object
price                                   object
number_of_reviews                        int64
first_review                    datetime64[ns]
last_review                     datetime64[ns]
review_scores

In [8]:
reduced_listings = listings_df[['name', 'neighbourhood_cleansed', 'neighbourhood_group_cleansed'\
                               ,'latitude','longitude','room_type', 'amenities', 'price'\
                                , 'number_of_reviews', 'review_scores_rating']]
reduced_listings.dtypes

name                             object
neighbourhood_cleansed           object
neighbourhood_group_cleansed     object
latitude                        float64
longitude                       float64
room_type                        object
amenities                        object
price                            object
number_of_reviews                 int64
review_scores_rating            float64
dtype: object

In [9]:
reduced_listings['price'] = reduced_listings['price'].str.replace(",", "")
reduced_listings['price'] = reduced_listings['price'].str.replace("$", "")

reduced_listings['price']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  reduced_listings['price'] = reduced_listings['price'].str.replace(",", "")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  reduced_listings['price'] = reduced_listings['price'].str.replace("$", "")


0        240.00
1         66.00
2         81.00
3         70.00
4         65.00
          ...  
28127    225.00
28128    101.00
28129    105.00
28130    118.00
28131    285.00
Name: price, Length: 28132, dtype: object

In [10]:
reduced_listings = reduced_listings.astype({
    "price": float
})

reduced_listings.dtypes

name                             object
neighbourhood_cleansed           object
neighbourhood_group_cleansed     object
latitude                        float64
longitude                       float64
room_type                        object
amenities                        object
price                           float64
number_of_reviews                 int64
review_scores_rating            float64
dtype: object

In [11]:
reduced_listings.to_json("data/cleaned_listings.json", orient="records")


## Create DataFrames for each borough

### Manhattan Aggregates

In [20]:
manhattan_listings = reduced_listings[listings_df["neighbourhood_group_cleansed"] == "Manhattan"]

manhattan_listings.head()

Unnamed: 0,name,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,room_type,amenities,price,number_of_reviews,review_scores_rating
0,Rental unit in New York · ★4.68 · Studio · 1 b...,Midtown,Manhattan,40.75356,-73.98559,Entire home/apt,"[Long term stays allowed, Cooking basics, Ethe...",240.0,49,4.68
3,Rental unit in New York · ★4.88 · 1 bedroom · ...,East Harlem,Manhattan,40.78778,-73.94759,Private room,"[Hair dryer, Air conditioning, Fire extinguish...",70.0,246,4.88
4,Condo in New York · 1 bedroom · 1 bed · 1 shar...,East Harlem,Manhattan,40.80107,-73.94255,Private room,"[Fire extinguisher, Long term stays allowed, H...",65.0,1,5.0
10,Rental unit in New York · ★4.52 · Studio · 1 b...,Hell's Kitchen,Manhattan,40.76724,-73.98664,Entire home/apt,"[Long term stays allowed, Paid washer – In bui...",175.0,58,4.52
11,Rental unit in New York · ★4.39 · 1 bedroom · ...,East Village,Manhattan,40.72296,-73.98383,Private room,"[Long term stays allowed, Cooking basics, Micr...",70.0,315,4.39


In [21]:
# Group by neighborhood, find average price
avg_price_nbhd_manhattan = manhattan_listings.groupby(["neighbourhood_cleansed"])["price"].mean()

avg_price_nbhd_manhattan = avg_price_nbhd_manhattan.round(2)

avg_price_nbhd_manhattan.to_json("data/average_price_nbhd_manhattan.json", orient="records")

avg_price_nbhd_manhattan

neighbourhood_cleansed
Battery Park City      268.88
Chelsea                290.04
Chinatown              210.76
Civic Center           288.50
East Harlem            158.56
East Village           228.85
Financial District     315.13
Flatiron District      484.31
Gramercy               213.86
Greenwich Village      262.47
Harlem                 152.09
Hell's Kitchen         277.36
Inwood                  92.26
Kips Bay               221.61
Little Italy           249.00
Lower East Side        230.06
Marble Hill             95.86
Midtown                367.61
Morningside Heights    123.78
Murray Hill            303.91
NoHo                   391.04
Nolita                 235.87
Roosevelt Island       145.96
SoHo                   373.38
Stuyvesant Town        162.18
Theater District       490.06
Tribeca                464.11
Two Bridges            131.03
Upper East Side        227.35
Upper West Side        240.96
Washington Heights     117.74
West Village           316.67
Name: price, dtyp

### Bronx Aggregates

In [23]:
bronx_listings = reduced_listings[listings_df["neighbourhood_group_cleansed"] == "Bronx"]
bronx_listings.head()

Unnamed: 0,name,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,room_type,amenities,price,number_of_reviews,review_scores_rating
98,Rental unit in Bronx · ★4.93 · 1 bedroom · 1 b...,Eastchester,Bronx,40.881,-73.83511,Entire home/apt,"[Long term stays allowed, Hair conditioner co...",114.0,57,4.93
114,Rental unit in Bronx · ★4.56 · 1 bedroom · 1 b...,Kingsbridge,Bronx,40.87069,-73.90113,Entire home/apt,"[Long term stays allowed, Cooking basics, Pets...",90.0,16,4.56
244,Home in Bronx · ★4.47 · 1 bedroom · 1 bed · 1 ...,University Heights,Bronx,40.85981,-73.9063,Private room,"[Hair dryer, Air conditioning, Iron, Heating, ...",45.0,164,4.47
271,Guest suite in Bronx · ★4.68 · 1 bedroom · 2 b...,Allerton,Bronx,40.86502,-73.85496,Entire home/apt,"[Outdoor dining area, Long term stays allowed,...",175.0,346,4.68
332,Guest suite in Riverdale · ★4.74 · 2 bedrooms...,Spuyten Duyvil,Bronx,40.88095,-73.91701,Entire home/apt,"[Outdoor dining area, Long term stays allowed,...",151.0,146,4.74


In [24]:
# Group by neighborhood, find average price
avg_price_nbhd_bronx = bronx_listings.groupby(["neighbourhood_cleansed"])["price"].mean()

avg_price_nbhd_bronx = avg_price_nbhd_bronx.round(2)

avg_price_nbhd_bronx.to_json("data/average_price_nbhd_bronx.json", orient="records")

avg_price_nbhd_bronx

neighbourhood_cleansed
Allerton              101.76
Baychester             99.20
Belmont               104.35
Bronxdale              72.88
Castle Hill           123.20
City Island           182.31
Claremont Village     111.94
Clason Point          105.72
Co-op City             78.60
Concourse              98.29
Concourse Village     106.60
Country Club          161.50
East Morrisania        98.38
Eastchester           130.12
Edenwald               85.06
Fieldston              92.75
Fordham                90.41
Highbridge             87.06
Hunts Point            56.50
Kingsbridge            90.30
Longwood              367.31
Melrose               109.21
Morris Heights         82.13
Morris Park            85.29
Morrisania             89.22
Mott Haven            160.78
Mount Eden            104.00
Mount Hope             82.93
North Riverdale       163.33
Norwood                80.25
Olinville             213.00
Parkchester            81.88
Pelham Bay            131.00
Pelham Gardens      

### Queens Aggregates

In [13]:
queens_listings = reduced_listings[listings_df["neighbourhood_group_cleansed"] == "Queens"]
queens_listings.head()

Unnamed: 0,name,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,room_type,amenities,price,number_of_reviews,review_scores_rating
12,Townhouse in Queens · ★4.91 · 1 bedroom · 1 be...,Long Island City,Queens,40.74757,-73.94571,Private room,"[Private backyard – Fully fenced, Coffee, Gard...",165.0,385,4.91
25,Condo in Queens · ★5.0 · 1 bedroom · 1 bed · 1...,Woodside,Queens,40.74884,-73.90536,Private room,"[Air conditioning, Heating, Washer, Dryer, Ele...",65.0,30,5.0
55,Rental unit in Long Island City · ★4.93 · 1 be...,Sunnyside,Queens,40.74523,-73.92318,Entire home/apt,"[Long term stays allowed, Cooking basics, GE s...",78.0,32,4.93
65,Townhouse in Queens · ★4.42 · 5 bedrooms · 10 ...,Ridgewood,Queens,40.70309,-73.89963,Entire home/apt,"[Hair dryer, Air conditioning, Long term stays...",500.0,13,4.42
70,Guesthouse in Queens · ★4.34 · 1 bedroom · 1 b...,Middle Village,Queens,40.71567,-73.87842,Entire home/apt,"[Air conditioning, Heating, Free parking on pr...",130.0,33,4.34


In [25]:
# Group by neighborhood, find average price
avg_price_nbhd_queens = queens_listings.groupby(["neighbourhood_cleansed"])["price"].mean()

avg_price_nbhd_queens.to_json("data/average_price_nbhd_queens.json", orient="records")

avg_price_nbhd_queens

neighbourhood_cleansed
Arverne                195.362500
Astoria                112.432485
Bay Terrace            152.200000
Bayside                142.545455
Bayswater               94.230769
Belle Harbor           223.714286
Bellerose              128.272727
Breezy Point           150.000000
Briarwood              121.842105
Cambria Heights        126.533333
College Point          119.181818
Corona                  85.580000
Ditmars Steinway       120.776398
Douglaston              82.800000
East Elmhurst          138.113744
Edgemere               186.733333
Elmhurst                73.995327
Far Rockaway           150.625000
Flushing               112.358090
Forest Hills           140.263158
Fresh Meadows          106.937500
Glendale               118.480000
Hollis                  97.789474
Holliswood             185.500000
Howard Beach           167.166667
Jackson Heights         99.866279
Jamaica                127.363636
Jamaica Estates        105.607143
Jamaica Hills          13

### Brooklyn Aggregates

In [26]:
brooklyn_listings = reduced_listings[listings_df["neighbourhood_group_cleansed"] == "Brooklyn"]
brooklyn_listings.head()

Unnamed: 0,name,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,room_type,amenities,price,number_of_reviews,review_scores_rating
1,Rental unit in Brooklyn · ★4.52 · 1 bedroom · ...,Bedford-Stuyvesant,Brooklyn,40.68535,-73.95512,Private room,"[Air conditioning, Wifi, Kitchen, Heating]",66.0,50,4.52
2,Rental unit in Brooklyn · ★4.58 · 2 bedrooms ·...,Williamsburg,Brooklyn,40.70935,-73.95342,Entire home/apt,"[Cooking basics, Microwave, Extra pillows and ...",81.0,191,4.58
5,Home in Brooklyn · ★4.88 · 1 bedroom · 2 beds ...,Fort Greene,Brooklyn,40.69194,-73.97389,Private room,"[Outdoor dining area, Long term stays allowed,...",205.0,355,4.88
6,Loft in Brooklyn · ★4.91 · 1 bedroom · 1 bed ·...,Williamsburg,Brooklyn,40.71248,-73.95881,Private room,"[Air conditioning, Iron, Heating, Essentials, ...",95.0,13,4.91
7,Place to stay in Brooklyn · ★4.90 · 1 bed · 1 ...,Williamsburg,Brooklyn,40.718807,-73.956177,Entire home/apt,"[Long term stays allowed, Stainless steel sing...",350.0,12,4.9


In [27]:
# Group by neighborhood, find average price
avg_price_nbhd_brooklyn = brooklyn_listings.groupby(["neighbourhood_cleansed"])["price"].mean()

avg_price_nbhd_brooklyn = avg_price_nbhd_brooklyn.round(2)

avg_price_nbhd_brooklyn.to_json("data/average_price_nbhd_brooklyn.json", orient="records")

avg_price_nbhd_brooklyn

neighbourhood_cleansed
Bath Beach                   167.14
Bay Ridge                    107.56
Bedford-Stuyvesant           136.80
Bensonhurst                  103.93
Bergen Beach                 240.22
Boerum Hill                  230.23
Borough Park                  86.62
Brighton Beach               152.70
Brooklyn Heights             207.34
Brownsville                  137.92
Bushwick                     111.88
Canarsie                     161.96
Carroll Gardens              230.37
Clinton Hill                 181.76
Cobble Hill                  209.98
Columbia St                  191.39
Coney Island                 117.53
Crown Heights                153.11
Cypress Hills                101.70
DUMBO                        303.05
Downtown Brooklyn            274.15
Dyker Heights                100.56
East Flatbush                129.65
East New York                133.40
Flatbush                     142.78
Flatlands                    109.21
Fort Greene                  213.06
Fort 

### Staten Island Aggregates

In [28]:
staten_island_listings = reduced_listings[listings_df["neighbourhood_group_cleansed"] == "Staten Island"]
staten_island_listings.head()

Unnamed: 0,name,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,room_type,amenities,price,number_of_reviews,review_scores_rating
62,Rental unit in Staten Island · ★4.81 · 1 bedro...,St. George,Staten Island,40.64633,-74.08156,Private room,"[Coffee, Cooking basics, Microwave, Elevator, ...",70.0,188,4.81
164,Guest suite in Staten Island · 1 bedroom · 2 b...,Emerson Hill,Staten Island,40.60742,-74.14388,Entire home/apt,"[Air conditioning, Heating, Carbon monoxide al...",76.0,1,5.0
185,Rental unit in Staten Island · ★4.58 · 1 bedro...,Shore Acres,Staten Island,40.61019,-74.06757,Entire home/apt,"[Long term stays allowed, Cooking basics, Micr...",84.0,94,4.58
505,Rental unit in Staten Island · ★4.75 · 2 bedro...,New Springville,Staten Island,40.59274,-74.16178,Private room,"[Air conditioning, Carbon monoxide alarm, Micr...",68.0,8,4.75
556,Home in Staten Island · ★4.92 · 4 bedrooms · 6...,Tottenville,Staten Island,40.50863,-74.24135,Entire home/apt,"[Backyard, Pocket wifi, Cooking basics, Pets a...",559.0,105,4.92


In [29]:
# Group by neighborhood, find average price
avg_price_nbhd_staten_island = staten_island_listings.groupby(["neighbourhood_cleansed"])["price"].mean()

avg_price_nbhd_staten_island = avg_price_nbhd_staten_island.round(2)

avg_price_nbhd_staten_island.to_json("data/average_price_nbhd_staten_island.json", orient="records")

avg_price_nbhd_staten_island

neighbourhood_cleansed
Arden Heights                 140.50
Arrochar                      128.09
Bay Terrace, Staten Island    171.00
Bull's Head                   118.86
Castleton Corners             160.80
Chelsea, Staten Island         70.00
Clifton                       139.50
Concord                       108.71
Dongan Hills                  142.50
Eltingville                   131.67
Emerson Hill                  126.00
Graniteville                   77.00
Grant City                     63.62
Great Kills                   101.50
Grymes Hill                   200.00
Howland Hook                  173.00
Huguenot                       90.00
Lighthouse Hill               250.00
Mariners Harbor                94.45
Midland Beach                 152.43
New Brighton                   95.00
New Dorp Beach                108.86
New Springville               176.00
Oakwood                       124.00
Port Richmond                  77.50
Prince's Bay                  115.33
Randall Manor  