In [1]:
import folium
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv('housing_with_zips.csv')
df

Unnamed: 0,url,address,neighborhood,rent,beds,baths,flexible_rooms,zip
0,https://www.renthop.com/listings/10-montieth-s...,"10 Montieth Street, Apt 341","Bushwick, Northern Brooklyn, Brooklyn",1908,0,1.0,0,11206.0
1,https://www.renthop.com/listings/225-east-10th...,"225 East 10th Street, Apt 5BB","East Village, Downtown Manhattan, Manhattan",1800,0,1.0,0,10003.0
2,https://www.renthop.com/listings/225-east-10th...,"225 East 10th Street, Apt 3EE","East Village, Downtown Manhattan, Manhattan",1800,0,1.0,0,10003.0
3,https://www.renthop.com/listings/25-hillside-a...,"25 Hillside Avenue, Apt 2B","Fort George, Washington Heights, Upper Manhatt...",1776,1,1.0,0,10040.0
4,https://www.renthop.com/listings/525-west-52nd...,"525 West 52nd Street, Apt 2NN","Hell's Kitchen, Midtown Manhattan, Manhattan",5600,2,2.0,0,10019.0
...,...,...,...,...,...,...,...,...
9224,https://www.renthop.com/listings/east-48th-str...,East 48th Street,"Turtle Bay, Midtown East, Midtown Manhattan, M...",4958,3,2.0,0,
9225,https://www.renthop.com/listings/west-31st-str...,West 31st Street,"Chelsea, Midtown Manhattan, Manhattan",3735,1,1.0,0,
9226,https://www.renthop.com/listings/1093-dean-st/...,"1093 Dean Street, Apt 7","Crown Heights, Central Brooklyn, Brooklyn",1760,0,1.0,0,11216.0
9227,https://www.renthop.com/listings/wall-st/309/1...,Wall St,"Financial District, Downtown Manhattan, Manhattan",3450,1,1.0,1,


In [3]:
df = df[df.zip.notnull()].reset_index(drop=True)
df.zip = df.zip.astype(int)
df

Unnamed: 0,url,address,neighborhood,rent,beds,baths,flexible_rooms,zip
0,https://www.renthop.com/listings/10-montieth-s...,"10 Montieth Street, Apt 341","Bushwick, Northern Brooklyn, Brooklyn",1908,0,1.0,0,11206
1,https://www.renthop.com/listings/225-east-10th...,"225 East 10th Street, Apt 5BB","East Village, Downtown Manhattan, Manhattan",1800,0,1.0,0,10003
2,https://www.renthop.com/listings/225-east-10th...,"225 East 10th Street, Apt 3EE","East Village, Downtown Manhattan, Manhattan",1800,0,1.0,0,10003
3,https://www.renthop.com/listings/25-hillside-a...,"25 Hillside Avenue, Apt 2B","Fort George, Washington Heights, Upper Manhatt...",1776,1,1.0,0,10040
4,https://www.renthop.com/listings/525-west-52nd...,"525 West 52nd Street, Apt 2NN","Hell's Kitchen, Midtown Manhattan, Manhattan",5600,2,2.0,0,10019
...,...,...,...,...,...,...,...,...
4649,https://www.renthop.com/listings/36-west-26th-...,36 West 26th Street,"NoMad, Midtown Manhattan, Manhattan",1450,0,2.0,0,10010
4650,https://www.renthop.com/listings/1150-presiden...,"1153 President Street, Apt 4E","Crown Heights, Central Brooklyn, Brooklyn",2200,2,1.0,0,11225
4651,https://www.renthop.com/listings/269-e-52nd-st...,269 E 52nd Street,"East Flatbush, Central Brooklyn, Brooklyn",2450,3,1.0,0,11203
4652,https://www.renthop.com/listings/28-powers-str...,"28 Powers Street, Apt 2L","East Williamsburg, Williamsburg, Northern Broo...",2825,2,1.5,0,11211


## Visualizing the data

In [4]:
zdf_mean = df.groupby('zip')['rent'].mean().to_frame('avg_rent')\
.sort_values(by='avg_rent', ascending=False).reset_index()
zdf_mean

Unnamed: 0,zip,avg_rent
0,10282,6249.166667
1,10007,5875.111111
2,10044,4933.000000
3,12440,4800.000000
4,10069,4428.888889
...,...,...
111,11219,1600.000000
112,10454,1600.000000
113,11214,1597.500000
114,7735,1557.000000


In [11]:
# create a map with Empire State Building as the centre
m = folium.Map(location=[40.74817, -73.985428], zoom_start=13)

folium.Choropleth(
    geo_data=open('nyc_zip.geojson').read(),
    data=zdf_mean,
    columns=['zip', 'avg_rent'],
    key_on='feature.properties.postalcode',
    fill_color='YlOrRd', fill_opacity=0.7, line_opacity=0.2,
    
).add_to(m)

m

## Modelling the data

In [12]:
import patsy 
import statsmodels.api as sm

In [17]:
f = 'rent ~ zip + beds'
y, X = patsy.dmatrices(f, df, return_type='dataframe')

results = sm.OLS(y, X).fit() 
results.summary()

0,1,2,3
Dep. Variable:,rent,R-squared:,0.197
Model:,OLS,Adj. R-squared:,0.197
Method:,Least Squares,F-statistic:,570.1
Date:,"Thu, 24 Dec 2020",Prob (F-statistic):,3.64e-222
Time:,08:38:02,Log-Likelihood:,-40856.0
No. Observations:,4654,AIC:,81720.0
Df Residuals:,4651,BIC:,81740.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,7924.9149,419.190,18.905,0.000,7103.103,8746.726
zip,-0.5973,0.041,-14.638,0.000,-0.677,-0.517
beds,721.9170,22.078,32.698,0.000,678.634,765.201

0,1,2,3
Omnibus:,8032.14,Durbin-Watson:,1.919
Prob(Omnibus):,0.0,Jarque-Bera (JB):,13300399.981
Skew:,11.713,Prob(JB):,0.0
Kurtosis:,263.844,Cond. No.,189000.0


In [6]:
df.head(2)

Unnamed: 0,url,address,neighborhood,rent,beds,baths,flexible_rooms,zip
0,https://www.renthop.com/listings/10-montieth-s...,"10 Montieth Street, Apt 341","Bushwick, Northern Brooklyn, Brooklyn",1908,0,1.0,0,11206
1,https://www.renthop.com/listings/225-east-10th...,"225 East 10th Street, Apt 5BB","East Village, Downtown Manhattan, Manhattan",1800,0,1.0,0,10003
