In [588]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import geopandas
import numpy as np
import time
import seaborn as sns

pd.set_option('display.max_columns',75)

In [589]:
df = pd.read_csv('../datasets/total_data.csv',index_col=0)

In [590]:
df = df.drop(columns=['City','State','Metro'])
df = df.dropna(subset=['sale_date'])
df = df.fillna(0)

In [652]:
df[df.sale_price==0]

Unnamed: 0,RegionID,neighborhood,CountyName,SizeRank,2012-01,2012-02,2012-03,2012-04,2012-05,2012-06,2012-07,2012-08,2012-09,2012-10,2012-11,2012-12,2013-01,2013-02,2013-03,2013-04,2013-05,2013-06,2013-07,2013-08,2013-09,2013-10,2013-11,2013-12,2014-01,2014-02,2014-03,2014-04,2014-05,2014-06,2014-07,2014-08,2014-09,...,2016-12,2017-01,2017-02,2017-03,2017-04,2017-05,2017-06,2017-07,2017-08,2017-09,2017-10,2017-11,2017-12,2018-01,2018-02,2018-03,2018-04,2018-05,2018-06,2018-07,Unnam,addr,lng,lat,apartment_number,borough,building_class_category,commercial_units,gross_square_feet,land_square_feet,residential_units,sale_date,sale_price,total_units,year_built,zip_code,rent_estimate


In [592]:
def rename_cols(df):
    new_cols = []
    for c in df.columns[0:4]:
        new_cols.append(c)
    for c in df.columns[4:-14]:
        d = c.split(' ')[0][:-3]
        new_cols.append(d)
    for c in df.columns[-14:]:
        new_cols.append(c)
    df.columns = new_cols

In [593]:
rename_cols(df)

In [594]:
def rename_sale_date(df):
    sale_dates = list(df.sale_date)
    new_dates = []
    for date in sale_dates:
        new_dates.append(str(date)[:7])
    df.sale_date = new_dates

In [595]:
rename_sale_date(df)

In [596]:
df.columns

Index(['RegionID', 'neighborhood', 'CountyName', 'SizeRank', '2012-01',
       '2012-02', '2012-03', '2012-04', '2012-05', '2012-06', '2012-07',
       '2012-08', '2012-09', '2012-10', '2012-11', '2012-12', '2013-01',
       '2013-02', '2013-03', '2013-04', '2013-05', '2013-06', '2013-07',
       '2013-08', '2013-09', '2013-10', '2013-11', '2013-12', '2014-01',
       '2014-02', '2014-03', '2014-04', '2014-05', '2014-06', '2014-07',
       '2014-08', '2014-09', '2014-10', '2014-11', '2014-12', '2015-01',
       '2015-02', '2015-03', '2015-04', '2015-05', '2015-06', '2015-07',
       '2015-08', '2015-09', '2015-10', '2015-11', '2015-12', '2016-01',
       '2016-02', '2016-03', '2016-04', '2016-05', '2016-06', '2016-07',
       '2016-08', '2016-09', '2016-10', '2016-11', '2016-12', '2017-01',
       '2017-02', '2017-03', '2017-04', '2017-05', '2017-06', '2017-07',
       '2017-08', '2017-09', '2017-10', '2017-11', '2017-12', '2018-01',
       '2018-02', '2018-03', '2018-04', '2018-05', '

In [597]:
def get_rent_estimates(df):
    rent_estimates = []
    start=time.time()
    for index, row in df.iterrows():
        for col in df.columns:
            if row.sale_date==row[col]:  
                if row[row.sale_date] !=0:
                    rent_estimate = row.sale_price/row[row.sale_date]/12
                    rent_estimates.append(round(rent_estimate))
                else:
                    rent_estimates.append(0)
        if (index+1)%5000 == 0:
            print('Runtime: {} seconds. Row: {}'.format(round(time.time()-start,2),index+1))
    print('\nTotal runtime: {} seconds'.format(round(time.time()-start,2)))
    return rent_estimates

In [598]:
re = get_rent_estimates(df)

Runtime: 22.94 seconds. Row: 5000
Runtime: 42.83 seconds. Row: 10000
Runtime: 60.57 seconds. Row: 15000
Runtime: 77.86 seconds. Row: 20000
Runtime: 99.31 seconds. Row: 25000
Runtime: 119.95 seconds. Row: 30000
Runtime: 138.97 seconds. Row: 35000
Runtime: 157.1 seconds. Row: 40000
Runtime: 175.04 seconds. Row: 45000
Runtime: 196.59 seconds. Row: 50000
Runtime: 220.08 seconds. Row: 55000
Runtime: 238.69 seconds. Row: 60000
Runtime: 256.49 seconds. Row: 65000
Runtime: 275.24 seconds. Row: 70000

Total runtime: 286.94 seconds


In [599]:
df['rent_estimate'] = re

In [600]:
df.head()

Unnamed: 0,RegionID,neighborhood,CountyName,SizeRank,2012-01,2012-02,2012-03,2012-04,2012-05,2012-06,2012-07,2012-08,2012-09,2012-10,2012-11,2012-12,2013-01,2013-02,2013-03,2013-04,2013-05,2013-06,2013-07,2013-08,2013-09,2013-10,2013-11,2013-12,2014-01,2014-02,2014-03,2014-04,2014-05,2014-06,2014-07,2014-08,2014-09,...,2016-12,2017-01,2017-02,2017-03,2017-04,2017-05,2017-06,2017-07,2017-08,2017-09,2017-10,2017-11,2017-12,2018-01,2018-02,2018-03,2018-04,2018-05,2018-06,2018-07,Unnam,addr,lng,lat,apartment_number,borough,building_class_category,commercial_units,gross_square_feet,land_square_feet,residential_units,sale_date,sale_price,total_units,year_built,zip_code,rent_estimate
0,270958,upper west side,New York,3,21.85,21.49,21.52,21.61,21.74,21.82,21.66,21.19,21.39,21.54,21.75,21.46,21.78,22.52,22.82,23.13,22.73,22.69,23.24,23.98,23.75,23.7,24.15,23.9,23.74,23.82,24.08,25.4,25.92,26.22,25.96,25.39,25.13,...,25.69,25.55,25.34,25.38,25.25,26.43,26.98,26.93,26.88,26.57,28.3,27.99,28.0,27.56,27.05,26.27,26.16,26.06,25.64,25.58,11909.0,360 MANHATTAN AVENUE,-73.957293,40.804399,A,1.0,15 CONDOS - 2-10 UNIT RESIDENTIAL,0.0,0.0,0.0,1.0,2012-05,695000.0,1.0,0.0,10026.0,2664
1,270958,upper west side,New York,3,21.85,21.49,21.52,21.61,21.74,21.82,21.66,21.19,21.39,21.54,21.75,21.46,21.78,22.52,22.82,23.13,22.73,22.69,23.24,23.98,23.75,23.7,24.15,23.9,23.74,23.82,24.08,25.4,25.92,26.22,25.96,25.39,25.13,...,25.69,25.55,25.34,25.38,25.25,26.43,26.98,26.93,26.88,26.57,28.3,27.99,28.0,27.56,27.05,26.27,26.16,26.06,25.64,25.58,11910.0,360 MANHATTAN AVENUE,-73.957293,40.804399,C,1.0,15 CONDOS - 2-10 UNIT RESIDENTIAL,0.0,0.0,0.0,1.0,2012-03,665000.0,1.0,0.0,10026.0,2575
2,270958,upper west side,New York,3,21.85,21.49,21.52,21.61,21.74,21.82,21.66,21.19,21.39,21.54,21.75,21.46,21.78,22.52,22.82,23.13,22.73,22.69,23.24,23.98,23.75,23.7,24.15,23.9,23.74,23.82,24.08,25.4,25.92,26.22,25.96,25.39,25.13,...,25.69,25.55,25.34,25.38,25.25,26.43,26.98,26.93,26.88,26.57,28.3,27.99,28.0,27.56,27.05,26.27,26.16,26.06,25.64,25.58,11911.0,360 MANHATTAN AVENUE,-73.957293,40.804399,H,1.0,15 CONDOS - 2-10 UNIT RESIDENTIAL,0.0,0.0,0.0,1.0,2012-02,721000.0,1.0,0.0,10026.0,2796
3,270958,upper west side,New York,3,21.85,21.49,21.52,21.61,21.74,21.82,21.66,21.19,21.39,21.54,21.75,21.46,21.78,22.52,22.82,23.13,22.73,22.69,23.24,23.98,23.75,23.7,24.15,23.9,23.74,23.82,24.08,25.4,25.92,26.22,25.96,25.39,25.13,...,25.69,25.55,25.34,25.38,25.25,26.43,26.98,26.93,26.88,26.57,28.3,27.99,28.0,27.56,27.05,26.27,26.16,26.06,25.64,25.58,11912.0,360 MANHATTAN AVENUE,-73.957293,40.804399,B,1.0,15 CONDOS - 2-10 UNIT RESIDENTIAL,0.0,0.0,0.0,1.0,2013-08,955000.0,1.0,0.0,10026.0,3319
4,270958,upper west side,New York,3,21.85,21.49,21.52,21.61,21.74,21.82,21.66,21.19,21.39,21.54,21.75,21.46,21.78,22.52,22.82,23.13,22.73,22.69,23.24,23.98,23.75,23.7,24.15,23.9,23.74,23.82,24.08,25.4,25.92,26.22,25.96,25.39,25.13,...,25.69,25.55,25.34,25.38,25.25,26.43,26.98,26.93,26.88,26.57,28.3,27.99,28.0,27.56,27.05,26.27,26.16,26.06,25.64,25.58,24794.0,130 MANHATTAN AVENUE,-74.054322,40.744161,0,1.0,01 ONE FAMILY HOMES,0.0,3264.0,1190.0,1.0,2012-12,2100000.0,1.0,1900.0,10025.0,8155


In [601]:
df.rent_estimate.describe()

count    7.315700e+04
mean     8.534942e+03
std      6.026241e+04
min      0.000000e+00
25%      2.120000e+03
50%      3.387000e+03
75%      6.486000e+03
max      6.772565e+06
Name: rent_estimate, dtype: float64

In [604]:
df[df.commercial_units>0].describe()

Unnamed: 0,RegionID,SizeRank,2012-01,2012-02,2012-03,2012-04,2012-05,2012-06,2012-07,2012-08,2012-09,2012-10,2012-11,2012-12,2013-01,2013-02,2013-03,2013-04,2013-05,2013-06,2013-07,2013-08,2013-09,2013-10,2013-11,2013-12,2014-01,2014-02,2014-03,2014-04,2014-05,2014-06,2014-07,2014-08,2014-09,2014-10,2014-11,...,2016-08,2016-09,2016-10,2016-11,2016-12,2017-01,2017-02,2017-03,2017-04,2017-05,2017-06,2017-07,2017-08,2017-09,2017-10,2017-11,2017-12,2018-01,2018-02,2018-03,2018-04,2018-05,2018-06,2018-07,Unnam,lng,lat,borough,commercial_units,gross_square_feet,land_square_feet,residential_units,sale_price,total_units,year_built,zip_code,rent_estimate
count,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,...,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0,2950.0
mean,256018.504068,270.08339,20.682319,20.665424,20.614007,20.537634,20.374807,20.257119,20.183454,20.107556,20.273729,20.395756,20.435268,20.689688,21.095481,21.304492,21.31678,21.180441,20.981447,20.934054,20.923827,21.15082,21.137108,21.499776,21.639831,21.580837,21.668393,21.620631,21.763132,22.390214,22.814064,22.828986,22.744498,22.642146,22.543539,22.3768,22.570617,...,23.169471,23.497369,23.501905,23.507498,23.668803,23.734803,23.763549,24.069214,24.181322,24.601224,24.886217,24.832949,24.809146,24.758814,26.106058,26.070173,26.310142,26.630729,26.602769,26.584895,26.736081,26.866169,27.132678,27.289851,45449.07661,-77.377833,39.810053,2.39661,1.0,20351.66,10327.805763,0.054915,12857650.0,1.0,1876.381356,10847.413898,45113.3
std,60437.884095,429.867891,4.765406,4.684733,4.787566,4.847025,4.702912,4.727614,4.696017,4.690873,4.870685,4.942952,4.898101,5.136783,5.535461,5.506419,5.633193,5.531121,5.28578,5.218026,5.523196,5.677448,5.539552,5.73109,5.471512,5.255827,4.91635,4.952885,4.940327,5.101213,5.285724,5.211849,5.258164,5.169158,4.913726,4.6539,4.65868,...,4.541557,4.538195,4.493169,4.41508,4.497474,4.539406,4.48871,4.466614,4.402769,4.219111,4.043527,3.92741,3.902937,3.931496,4.195163,4.215824,4.224622,4.396882,4.456931,4.58317,4.590462,4.633576,4.893209,4.966712,21960.309011,15.12811,4.917888,0.918143,0.0,90159.38,33808.406517,1.296251,57829930.0,0.0,354.694112,649.61472,191535.8
min,193182.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.43,14.32,13.97,13.96,13.52,13.38,13.22,13.1,12.98,13.39,13.34,...,12.87,13.37,13.57,13.66,13.72,13.62,13.62,13.74,13.99,14.33,14.6,14.62,14.62,14.66,15.42,15.6,15.99,16.13,15.99,15.6,15.76,15.74,16.04,16.4,100.0,-124.218764,-42.885005,1.0,1.0,0.0,0.0,0.0,101400.0,1.0,0.0,0.0,0.0
25%,194737.0,24.0,17.13,17.72,17.01,16.98,16.89,17.13,17.1,17.11,17.13,17.3,17.26,17.47,17.87,17.92,18.01,18.11,18.15,17.96,17.76,17.77,17.93,18.03,17.73,17.62,17.66,17.86,17.96,18.7,18.97,18.7,18.76,18.83,18.85,18.98,19.38,...,20.45,20.89,20.98,21.07,21.21,21.37,21.45,21.56,21.6,22.71,23.36,23.58,23.72,23.84,25.12,24.95,25.43,25.97,25.96,25.54,25.6,25.47,25.1,25.06,30170.25,-74.007117,40.633184,1.0,1.0,1692.0,2000.0,0.0,900000.0,1.0,1920.0,10035.0,3523.25
50%,270841.0,76.0,19.44,19.48,19.38,19.26,19.35,19.13,19.12,18.95,18.88,18.99,19.1,19.07,19.22,19.4,19.3,19.54,19.45,19.48,19.29,19.715,19.98,20.51,21.03,21.57,22.01,22.07,22.63,22.93,23.53,23.57,23.41,23.47,23.26,23.49,23.78,...,24.55,24.89,24.87,24.625,24.7,24.65,24.77,24.69,24.81,25.0,26.04,25.98,25.88,25.7,27.26,26.85,27.07,26.92,26.53,26.63,26.82,27.07,26.74,26.73,53606.5,-73.979567,40.695555,3.0,1.0,4200.0,4000.0,0.0,2400000.0,1.0,1931.0,11211.0,8900.5
75%,270957.0,360.0,24.16,23.98,24.34,24.03,23.66,23.39,23.43,23.14,23.06,23.26,23.18,23.4,23.45,24.03,23.84,23.82,23.66,23.49,23.96,24.01,23.57,24.16,24.58,24.49,24.03,24.52,24.77,25.23,25.99,25.79,25.93,25.42,25.12,24.93,25.26,...,26.22,26.46,26.57,26.27,26.12,26.19,26.03,26.01,26.12,26.6425,27.08,27.61,26.89,26.79,28.42,27.99,28.0,28.88,28.99,28.72,28.94,29.07,29.51,29.69,61799.75,-73.942691,40.753302,3.0,1.0,10486.5,8998.25,0.0,7050000.0,1.0,1960.0,11223.0,26700.5
max,403222.0,3309.0,60.14,59.4,59.91,60.77,57.98,56.35,54.7,55.59,58.3,61.59,61.44,61.45,61.48,61.88,60.51,60.94,55.97,53.78,52.44,50.99,52.71,51.74,51.61,51.03,49.93,50.37,49.93,48.51,51.32,48.2,46.44,44.8,43.49,42.89,45.07,...,36.18,37.24,39.56,39.47,40.12,39.43,38.62,37.84,36.39,35.42,35.1,35.24,36.44,37.99,39.93,39.91,40.99,42.17,43.28,43.48,43.95,44.62,44.76,45.31,73156.0,153.462385,51.057561,3.0,1.0,1809073.0,644732.0,59.0,1791829000.0,1.0,2016.0,11249.0,5457569.0


In [605]:
df[df.rent_estimate<10000].describe()

Unnamed: 0,RegionID,SizeRank,2012-01,2012-02,2012-03,2012-04,2012-05,2012-06,2012-07,2012-08,2012-09,2012-10,2012-11,2012-12,2013-01,2013-02,2013-03,2013-04,2013-05,2013-06,2013-07,2013-08,2013-09,2013-10,2013-11,2013-12,2014-01,2014-02,2014-03,2014-04,2014-05,2014-06,2014-07,2014-08,2014-09,2014-10,2014-11,...,2016-08,2016-09,2016-10,2016-11,2016-12,2017-01,2017-02,2017-03,2017-04,2017-05,2017-06,2017-07,2017-08,2017-09,2017-10,2017-11,2017-12,2018-01,2018-02,2018-03,2018-04,2018-05,2018-06,2018-07,Unnam,lng,lat,borough,commercial_units,gross_square_feet,land_square_feet,residential_units,sale_price,total_units,year_built,zip_code,rent_estimate
count,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,...,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0,62627.0
mean,261590.194261,348.078688,21.535633,21.45091,21.425797,21.371612,21.234756,21.199476,21.082775,20.946621,21.176422,21.297087,21.307022,21.560628,22.053928,22.238558,22.3114,22.213428,22.011439,21.937452,22.077656,22.460065,22.376388,22.722604,22.776102,22.654329,22.761863,22.733681,22.826361,23.571779,24.022199,23.95181,23.823269,23.705213,23.457153,23.162539,23.343417,...,23.610078,23.962284,23.944166,24.007594,24.153362,24.19582,24.166956,24.423372,24.508068,25.104649,25.486633,25.513483,25.469176,25.33171,26.735101,26.619016,26.830844,27.028752,26.924526,26.777351,26.87709,26.969673,27.088325,27.190343,35539.631772,-76.048872,39.748102,2.004742,0.025117,426.536334,605.320565,0.944497,1006075.0,1.0,1573.87903,10614.77663,3575.391716
std,53590.35998,478.969978,4.978171,4.936192,4.992252,5.029133,4.817967,4.87732,4.793257,4.853711,5.054413,5.190817,5.103843,5.418745,5.93075,5.734864,5.967418,5.829084,5.513807,5.299245,5.644687,5.903962,5.79335,5.979341,5.515143,5.223056,4.818284,4.849251,4.733407,4.8706,4.948873,4.75505,4.718967,4.610326,4.328317,4.086451,4.097021,...,3.757214,3.767082,3.748397,3.663207,3.705701,3.725952,3.688617,3.656577,3.631049,3.433537,3.229954,3.129621,3.097357,3.147206,3.401712,3.452103,3.481984,3.667168,3.737498,3.879317,3.904079,3.934226,4.252323,4.378485,21285.960234,19.038739,6.101214,0.999997,0.156482,2465.554006,5523.846161,0.270875,662065.9,0.0,786.15868,727.016939,2175.409922
min,20239.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.43,14.32,13.97,13.96,13.52,13.38,13.22,13.1,12.98,13.39,13.34,...,12.87,13.37,13.57,13.66,13.72,13.62,13.62,13.74,13.99,14.33,14.6,14.62,14.62,14.66,15.42,15.6,15.99,16.13,15.99,15.6,15.76,15.74,16.04,16.4,0.0,-149.847176,-42.885005,1.0,0.0,0.0,0.0,0.0,100198.0,1.0,0.0,0.0,0.0
25%,199001.0,24.0,18.34,18.21,17.63,17.72,17.54,17.31,17.43,17.59,17.59,17.99,18.07,18.24,18.56,18.54,18.8,18.56,18.45,18.49,18.26,18.49,18.6,19.38,19.06,19.3,19.74,19.8,19.36,20.3,20.84,20.9,20.69,20.8,20.36,20.39,20.26,...,20.98,21.49,21.62,21.58,21.76,22.16,22.25,22.67,22.26,22.86,23.38,23.6,24.09,23.84,25.12,25.16,25.54,26.01,25.96,25.66,25.6,25.47,25.1,25.06,16876.5,-74.004926,40.636721,1.0,0.0,0.0,0.0,1.0,525000.0,1.0,1905.0,10022.0,1971.0
50%,270885.0,132.0,21.85,21.49,21.52,21.61,21.74,21.82,21.39,21.04,20.78,21.1,21.13,21.21,21.56,21.74,22.03,22.0,22.0,22.07,22.35,21.99,21.92,21.93,23.06,22.76,22.53,22.68,23.09,23.57,23.74,23.79,23.95,24.1,24.43,23.77,24.07,...,24.46,24.52,24.4,24.52,24.7,24.65,24.77,24.69,24.81,25.0,26.12,26.16,26.01,25.82,27.43,27.16,27.24,27.03,26.53,26.27,26.31,26.28,26.55,26.39,35436.0,-73.979559,40.717525,3.0,0.0,0.0,0.0,1.0,820000.0,1.0,1940.0,11201.0,2940.0
75%,270958.0,609.0,24.16,23.98,24.34,24.16,23.94,24.25,24.03,23.91,24.26,24.31,24.3,23.91,24.17,24.34,25.38,24.84,24.44,24.65,24.87,24.7,24.54,24.78,25.4,25.54,25.46,25.37,25.69,25.77,26.13,26.39,25.99,25.84,25.78,25.36,25.56,...,26.06,26.46,26.37,26.18,25.86,25.71,25.34,25.56,25.89,26.43,27.02,27.42,26.88,26.57,28.42,27.99,28.0,28.04,28.13,28.48,28.86,29.02,29.24,29.28,53932.5,-73.953754,40.766801,3.0,0.0,0.0,0.0,1.0,1330000.0,1.0,2005.0,11220.0,4769.0
max,403222.0,3309.0,60.14,59.4,59.91,60.77,57.98,56.35,54.7,55.59,58.3,61.59,61.44,61.45,61.48,61.88,60.51,60.94,55.97,53.78,52.44,50.99,52.71,51.74,51.61,51.03,49.93,50.37,49.93,48.51,51.32,48.2,46.44,44.8,43.49,42.89,45.07,...,36.18,37.24,39.56,39.47,40.12,39.43,38.62,37.84,36.39,35.42,35.1,35.24,36.44,37.99,39.93,39.91,40.99,42.17,43.28,43.48,43.95,44.62,44.76,45.31,73156.0,153.087664,61.615485,3.0,1.0,260000.0,644732.0,24.0,12000000.0,1.0,2016.0,11416.0,9997.0


In [606]:
df.head()

Unnamed: 0,RegionID,neighborhood,CountyName,SizeRank,2012-01,2012-02,2012-03,2012-04,2012-05,2012-06,2012-07,2012-08,2012-09,2012-10,2012-11,2012-12,2013-01,2013-02,2013-03,2013-04,2013-05,2013-06,2013-07,2013-08,2013-09,2013-10,2013-11,2013-12,2014-01,2014-02,2014-03,2014-04,2014-05,2014-06,2014-07,2014-08,2014-09,...,2016-12,2017-01,2017-02,2017-03,2017-04,2017-05,2017-06,2017-07,2017-08,2017-09,2017-10,2017-11,2017-12,2018-01,2018-02,2018-03,2018-04,2018-05,2018-06,2018-07,Unnam,addr,lng,lat,apartment_number,borough,building_class_category,commercial_units,gross_square_feet,land_square_feet,residential_units,sale_date,sale_price,total_units,year_built,zip_code,rent_estimate
0,270958,upper west side,New York,3,21.85,21.49,21.52,21.61,21.74,21.82,21.66,21.19,21.39,21.54,21.75,21.46,21.78,22.52,22.82,23.13,22.73,22.69,23.24,23.98,23.75,23.7,24.15,23.9,23.74,23.82,24.08,25.4,25.92,26.22,25.96,25.39,25.13,...,25.69,25.55,25.34,25.38,25.25,26.43,26.98,26.93,26.88,26.57,28.3,27.99,28.0,27.56,27.05,26.27,26.16,26.06,25.64,25.58,11909.0,360 MANHATTAN AVENUE,-73.957293,40.804399,A,1.0,15 CONDOS - 2-10 UNIT RESIDENTIAL,0.0,0.0,0.0,1.0,2012-05,695000.0,1.0,0.0,10026.0,2664
1,270958,upper west side,New York,3,21.85,21.49,21.52,21.61,21.74,21.82,21.66,21.19,21.39,21.54,21.75,21.46,21.78,22.52,22.82,23.13,22.73,22.69,23.24,23.98,23.75,23.7,24.15,23.9,23.74,23.82,24.08,25.4,25.92,26.22,25.96,25.39,25.13,...,25.69,25.55,25.34,25.38,25.25,26.43,26.98,26.93,26.88,26.57,28.3,27.99,28.0,27.56,27.05,26.27,26.16,26.06,25.64,25.58,11910.0,360 MANHATTAN AVENUE,-73.957293,40.804399,C,1.0,15 CONDOS - 2-10 UNIT RESIDENTIAL,0.0,0.0,0.0,1.0,2012-03,665000.0,1.0,0.0,10026.0,2575
2,270958,upper west side,New York,3,21.85,21.49,21.52,21.61,21.74,21.82,21.66,21.19,21.39,21.54,21.75,21.46,21.78,22.52,22.82,23.13,22.73,22.69,23.24,23.98,23.75,23.7,24.15,23.9,23.74,23.82,24.08,25.4,25.92,26.22,25.96,25.39,25.13,...,25.69,25.55,25.34,25.38,25.25,26.43,26.98,26.93,26.88,26.57,28.3,27.99,28.0,27.56,27.05,26.27,26.16,26.06,25.64,25.58,11911.0,360 MANHATTAN AVENUE,-73.957293,40.804399,H,1.0,15 CONDOS - 2-10 UNIT RESIDENTIAL,0.0,0.0,0.0,1.0,2012-02,721000.0,1.0,0.0,10026.0,2796
3,270958,upper west side,New York,3,21.85,21.49,21.52,21.61,21.74,21.82,21.66,21.19,21.39,21.54,21.75,21.46,21.78,22.52,22.82,23.13,22.73,22.69,23.24,23.98,23.75,23.7,24.15,23.9,23.74,23.82,24.08,25.4,25.92,26.22,25.96,25.39,25.13,...,25.69,25.55,25.34,25.38,25.25,26.43,26.98,26.93,26.88,26.57,28.3,27.99,28.0,27.56,27.05,26.27,26.16,26.06,25.64,25.58,11912.0,360 MANHATTAN AVENUE,-73.957293,40.804399,B,1.0,15 CONDOS - 2-10 UNIT RESIDENTIAL,0.0,0.0,0.0,1.0,2013-08,955000.0,1.0,0.0,10026.0,3319
4,270958,upper west side,New York,3,21.85,21.49,21.52,21.61,21.74,21.82,21.66,21.19,21.39,21.54,21.75,21.46,21.78,22.52,22.82,23.13,22.73,22.69,23.24,23.98,23.75,23.7,24.15,23.9,23.74,23.82,24.08,25.4,25.92,26.22,25.96,25.39,25.13,...,25.69,25.55,25.34,25.38,25.25,26.43,26.98,26.93,26.88,26.57,28.3,27.99,28.0,27.56,27.05,26.27,26.16,26.06,25.64,25.58,24794.0,130 MANHATTAN AVENUE,-74.054322,40.744161,0,1.0,01 ONE FAMILY HOMES,0.0,3264.0,1190.0,1.0,2012-12,2100000.0,1.0,1900.0,10025.0,8155


In [607]:
len(df)

73157

In [648]:
df2 = df[(df.commercial_units<1) & (df.rent_estimate>1)]
df2 = df2.drop(columns=['Unnam','commercial_units','residential_units','total_units','apartment_number', 'borough',
       'building_class_category', 'gross_square_feet', 'land_square_feet','year_built','2012-01',
       '2012-02', '2012-03', '2012-04', '2012-05', '2012-06', '2012-07',
       '2012-08', '2012-09', '2012-10', '2012-11', '2012-12', '2013-01',
       '2013-02', '2013-03', '2013-04', '2013-05', '2013-06', '2013-07',
       '2013-08', '2013-09', '2013-10', '2013-11', '2013-12', '2014-01',
       '2014-02', '2014-03', '2014-04', '2014-05', '2014-06', '2014-07',
       '2014-08', '2014-09', '2014-10', '2014-11', '2014-12', '2015-01',
       '2015-02', '2015-03', '2015-04', '2015-05', '2015-06', '2015-07',
       '2015-08', '2015-09', '2015-10', '2015-11', '2015-12', '2016-01',
       '2016-02', '2016-03', '2016-04', '2016-05', '2016-06', '2016-07',
       '2016-08', '2016-09', '2016-10', '2016-11', '2016-12', '2017-01',
       '2017-02', '2017-03', '2017-04', '2017-05', '2017-06', '2017-07',
       '2017-08', '2017-09', '2017-10', '2017-11', '2017-12', '2018-01',
       '2018-02', '2018-03', '2018-04', '2018-05', '2018-06', '2018-07','SizeRank','CountyName','lng','lat'])

In [649]:
df2.describe()

Unnamed: 0,RegionID,sale_price,zip_code,rent_estimate
count,70146.0,70146.0,70146.0,70146.0
mean,263028.979372,2032094.0,10536.06371,7004.056
std,52177.208603,14876490.0,744.198675,46759.33
min,20239.0,100198.0,0.0,186.0
25%,270811.0,568000.0,10019.0,2105.0
50%,270885.0,920000.0,10069.0,3321.0
75%,270958.0,1756481.0,11216.0,6210.75
max,403222.0,2200000000.0,11416.0,6772565.0


In [650]:
df2.columns

Index(['RegionID', 'neighborhood', 'addr', 'sale_date', 'sale_price',
       'zip_code', 'rent_estimate'],
      dtype='object')

In [651]:
df2.to_csv('rent_estimate_df.csv')