In [26]:
import pandas as pd 
from tqdm import tqdm
from uszipcode import SearchEngine
import sqlalchemy
from sqlalchemy import create_engine, types

from config import postgre_pw


In [3]:
airbnb_df = pd.read_csv("data/airbnb.csv")
airbnb_filtered__df = airbnb_df[airbnb_df["number_of_reviews"] > 25]
airbnb_filtered__df.head()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
15,74404.0,Luxury 3 bed/ 2 bath apt in Harlem w/ terrace,391325.0,G & S,Manhattan,Harlem,40.80276,-73.9567,Entire home/apt,250.0,14.0,31.0,8/22/2012,0.31,1.0,78.0
17,47362.0,"LARGE, COMFY 1BDR W/CHARACTER!!!",214148.0,Robert,Brooklyn,Bedford-Stuyvesant,40.68237,-73.9415,Entire home/apt,175.0,26.0,30.0,9/3/2012,0.29,1.0,364.0
25,23501.0,Monkey Retreat Manhattan,63318.0,Meka,Manhattan,Washington Heights,40.83927,-73.94281,Private room,65.0,2.0,68.0,11/1/2012,0.6,1.0,312.0
108,591565.0,Everyone who stays leaves happy!,2919467.0,Lisa,Manhattan,Tribeca,40.71552,-74.00749,Private room,229.0,1.0,62.0,4/27/2014,0.73,1.0,36.0
113,71384.0,Gigantic Private Brooklyn Loft!,365153.0,Ben,Brooklyn,Greenpoint,40.72898,-73.95552,Entire home/apt,229.0,1.0,50.0,5/13/2014,0.5,1.0,188.0


In [4]:
airbnb_filtered__df = airbnb_filtered__df[["neighbourhood_group", 
                                           "neighbourhood", 
                                           "latitude",
                                           "longitude",
                                           "room_type",
                                           "price", 
                                           "number_of_reviews",
                                           "last_review"]].copy()
airbnb_filtered__df.rename(columns={"neighbourhood_group":"borough"}, inplace=True)
airbnb_filtered__df.head()

Unnamed: 0,borough,neighbourhood,latitude,longitude,room_type,price,number_of_reviews,last_review
15,Manhattan,Harlem,40.80276,-73.9567,Entire home/apt,250.0,31.0,8/22/2012
17,Brooklyn,Bedford-Stuyvesant,40.68237,-73.9415,Entire home/apt,175.0,30.0,9/3/2012
25,Manhattan,Washington Heights,40.83927,-73.94281,Private room,65.0,68.0,11/1/2012
108,Manhattan,Tribeca,40.71552,-74.00749,Private room,229.0,62.0,4/27/2014
113,Brooklyn,Greenpoint,40.72898,-73.95552,Entire home/apt,229.0,50.0,5/13/2014


In [17]:
def get_zipcode_data(lat, lng): 
    search = SearchEngine(simple_zipcode=True)
    try:
        result = search.by_coordinates(lat, lng, radius=1, returns=1)
        zipcode_data = result[0].to_dict()
    except:
        zipcode_data = "Failure"
    return zipcode_data

In [18]:
tqdm.pandas(desc="Processing:", total = len(airbnb_filtered__df))
airbnb_filtered__df["ZipCode_data"] = airbnb_filtered__df.progress_apply(lambda row: get_zipcode_data(row["latitude"], row["longitude"]), axis=1)


Processing:: 100%|██████████████████████████████████████████████████████████████| 11617/11617 [01:03<00:00, 182.96it/s]


In [19]:
airbnb_filtered__df = airbnb_filtered__df[airbnb_filtered__df["ZipCode_data"]!="Failure"].copy() 

In [20]:
def get_key_value(selected_key, zipcode_data):
    selected_key_value = zipcode_data[selected_key]
    return selected_key_value

airbnb_filtered__df["zipcode"] = airbnb_filtered__df.progress_apply(lambda row: get_key_value("zipcode", row["ZipCode_data"]), axis=1)
airbnb_filtered__df["median_household_income"] = airbnb_filtered__df.progress_apply(lambda row: get_key_value("median_household_income", row["ZipCode_data"]), axis=1)
airbnb_filtered__df["median_home_value"] = airbnb_filtered__df.progress_apply(lambda row: get_key_value("median_home_value", row["ZipCode_data"]), axis=1)

Processing:: 100%|████████████████████████████████████████████████████████████| 11364/11364 [00:00<00:00, 35491.83it/s]
Processing:: 100%|████████████████████████████████████████████████████████████| 11364/11364 [00:00<00:00, 29138.51it/s]
Processing:: 100%|████████████████████████████████████████████████████████████| 11364/11364 [00:00<00:00, 30144.20it/s]


In [29]:

airbnb_filtered__df.index.names = ['id']
airbnb_filtered__df.rename(columns={"ZipCode_data":"zipcode_data"}, inplace=True)
airbnb_filtered__df.head()
#airbnb_filtered__df.to_sql(name=)

Unnamed: 0_level_0,borough,neighbourhood,latitude,longitude,room_type,price,number_of_reviews,last_review,zipcode_data,zipcode,median_household_income,median_home_value
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
15,Manhattan,Harlem,40.80276,-73.9567,Entire home/apt,250.0,31.0,8/22/2012,"{'zipcode': '10026', 'zipcode_type': 'Standard...",10026,43107.0,602400.0
17,Brooklyn,Bedford-Stuyvesant,40.68237,-73.9415,Entire home/apt,175.0,30.0,9/3/2012,"{'zipcode': '11216', 'zipcode_type': 'Standard...",11216,43996.0,623300.0
25,Manhattan,Washington Heights,40.83927,-73.94281,Private room,65.0,68.0,11/1/2012,"{'zipcode': '10032', 'zipcode_type': 'Standard...",10032,34568.0,334900.0
108,Manhattan,Tribeca,40.71552,-74.00749,Private room,229.0,62.0,4/27/2014,"{'zipcode': '10007', 'zipcode_type': 'Standard...",10007,216037.0,1000001.0
113,Brooklyn,Greenpoint,40.72898,-73.95552,Entire home/apt,229.0,50.0,5/13/2014,"{'zipcode': '11222', 'zipcode_type': 'Standard...",11222,63739.0,726500.0


In [34]:
rds_connection_string = f"postgres:{postgre_pw}@localhost:5432/AirBNB_NYC"
engine = create_engine(f'postgresql://{rds_connection_string}')

airbnb_filtered__df.to_sql(name="airbnb_listingss",
                          con=engine,
                          if_exists="append",
                          index=True,
                          dtype={'zipcode_data': sqlalchemy.types.JSON})

ProgrammingError: (psycopg2.ProgrammingError) can't adapt type 'dict'
[SQL: INSERT INTO airbnb_listingss (id, borough, neighbourhood, latitude, longitude, room_type, price, number_of_reviews, last_review, zipcode_data, zipcode, median_household_income, median_home_value) VALUES (%(id)s, %(borough)s, %(neighbourhood)s, %(latitude)s, %(longitude)s, %(room_type)s, %(price)s, %(number_of_reviews)s, %(last_review)s, %(zipcode_data)s, %(zipcode)s, %(median_household_income)s, %(median_home_value)s)]
[parameters: ({'id': 15, 'borough': 'Manhattan', 'neighbourhood': 'Harlem', 'latitude': 40.80276, 'longitude': -73.9567, 'room_type': 'Entire home/apt', 'price': 250.0, 'number_of_reviews': 31.0, 'last_review': '8/22/2012', 'zipcode_data': {'zipcode': '10026', 'zipcode_type': 'Standard', 'major_city': 'New York', 'post_office_city': 'New York, NY', 'common_city_list': ['New York'], 'coun ... (356 characters truncated) ... : 602400, 'median_household_income': 43107, 'bounds_west': -73.962475, 'bounds_east': -73.944667, 'bounds_north': 40.807546, 'bounds_south': 40.79691}, 'zipcode': '10026', 'median_household_income': 43107.0, 'median_home_value': 602400.0}, {'id': 17, 'borough': 'Brooklyn', 'neighbourhood': 'Bedford-Stuyvesant', 'latitude': 40.68237, 'longitude': -73.9415, 'room_type': 'Entire home/apt', 'price': 175.0, 'number_of_reviews': 30.0, 'last_review': '9/3/2012', 'zipcode_data': {'zipcode': '11216', 'zipcode_type': 'Standard', 'major_city': 'Brooklyn', 'post_office_city': 'Brooklyn, NY', 'common_city_list': ['Brooklyn'], 'coun ... (343 characters truncated) ...  623300, 'median_household_income': 43996, 'bounds_west': -73.957974, 'bounds_east': -73.937398, 'bounds_north': 40.692765, 'bounds_south': 40.669705}, 'zipcode': '11216', 'median_household_income': 43996.0, 'median_home_value': 623300.0}, {'id': 25, 'borough': 'Manhattan', 'neighbourhood': 'Washington Heights', 'latitude': 40.83927, 'longitude': -73.94281, 'room_type': 'Private room', 'price': 65.0, 'number_of_reviews': 68.0, 'last_review': '11/1/2012', 'zipcode_data': {'zipcode': '10032', 'zipcode_type': 'Standard', 'major_city': 'New York', 'post_office_city': 'New York, NY', 'common_city_list': ['New York'], 'coun ... (361 characters truncated) ...  334900, 'median_household_income': 34568, 'bounds_west': -73.950403, 'bounds_east': -73.934671, 'bounds_north': 40.850517, 'bounds_south': 40.829083}, 'zipcode': '10032', 'median_household_income': 34568.0, 'median_home_value': 334900.0}, {'id': 108, 'borough': 'Manhattan', 'neighbourhood': 'Tribeca', 'latitude': 40.71552, 'longitude': -74.00749, 'room_type': 'Private room', 'price': 229.0, 'number_of_reviews': 62.0, 'last_review': '4/27/2014', 'zipcode_data': {'zipcode': '10007', 'zipcode_type': 'Standard', 'major_city': 'New York', 'post_office_city': 'New York, NY', 'common_city_list': ['New York'], 'coun ... (375 characters truncated) ... 1000001, 'median_household_income': 216037, 'bounds_west': -74.013754, 'bounds_east': -74.000455, 'bounds_north': 40.71719, 'bounds_south': 40.709806}, 'zipcode': '10007', 'median_household_income': 216037.0, 'median_home_value': 1000001.0}, {'id': 113, 'borough': 'Brooklyn', 'neighbourhood': 'Greenpoint', 'latitude': 40.72898, 'longitude': -73.95551999999999, 'room_type': 'Entire home/apt', 'price': 229.0, 'number_of_reviews': 50.0, 'last_review': '5/13/2014', 'zipcode_data': {'zipcode': '11222', 'zipcode_type': 'Standard', 'major_city': 'Brooklyn', 'post_office_city': 'Brooklyn, NY', 'common_city_list': ['Brooklyn'], 'coun ... (337 characters truncated) ...  726500, 'median_household_income': 63739, 'bounds_west': -73.962795, 'bounds_east': -73.928137, 'bounds_north': 40.739446, 'bounds_south': 40.718088}, 'zipcode': '11222', 'median_household_income': 63739.0, 'median_home_value': 726500.0}, {'id': 126, 'borough': 'Brooklyn', 'neighbourhood': 'Crown Heights', 'latitude': 40.66898, 'longitude': -73.9571, 'room_type': 'Entire home/apt', 'price': 185.0, 'number_of_reviews': 34.0, 'last_review': '6/9/2014', 'zipcode_data': {'zipcode': '11225', 'zipcode_type': 'Standard', 'major_city': 'Brooklyn', 'post_office_city': 'Brooklyn, NY', 'common_city_list': ['Brooklyn'], 'coun ... (322 characters truncated) ...  615700, 'median_household_income': 42922, 'bounds_west': -73.965025, 'bounds_east': -73.939978, 'bounds_north': 40.671629, 'bounds_south': 40.654871}, 'zipcode': '11225', 'median_household_income': 42922.0, 'median_home_value': 615700.0}, {'id': 144, 'borough': 'Queens', 'neighbourhood': 'Long Island City', 'latitude': 40.7529, 'longitude': -73.92985, 'room_type': 'Private room', 'price': 63.0, 'number_of_reviews': 26.0, 'last_review': '7/13/2014', 'zipcode_data': {'zipcode': '11106', 'zipcode_type': 'Standard', 'major_city': 'Astoria', 'post_office_city': 'Astoria, NY', 'common_city_list': ['Astoria', 'Long Is  ... (357 characters truncated) ...  433600, 'median_household_income': 48720, 'bounds_west': -73.945111, 'bounds_east': -73.918966, 'bounds_north': 40.771745, 'bounds_south': 40.752425}, 'zipcode': '11106', 'median_household_income': 48720.0, 'median_home_value': 433600.0}, {'id': 246, 'borough': 'Manhattan', 'neighbourhood': 'Lower East Side', 'latitude': 40.71868, 'longitude': -73.99016999999999, 'room_type': 'Entire home/apt', 'price': 140.0, 'number_of_reviews': 53.0, 'last_review': '10/28/2014', 'zipcode_data': {'zipcode': '10002', 'zipcode_type': 'Standard', 'major_city': 'New York', 'post_office_city': 'New York, NY', 'common_city_list': ['New York', 'Knick ... (357 characters truncated) ...  535600, 'median_household_income': 33218, 'bounds_west': -73.997532, 'bounds_east': -73.973635, 'bounds_north': 40.724136, 'bounds_south': 40.708802}, 'zipcode': '10002', 'median_household_income': 33218.0, 'median_home_value': 535600.0}  ... displaying 10 of 11364 total bound parameter sets ...  {'id': 38807, 'borough': 'Queens', 'neighbourhood': 'Ozone Park', 'latitude': 40.68237, 'longitude': -73.84874, 'room_type': 'Private room', 'price': 45.0, 'number_of_reviews': 40.0, 'last_review': '7/8/2019', 'zipcode_data': {'zipcode': '11416', 'zipcode_type': 'Standard', 'major_city': 'Ozone Park', 'post_office_city': 'Ozone Park, NY', 'common_city_list': ['Ozone Park',  ... (338 characters truncated) ...  447200, 'median_household_income': 56724, 'bounds_west': -73.866295, 'bounds_east': -73.834916, 'bounds_north': 40.691364, 'bounds_south': 40.679073}, 'zipcode': '11416', 'median_household_income': 56724.0, 'median_home_value': 447200.0}, {'id': 38811, 'borough': 'Queens', 'neighbourhood': 'Bayside', 'latitude': 40.77067, 'longitude': -73.78278, 'room_type': 'Entire home/apt', 'price': 55.0, 'number_of_reviews': 37.0, 'last_review': '7/8/2019', 'zipcode_data': {'zipcode': '11360', 'zipcode_type': 'Standard', 'major_city': 'Bayside', 'post_office_city': 'Bayside, NY', 'common_city_list': ['Bayside', 'Flushing ... (336 characters truncated) ... : 524000, 'median_household_income': 76183, 'bounds_west': -73.79548, 'bounds_east': -73.762585, 'bounds_north': 40.790631, 'bounds_south': 40.769708}, 'zipcode': '11360', 'median_household_income': 76183.0, 'median_home_value': 524000.0})]
(Background on this error at: http://sqlalche.me/e/f405)