# Introduction

This notebook performs EDA on the Airbnb listings data http://data.insideairbnb.com/australia/nsw/sydney/2021-04-10/data/listings.csv.gz.

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from datetime import datetime
import sqlalchemy as sa
import os
import gzip
import shutil
import requests
import pandas as pd
from pathlib import Path
from dotenv import (
    load_dotenv,
    find_dotenv
)
import psycopg2
from src.data.database import (
    get_connection_string
)

from airflow.operators.python import PythonOperator
from airflow.providers.postgres.operators.postgres import PostgresOperator
from airflow.providers.postgres.hooks.postgres import PostgresHook

In [3]:
load_dotenv(find_dotenv())

project_dir = Path(find_dotenv()).parent
data_dir = project_dir / 'data'
raw_data_dir = data_dir / 'raw'
interim_data_dir = data_dir / 'interim'
reports_dir = project_dir / 'reports'

In [4]:
pd.set_option('display.max_columns', 100)

# Load data

Test with the latest data set.

In [5]:
path = raw_data_dir / '2021-04-10.gz'
df = pd.read_csv(path, compression='gzip')

In [6]:
df.head()

Unnamed: 0,id,listing_url,scrape_id,last_scraped,name,description,neighborhood_overview,picture_url,host_id,host_url,host_name,host_since,host_location,host_about,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_thumbnail_url,host_picture_url,host_neighbourhood,host_listings_count,host_total_listings_count,host_verifications,host_has_profile_pic,host_identity_verified,neighbourhood,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,property_type,room_type,accommodates,bathrooms,bathrooms_text,bedrooms,beds,amenities,price,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,calendar_updated,has_availability,availability_30,availability_60,availability_90,availability_365,calendar_last_scraped,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
0,11156,https://www.airbnb.com/rooms/11156,20210410042103,2021-04-12,An Oasis in the City,Very central to the city which can be reached ...,"It is very close to everything and everywhere,...",https://a0.muscache.com/pictures/2797669/17895...,40855,https://www.airbnb.com/users/show/40855,Colleen,2009-09-23,"Potts Point, New South Wales, Australia","Recently retired, I've lived & worked on 4 con...",,,,f,https://a0.muscache.com/im/users/40855/profile...,https://a0.muscache.com/im/users/40855/profile...,Potts Point,1.0,1.0,"['email', 'phone', 'reviews']",t,f,"Potts Point, New South Wales, Australia",Sydney,,-33.86767,151.22497,Private room in apartment,Private room,1,,1 shared bath,1.0,0.0,"[""Dishwasher"", ""Backyard"", ""Kitchen"", ""Shower ...",$65.00,2,180,2,2,180,180,2.0,180.0,,t,29,59,89,364,2021-04-12,196,0,0,2009-12-05,2020-03-13,92.0,10.0,9.0,10.0,10.0,10.0,10.0,,f,1,0,1,0,1.42
1,12351,https://www.airbnb.com/rooms/12351,20210410042103,2021-04-15,Sydney City & Harbour at the door,Come stay with Vinh & Stuart (Awarded as one o...,"Pyrmont is an inner-city village of Sydney, on...",https://a0.muscache.com/pictures/763ad5c8-c951...,17061,https://www.airbnb.com/users/show/17061,Stuart,2009-05-14,"Sydney, New South Wales, Australia","G'Day from Australia!\r\n\r\nHe's Vinh, and I'...",,,,f,https://a0.muscache.com/im/users/17061/profile...,https://a0.muscache.com/im/users/17061/profile...,Pyrmont,2.0,2.0,"['email', 'phone', 'manual_online', 'reviews',...",t,t,"Pyrmont, New South Wales, Australia",Sydney,,-33.8649,151.19171,Private room in townhouse,Private room,2,,1 shared bath,1.0,1.0,"[""Microwave"", ""Patio or balcony"", ""Wifi"", ""Dis...","$14,315.00",2,7,2,2,7,7,2.0,7.0,,t,0,0,0,0,2021-04-15,526,0,0,2010-07-24,2019-09-22,95.0,10.0,10.0,10.0,10.0,10.0,10.0,,f,2,0,2,0,4.03
2,14250,https://www.airbnb.com/rooms/14250,20210410042103,2021-04-14,Manly Harbour House,"Beautifully renovated, spacious and quiet, our...",Balgowlah Heights is one of the most prestigio...,https://a0.muscache.com/pictures/56935671/fdb8...,55948,https://www.airbnb.com/users/show/55948,Heidi,2009-11-20,"Sydney, New South Wales, Australia",I am a Canadian who has made Australia her hom...,within a few hours,90%,79%,t,https://a0.muscache.com/im/users/55948/profile...,https://a0.muscache.com/im/users/55948/profile...,Balgowlah,2.0,2.0,"['email', 'phone', 'reviews', 'jumio', 'offlin...",t,t,"Balgowlah, New South Wales, Australia",Manly,,-33.80084,151.26378,Entire house,Entire home/apt,6,,3 baths,3.0,3.0,"[""Stove"", ""Dedicated workspace"", ""Iron"", ""Pati...",$470.00,5,22,5,5,22,22,5.0,22.0,,t,0,0,0,122,2021-04-14,2,0,0,2016-01-02,2019-01-02,90.0,8.0,8.0,9.0,8.0,9.0,8.0,,f,2,2,0,0,0.03
3,15253,https://www.airbnb.com/rooms/15253,20210410042103,2021-04-12,Unique Designer Rooftop Apartment in City Loca...,Penthouse living at it best ... You will be st...,The location is really central and there is nu...,https://a0.muscache.com/pictures/46dcb8a1-5d5b...,59850,https://www.airbnb.com/users/show/59850,Morag,2009-12-03,"Sydney, New South Wales, Australia",I am originally Scottish but I have made Sydne...,within an hour,90%,95%,f,https://a0.muscache.com/im/pictures/user/730ee...,https://a0.muscache.com/im/pictures/user/730ee...,Darlinghurst,3.0,3.0,"['email', 'phone', 'facebook', 'reviews', 'jum...",t,t,"Darlinghurst, New South Wales, Australia",Sydney,,-33.87964,151.2168,Private room in apartment,Private room,2,,1 private bath,1.0,1.0,"[""Dishwasher"", ""Kitchen"", ""Shower gel"", ""Cooki...",$80.00,2,90,2,2,90,90,2.0,90.0,,t,21,48,78,336,2021-04-12,367,3,0,2012-02-23,2021-03-07,88.0,10.0,9.0,10.0,10.0,10.0,9.0,,t,1,0,1,0,3.3
4,44545,https://www.airbnb.com/rooms/44545,20210410042103,2021-04-13,Sunny Darlinghurst Warehouse Apartment,Sunny warehouse/loft apartment in the heart of...,Darlinghurst is home to some of Sydney's best ...,https://a0.muscache.com/pictures/a88d8e14-4f63...,112237,https://www.airbnb.com/users/show/112237,Atari,2010-04-22,"Sydney, New South Wales, Australia",Curious about the world and full of wanderlust...,,,,t,https://a0.muscache.com/im/pictures/user/34708...,https://a0.muscache.com/im/pictures/user/34708...,Darlinghurst,1.0,1.0,"['email', 'phone', 'facebook', 'reviews', 'jum...",t,t,"Darlinghurst, New South Wales, Australia",Sydney,,-33.87888,151.21439,Entire loft,Entire home/apt,2,,1 bath,1.0,1.0,"[""Dishwasher"", ""Kitchen"", ""Cooking basics"", ""C...",$130.00,3,365,3,3,365,365,3.0,365.0,,t,0,0,0,0,2021-04-13,76,0,0,2010-10-20,2020-01-03,97.0,10.0,10.0,10.0,10.0,10.0,10.0,,f,1,1,0,0,0.6


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32679 entries, 0 to 32678
Data columns (total 74 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   id                                            32679 non-null  int64  
 1   listing_url                                   32679 non-null  object 
 2   scrape_id                                     32679 non-null  int64  
 3   last_scraped                                  32679 non-null  object 
 4   name                                          32667 non-null  object 
 5   description                                   31442 non-null  object 
 6   neighborhood_overview                         19345 non-null  object 
 7   picture_url                                   32679 non-null  object 
 8   host_id                                       32679 non-null  int64  
 9   host_url                                      32679 non-null 

In [9]:
path = interim_data_dir / '2021-04-10'

In [11]:
df.to_csv(path.with_suffix('.csv'), index=False)
df.to_parquet(path.with_suffix('.parquet'), index=False)

In [12]:
df = pd.read_parquet(path.with_suffix('.parquet'))

# Connect to Postgres

In [40]:
conn_string = get_connection_string()

In [41]:
conn_string

'postgresql+psycopg2://airflow:airflow@postgres:5432/airflow'

In [16]:
table_name = 'test_table'

In [18]:
df.to_sql(con=conn_string,
                 name=table_name,
                 if_exists='replace',
                 index=False)

In [17]:
# Test read from database
query = f"""
SELECT *
FROM {table_name}
"""

df_from_sql = pd.read_sql(con=conn_string,
                          sql=query)
df_from_sql

Unnamed: 0,id,listing_url,scrape_id,last_scraped,name,description,neighborhood_overview,picture_url,host_id,host_url,host_name,host_since,host_location,host_about,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_thumbnail_url,host_picture_url,host_neighbourhood,host_listings_count,host_total_listings_count,host_verifications,host_has_profile_pic,host_identity_verified,neighbourhood,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,property_type,room_type,accommodates,bathrooms,bathrooms_text,bedrooms,beds,amenities,price,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,calendar_updated,has_availability,availability_30,availability_60,availability_90,availability_365,calendar_last_scraped,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
0,11156,https://www.airbnb.com/rooms/11156,20210410042103,2021-04-12,An Oasis in the City,Very central to the city which can be reached ...,"It is very close to everything and everywhere,...",https://a0.muscache.com/pictures/2797669/17895...,40855,https://www.airbnb.com/users/show/40855,Colleen,2009-09-23,"Potts Point, New South Wales, Australia","Recently retired, I've lived & worked on 4 con...",,,,f,https://a0.muscache.com/im/users/40855/profile...,https://a0.muscache.com/im/users/40855/profile...,Potts Point,1.0,1.0,"['email', 'phone', 'reviews']",t,f,"Potts Point, New South Wales, Australia",Sydney,,-33.86767,151.22497,Private room in apartment,Private room,1,,1 shared bath,1.0,0.0,"[""Dishwasher"", ""Backyard"", ""Kitchen"", ""Shower ...",$65.00,2,180,2,2,180,180,2.0,180.0,,t,29,59,89,364,2021-04-12,196,0,0,2009-12-05,2020-03-13,92.0,10.0,9.0,10.0,10.0,10.0,10.0,,f,1,0,1,0,1.42
1,12351,https://www.airbnb.com/rooms/12351,20210410042103,2021-04-15,Sydney City & Harbour at the door,Come stay with Vinh & Stuart (Awarded as one o...,"Pyrmont is an inner-city village of Sydney, on...",https://a0.muscache.com/pictures/763ad5c8-c951...,17061,https://www.airbnb.com/users/show/17061,Stuart,2009-05-14,"Sydney, New South Wales, Australia","G'Day from Australia!\r\n\r\nHe's Vinh, and I'...",,,,f,https://a0.muscache.com/im/users/17061/profile...,https://a0.muscache.com/im/users/17061/profile...,Pyrmont,2.0,2.0,"['email', 'phone', 'manual_online', 'reviews',...",t,t,"Pyrmont, New South Wales, Australia",Sydney,,-33.86490,151.19171,Private room in townhouse,Private room,2,,1 shared bath,1.0,1.0,"[""Microwave"", ""Patio or balcony"", ""Wifi"", ""Dis...","$14,315.00",2,7,2,2,7,7,2.0,7.0,,t,0,0,0,0,2021-04-15,526,0,0,2010-07-24,2019-09-22,95.0,10.0,10.0,10.0,10.0,10.0,10.0,,f,2,0,2,0,4.03
2,14250,https://www.airbnb.com/rooms/14250,20210410042103,2021-04-14,Manly Harbour House,"Beautifully renovated, spacious and quiet, our...",Balgowlah Heights is one of the most prestigio...,https://a0.muscache.com/pictures/56935671/fdb8...,55948,https://www.airbnb.com/users/show/55948,Heidi,2009-11-20,"Sydney, New South Wales, Australia",I am a Canadian who has made Australia her hom...,within a few hours,90%,79%,t,https://a0.muscache.com/im/users/55948/profile...,https://a0.muscache.com/im/users/55948/profile...,Balgowlah,2.0,2.0,"['email', 'phone', 'reviews', 'jumio', 'offlin...",t,t,"Balgowlah, New South Wales, Australia",Manly,,-33.80084,151.26378,Entire house,Entire home/apt,6,,3 baths,3.0,3.0,"[""Stove"", ""Dedicated workspace"", ""Iron"", ""Pati...",$470.00,5,22,5,5,22,22,5.0,22.0,,t,0,0,0,122,2021-04-14,2,0,0,2016-01-02,2019-01-02,90.0,8.0,8.0,9.0,8.0,9.0,8.0,,f,2,2,0,0,0.03
3,15253,https://www.airbnb.com/rooms/15253,20210410042103,2021-04-12,Unique Designer Rooftop Apartment in City Loca...,Penthouse living at it best ... You will be st...,The location is really central and there is nu...,https://a0.muscache.com/pictures/46dcb8a1-5d5b...,59850,https://www.airbnb.com/users/show/59850,Morag,2009-12-03,"Sydney, New South Wales, Australia",I am originally Scottish but I have made Sydne...,within an hour,90%,95%,f,https://a0.muscache.com/im/pictures/user/730ee...,https://a0.muscache.com/im/pictures/user/730ee...,Darlinghurst,3.0,3.0,"['email', 'phone', 'facebook', 'reviews', 'jum...",t,t,"Darlinghurst, New South Wales, Australia",Sydney,,-33.87964,151.21680,Private room in apartment,Private room,2,,1 private bath,1.0,1.0,"[""Dishwasher"", ""Kitchen"", ""Shower gel"", ""Cooki...",$80.00,2,90,2,2,90,90,2.0,90.0,,t,21,48,78,336,2021-04-12,367,3,0,2012-02-23,2021-03-07,88.0,10.0,9.0,10.0,10.0,10.0,9.0,,t,1,0,1,0,3.30
4,9995212,https://www.airbnb.com/rooms/9995212,20210410042103,2021-04-13,Close to everything Sydney!,Beautiful 1 bed apt in Pyrmont.<br /><br />Lar...,,https://a0.muscache.com/pictures/599acdd7-87b5...,10697503,https://www.airbnb.com/users/show/10697503,,,,,,,,,,,,,,,,,,Sydney,,-33.86639,151.19215,Entire apartment,Entire home/apt,2,,1 bath,1.0,1.0,"[""Pool"", ""Kitchen"", ""Iron"", ""Cable TV"", ""TV wi...",$280.00,5,10,5,5,10,10,5.0,10.0,,t,0,0,0,0,2021-04-13,0,0,0,,,,,,,,,,,t,1,1,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32674,49115625,https://www.airbnb.com/rooms/49115625,20210410042103,2021-04-12,MAS3 Quiet Spacious 2 Bedrooms 5Mins walk to T...,This contemporary light-filled two-bedroom 2 b...,,https://a0.muscache.com/pictures/b42eef72-98f2...,382207272,https://www.airbnb.com/users/show/382207272,Victor,2020-12-29,"Ryde, New South Wales, Australia",,within a few hours,94%,65%,f,https://a0.muscache.com/im/pictures/user/3537d...,https://a0.muscache.com/im/pictures/user/3537d...,,7.0,7.0,"['email', 'phone', 'jumio', 'offline_governmen...",t,t,,Botany Bay,,-33.92167,151.18408,Entire apartment,Entire home/apt,5,,2 baths,2.0,2.0,"[""Heating"", ""Washer"", ""Shampoo"", ""Essentials"",...",$180.00,2,1125,2,2,1125,1125,2.0,1125.0,,t,25,55,85,175,2021-04-12,0,0,0,,,,,,,,,,,f,26,26,0,0,
32675,49118065,https://www.airbnb.com/rooms/49118065,20210410042103,2021-04-12,"Marvellous Marrickville apartment, 15 mins to ...",For those wanting easy access to inner Sydney'...,,https://a0.muscache.com/pictures/94a3bd3f-4ac2...,4358703,https://www.airbnb.com/users/show/4358703,Galina,2012-12-08,"Sydney, New South Wales, Australia",My name is Galina. I'm a proud mum of an 8 yea...,within an hour,100%,98%,f,https://a0.muscache.com/im/pictures/user/c226a...,https://a0.muscache.com/im/pictures/user/c226a...,Marrickville,7.0,7.0,"['email', 'phone', 'reviews', 'jumio', 'offlin...",t,t,,Marrickville,,-33.91559,151.15586,Entire house,Entire home/apt,4,,1 bath,1.0,2.0,"[""Bed linens"", ""Microwave"", ""Heating"", ""Washer...",$88.00,3,31,3,3,1125,1125,3.0,1125.0,,t,9,35,65,156,2021-04-12,0,0,0,,,,,,,,,,,t,3,3,0,0,
32676,49118280,https://www.airbnb.com/rooms/49118280,20210410042103,2021-04-12,Peaceful Pittwater Views from Eclectic Getaway,Immerse yourself in this north facing peaceful...,,https://a0.muscache.com/pictures/96d93fbd-590d...,95214788,https://www.airbnb.com/users/show/95214788,Cushie - Concierge Services,2016-09-15,"Avalon Beach, New South Wales, Australia",cushie provides hosting management and concier...,within an hour,86%,89%,f,https://a0.muscache.com/im/pictures/user/8b55f...,https://a0.muscache.com/im/pictures/user/8b55f...,Palm Beach,45.0,45.0,"['email', 'phone', 'google', 'reviews', 'offli...",t,t,,Pittwater,,-33.63855,151.31895,Entire house,Entire home/apt,4,,2 baths,2.0,2.0,"[""Fire extinguisher"", ""Heating"", ""Washer"", ""Sh...",$400.00,1,1125,1,1,1125,1125,1.0,1125.0,,t,7,7,7,7,2021-04-12,0,0,0,,,,,,,,,,,t,35,35,0,0,
32677,49118480,https://www.airbnb.com/rooms/49118480,20210410042103,2021-04-13,FD50 Newly Furnished 1 Bedroom in Five Dock,Five dock is a real gem in waterside centrally...,,https://a0.muscache.com/pictures/b6c19d4b-424f...,382207272,https://www.airbnb.com/users/show/382207272,Victor,2020-12-29,"Ryde, New South Wales, Australia",,within a few hours,94%,65%,f,https://a0.muscache.com/im/pictures/user/3537d...,https://a0.muscache.com/im/pictures/user/3537d...,,7.0,7.0,"['email', 'phone', 'jumio', 'offline_governmen...",t,t,,Canada Bay,,-33.86390,151.13073,Entire apartment,Entire home/apt,2,,1 bath,1.0,1.0,"[""Heating"", ""Washer"", ""Shampoo"", ""Essentials"",...",$110.00,2,1125,2,2,1125,1125,2.0,1125.0,,t,14,44,74,75,2021-04-13,0,0,0,,,,,,,,,,,f,26,26,0,0,


In [20]:
ps_pg_hook = PostgresHook(postgres_conn_id="postgres")
conn_ps = ps_pg_hook.get_conn()

OperationalError: (sqlite3.OperationalError) no such table: connection
[SQL: SELECT connection.password AS connection_password, connection.extra AS connection_extra, connection.id AS connection_id, connection.conn_id AS connection_conn_id, connection.conn_type AS connection_conn_type, connection.description AS connection_description, connection.host AS connection_host, connection.schema AS connection_schema, connection.login AS connection_login, connection.port AS connection_port, connection.is_encrypted AS connection_is_encrypted, connection.is_extra_encrypted AS connection_is_extra_encrypted 
FROM connection 
WHERE connection.conn_id = ?
 LIMIT ? OFFSET ?]
[parameters: ('postgres', 1, 0)]
(Background on this error at: http://sqlalche.me/e/14/e3q8)

In [10]:
df.head()

Unnamed: 0,id,listing_url,scrape_id,last_scraped,name,description,neighborhood_overview,picture_url,host_id,host_url,host_name,host_since,host_location,host_about,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_thumbnail_url,host_picture_url,host_neighbourhood,host_listings_count,host_total_listings_count,host_verifications,host_has_profile_pic,host_identity_verified,neighbourhood,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,property_type,room_type,accommodates,bathrooms,bathrooms_text,bedrooms,beds,amenities,price,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,calendar_updated,has_availability,availability_30,availability_60,availability_90,availability_365,calendar_last_scraped,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
0,11156,https://www.airbnb.com/rooms/11156,20210410042103,2021-04-12,An Oasis in the City,Very central to the city which can be reached ...,"It is very close to everything and everywhere,...",https://a0.muscache.com/pictures/2797669/17895...,40855,https://www.airbnb.com/users/show/40855,Colleen,2009-09-23,"Potts Point, New South Wales, Australia","Recently retired, I've lived & worked on 4 con...",,,,f,https://a0.muscache.com/im/users/40855/profile...,https://a0.muscache.com/im/users/40855/profile...,Potts Point,1.0,1.0,"['email', 'phone', 'reviews']",t,f,"Potts Point, New South Wales, Australia",Sydney,,-33.86767,151.22497,Private room in apartment,Private room,1,,1 shared bath,1.0,0.0,"[""Dishwasher"", ""Backyard"", ""Kitchen"", ""Shower ...",$65.00,2,180,2,2,180,180,2.0,180.0,,t,29,59,89,364,2021-04-12,196,0,0,2009-12-05,2020-03-13,92.0,10.0,9.0,10.0,10.0,10.0,10.0,,f,1,0,1,0,1.42
1,12351,https://www.airbnb.com/rooms/12351,20210410042103,2021-04-15,Sydney City & Harbour at the door,Come stay with Vinh & Stuart (Awarded as one o...,"Pyrmont is an inner-city village of Sydney, on...",https://a0.muscache.com/pictures/763ad5c8-c951...,17061,https://www.airbnb.com/users/show/17061,Stuart,2009-05-14,"Sydney, New South Wales, Australia","G'Day from Australia!\r\n\r\nHe's Vinh, and I'...",,,,f,https://a0.muscache.com/im/users/17061/profile...,https://a0.muscache.com/im/users/17061/profile...,Pyrmont,2.0,2.0,"['email', 'phone', 'manual_online', 'reviews',...",t,t,"Pyrmont, New South Wales, Australia",Sydney,,-33.8649,151.19171,Private room in townhouse,Private room,2,,1 shared bath,1.0,1.0,"[""Microwave"", ""Patio or balcony"", ""Wifi"", ""Dis...","$14,315.00",2,7,2,2,7,7,2.0,7.0,,t,0,0,0,0,2021-04-15,526,0,0,2010-07-24,2019-09-22,95.0,10.0,10.0,10.0,10.0,10.0,10.0,,f,2,0,2,0,4.03
2,14250,https://www.airbnb.com/rooms/14250,20210410042103,2021-04-14,Manly Harbour House,"Beautifully renovated, spacious and quiet, our...",Balgowlah Heights is one of the most prestigio...,https://a0.muscache.com/pictures/56935671/fdb8...,55948,https://www.airbnb.com/users/show/55948,Heidi,2009-11-20,"Sydney, New South Wales, Australia",I am a Canadian who has made Australia her hom...,within a few hours,90%,79%,t,https://a0.muscache.com/im/users/55948/profile...,https://a0.muscache.com/im/users/55948/profile...,Balgowlah,2.0,2.0,"['email', 'phone', 'reviews', 'jumio', 'offlin...",t,t,"Balgowlah, New South Wales, Australia",Manly,,-33.80084,151.26378,Entire house,Entire home/apt,6,,3 baths,3.0,3.0,"[""Stove"", ""Dedicated workspace"", ""Iron"", ""Pati...",$470.00,5,22,5,5,22,22,5.0,22.0,,t,0,0,0,122,2021-04-14,2,0,0,2016-01-02,2019-01-02,90.0,8.0,8.0,9.0,8.0,9.0,8.0,,f,2,2,0,0,0.03
3,15253,https://www.airbnb.com/rooms/15253,20210410042103,2021-04-12,Unique Designer Rooftop Apartment in City Loca...,Penthouse living at it best ... You will be st...,The location is really central and there is nu...,https://a0.muscache.com/pictures/46dcb8a1-5d5b...,59850,https://www.airbnb.com/users/show/59850,Morag,2009-12-03,"Sydney, New South Wales, Australia",I am originally Scottish but I have made Sydne...,within an hour,90%,95%,f,https://a0.muscache.com/im/pictures/user/730ee...,https://a0.muscache.com/im/pictures/user/730ee...,Darlinghurst,3.0,3.0,"['email', 'phone', 'facebook', 'reviews', 'jum...",t,t,"Darlinghurst, New South Wales, Australia",Sydney,,-33.87964,151.2168,Private room in apartment,Private room,2,,1 private bath,1.0,1.0,"[""Dishwasher"", ""Kitchen"", ""Shower gel"", ""Cooki...",$80.00,2,90,2,2,90,90,2.0,90.0,,t,21,48,78,336,2021-04-12,367,3,0,2012-02-23,2021-03-07,88.0,10.0,9.0,10.0,10.0,10.0,9.0,,t,1,0,1,0,3.3
4,44545,https://www.airbnb.com/rooms/44545,20210410042103,2021-04-13,Sunny Darlinghurst Warehouse Apartment,Sunny warehouse/loft apartment in the heart of...,Darlinghurst is home to some of Sydney's best ...,https://a0.muscache.com/pictures/a88d8e14-4f63...,112237,https://www.airbnb.com/users/show/112237,Atari,2010-04-22,"Sydney, New South Wales, Australia",Curious about the world and full of wanderlust...,,,,t,https://a0.muscache.com/im/pictures/user/34708...,https://a0.muscache.com/im/pictures/user/34708...,Darlinghurst,1.0,1.0,"['email', 'phone', 'facebook', 'reviews', 'jum...",t,t,"Darlinghurst, New South Wales, Australia",Sydney,,-33.87888,151.21439,Entire loft,Entire home/apt,2,,1 bath,1.0,1.0,"[""Dishwasher"", ""Kitchen"", ""Cooking basics"", ""C...",$130.00,3,365,3,3,365,365,3.0,365.0,,t,0,0,0,0,2021-04-13,76,0,0,2010-10-20,2020-01-03,97.0,10.0,10.0,10.0,10.0,10.0,10.0,,f,1,1,0,0,0.6


In [20]:
(
    df['price']
    .str.replace(pat='$', repl='', regex=False)
    .str.replace(pat=',', repl='', regex=False)
    .str.replace(pat='.', repl='', regex=False)
    .astype(int)
    )

0           6500
1        1431500
2          47000
3           8000
4          13000
          ...   
32674       8800
32675      40000
32676      12800
32677      11000
32678      11000
Name: price, Length: 32679, dtype: int64

In [34]:
path = raw_data_dir / '2021-03-04.gz'
df = pd.read_csv(path, compression='gzip')

In [35]:
(
    df['price']
    .str.replace(pat='$', repl='', regex=False)
    .str.replace(pat=',', repl='', regex=False)
    .str.replace(pat='.', repl='', regex=False)
    .astype(int)
)

0           6500
1        1431500
2          47000
3           7900
4          13000
          ...   
33224     110000
33225      25000
33226      24000
33227       5000
33228       7900
Name: price, Length: 33229, dtype: int64

In [26]:
fields = [
        'id',
        'listing_url',
        'scrape_id',
        'last_scraped',
        'name',
        'summary',
        'space',
        'description',
        'experiences_offered',
        'neighborhood_overview',
        'notes',
        'transit',
        'access',
        'interaction',
        'house_rules',
        'thumbnail_url',
        'medium_url',
        'picture_url',
        'xl_picture_url',
        'host_id',
        'host_url',
        'host_name',
        'host_since',
        'host_location',
        'host_about',
        'host_response_time',
        'host_response_rate',
        'host_acceptance_rate',
        'host_is_superhost',
        'host_thumbnail_url',
        'host_picture_url',
        'host_neighbourhood',
        'host_listings_count',
        'host_total_listings_count',
        'host_verifications',
        'host_has_profile_pic',
        'host_identity_verified',
        'street',
        'neighbourhood',
        'neighbourhood_cleansed',
        'neighbourhood_group_cleansed',
        'city',
        'state',
        'zipcode',
        'market',
        'smart_location',
        'country_code',
        'country',
        'latitude',
        'longitude',
        'is_location_exact',
        'property_type',
        'room_type',
        'accommodates',
        'bathrooms',
        'bedrooms',
        'beds',
        'bed_type',
        'amenities',
        'square_feet',
        'price',
        'weekly_price',
        'monthly_price',
        'security_deposit',
        'cleaning_fee',
        'guests_included',
        'extra_people',
        'minimum_nights',
        'maximum_nights',
        'minimum_minimum_nights',
        'maximum_minimum_nights',
        'minimum_maximum_nights',
        'maximum_maximum_nights',
        'minimum_nights_avg_ntm',
        'maximum_nights_avg_ntm',
        'calendar_updated',
        'has_availability',
        'availability_30',
        'availability_60',
        'availability_90',
        'availability_365',
        'calendar_last_scraped',
        'number_of_reviews',
        'number_of_reviews_ltm',
        'first_review',
        'last_review',
        'review_scores_rating',
        'review_scores_accuracy',
        'review_scores_cleanliness',
        'review_scores_checkin',
        'review_scores_communication',
        'review_scores_location',
        'review_scores_value',
        'requires_license',
        'license',
        'jurisdiction_names',
        'instant_bookable',
        'is_business_travel_ready',
        'cancellation_policy',
        'require_guest_profile_picture',
        'require_guest_phone_verification',
        'calculated_host_listings_count',
        'calculated_host_listings_count_entire_homes',
        'calculated_host_listings_count_private_rooms',
        'calculated_host_listings_count_shared_rooms',
        'reviews_per_month',
    ]

subset = [
        'host_name',
        'host_id',
        'latitude',
        'longitude',
    ]

In [36]:
df = df.filter(items=fields).dropna(subset=subset, how='any')

In [38]:
year = 2021
month = 3
day = 1
df = (
    df
    .assign(execution_date = datetime(year, month, day))
)

In [39]:
df

Unnamed: 0,id,listing_url,scrape_id,last_scraped,name,description,neighborhood_overview,picture_url,host_id,host_url,host_name,host_since,host_location,host_about,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_thumbnail_url,host_picture_url,host_neighbourhood,host_listings_count,host_total_listings_count,host_verifications,host_has_profile_pic,host_identity_verified,neighbourhood,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,property_type,room_type,accommodates,bathrooms,bedrooms,beds,amenities,price,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,calendar_updated,has_availability,availability_30,availability_60,availability_90,availability_365,calendar_last_scraped,number_of_reviews,number_of_reviews_ltm,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month,execution_date
0,11156,https://www.airbnb.com/rooms/11156,20210304191223,2021-03-06,An Oasis in the City,Very central to the city which can be reached ...,"It is very close to everything and everywhere,...",https://a0.muscache.com/pictures/2797669/17895...,40855,https://www.airbnb.com/users/show/40855,Colleen,2009-09-23,"Potts Point, New South Wales, Australia","Recently retired, I've lived & worked on 4 con...",,,,f,https://z0.muscache.cn/im/users/40855/profile_...,https://z0.muscache.cn/im/users/40855/profile_...,Potts Point,1.0,1.0,"['email', 'phone', 'reviews']",t,f,"Potts Point, New South Wales, Australia",Sydney,,-33.86934,151.22688,Private room in apartment,Private room,1,,1.0,0.0,"[""Bed linens"", ""Microwave"", ""TV"", ""Stove"", ""Lo...",$65.00,2,180,2,2,180,180,2.0,180.0,,t,29,59,89,364,2021-03-06,196,1,2009-12-05,2020-03-13,92.0,10.0,9.0,10.0,10.0,10.0,10.0,,f,1,0,1,0,1.43,2021-03-01
1,12351,https://www.airbnb.com/rooms/12351,20210304191223,2021-03-06,Sydney City & Harbour at the door,Come stay with Vinh & Stuart (Awarded as one o...,"Pyrmont is an inner-city village of Sydney, on...",https://a0.muscache.com/pictures/763ad5c8-c951...,17061,https://www.airbnb.com/users/show/17061,Stuart,2009-05-14,"Sydney, New South Wales, Australia","G'Day from Australia!\r\n\r\nHe's Vinh, and I'...",,,,f,https://a0.muscache.com/im/users/17061/profile...,https://a0.muscache.com/im/users/17061/profile...,Pyrmont,2.0,2.0,"['email', 'phone', 'manual_online', 'reviews',...",t,t,"Pyrmont, New South Wales, Australia",Sydney,,-33.86515,151.19190,Private room in townhouse,Private room,2,,1.0,1.0,"[""Microwave"", ""TV"", ""Stove"", ""Oven"", ""Host gre...","$14,315.00",2,7,2,2,7,7,2.0,7.0,,t,0,0,0,0,2021-03-06,526,0,2010-07-24,2019-09-22,95.0,10.0,10.0,10.0,10.0,10.0,10.0,,f,2,0,2,0,4.07,2021-03-01
2,14250,https://www.airbnb.com/rooms/14250,20210304191223,2021-03-06,Manly Harbour House,"Beautifully renovated, spacious and quiet, our...",Balgowlah Heights is one of the most prestigio...,https://a0.muscache.com/pictures/56935671/fdb8...,55948,https://www.airbnb.com/users/show/55948,Heidi,2009-11-20,"Sydney, New South Wales, Australia",I am a Canadian who has made Australia her hom...,within a few hours,90%,78%,t,https://a0.muscache.com/im/users/55948/profile...,https://a0.muscache.com/im/users/55948/profile...,Balgowlah,2.0,2.0,"['email', 'phone', 'reviews', 'jumio', 'offlin...",t,t,"Balgowlah, New South Wales, Australia",Manly,,-33.80093,151.26172,Entire house,Entire home/apt,6,,3.0,3.0,"[""Microwave"", ""Free street parking"", ""TV"", ""St...",$470.00,5,22,5,5,22,22,5.0,22.0,,t,0,0,0,83,2021-03-06,2,0,2016-01-02,2019-01-02,90.0,8.0,8.0,9.0,8.0,9.0,8.0,,f,2,2,0,0,0.03,2021-03-01
3,15253,https://www.airbnb.com/rooms/15253,20210304191223,2021-03-06,Unique Designer Rooftop Apartment in City Loca...,Penthouse living at it best ... You will be st...,The location is really central and there is nu...,https://a0.muscache.com/pictures/46dcb8a1-5d5b...,59850,https://www.airbnb.com/users/show/59850,Morag,2009-12-03,"Sydney, New South Wales, Australia",I am originally Scottish but I have made Sydne...,within an hour,100%,93%,f,https://a0.muscache.com/im/pictures/user/730ee...,https://a0.muscache.com/im/pictures/user/730ee...,Darlinghurst,3.0,3.0,"['email', 'phone', 'facebook', 'reviews', 'jum...",t,t,"Darlinghurst, New South Wales, Australia",Sydney,,-33.87964,151.21680,Private room in apartment,Private room,2,,1.0,1.0,"[""Bed linens"", ""Microwave"", ""Free street parki...",$79.00,2,90,2,2,90,90,2.0,90.0,,t,23,53,80,355,2021-03-06,366,7,2012-02-23,2021-02-18,88.0,10.0,9.0,10.0,10.0,10.0,9.0,,t,1,0,1,0,3.33,2021-03-01
4,44545,https://www.airbnb.com/rooms/44545,20210304191223,2021-03-06,Sunny Darlinghurst Warehouse Apartment,Sunny warehouse/loft apartment in the heart of...,Darlinghurst is home to some of Sydney's best ...,https://a0.muscache.com/pictures/a88d8e14-4f63...,112237,https://www.airbnb.com/users/show/112237,Atari,2010-04-22,"Sydney, New South Wales, Australia",Curious about the world and full of wanderlust...,,,,t,https://a0.muscache.com/im/pictures/user/34708...,https://a0.muscache.com/im/pictures/user/34708...,Darlinghurst,1.0,1.0,"['email', 'phone', 'facebook', 'reviews', 'jum...",t,t,"Darlinghurst, New South Wales, Australia",Sydney,,-33.87888,151.21439,Entire loft,Entire home/apt,2,,1.0,1.0,"[""Microwave"", ""TV"", ""Stove"", ""Long term stays ...",$130.00,3,365,3,3,365,365,3.0,365.0,,t,0,0,0,0,2021-03-06,76,0,2010-10-20,2020-01-03,97.0,10.0,10.0,10.0,10.0,10.0,10.0,,f,1,1,0,0,0.60,2021-03-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33224,48450109,https://www.airbnb.com/rooms/48450109,20210304191223,2021-03-07,Tinkerbell Palm Beach,Tinkerbell is an idyllic Pittwater waterfront ...,,https://a0.muscache.com/pictures/2a2ceefa-4935...,269689583,https://www.airbnb.com/users/show/269689583,Jill,2019-06-19,"Palm Beach, New South Wales, Australia",I live in Palm Beach and have a vast knowledge...,within a few hours,100%,84%,t,https://a0.muscache.com/im/pictures/user/2e520...,https://a0.muscache.com/im/pictures/user/2e520...,Avalon,13.0,13.0,"['email', 'phone', 'offline_government_id', 's...",t,t,,Pittwater,,-33.61037,151.32497,Entire house,Entire home/apt,6,,3.0,4.0,"[""Heating"", ""Dryer"", ""Kitchen"", ""Washer""]","$1,100.00",2,21,2,2,21,21,2.0,21.0,,t,10,40,70,269,2021-03-07,0,0,,,,,,,,,,,f,13,13,0,0,,2021-03-01
33225,48451384,https://www.airbnb.com/rooms/48451384,20210304191223,2021-03-06,Narrabeen beach stay,This cute lakeside apartment is full of natura...,,https://a0.muscache.com/pictures/e924e10b-12c6...,53242465,https://www.airbnb.com/users/show/53242465,Shelley,2016-01-04,"New South Wales, Australia",,within a few hours,100%,76%,t,https://a0.muscache.com/im/pictures/user/14b46...,https://a0.muscache.com/im/pictures/user/14b46...,,3.0,3.0,"['email', 'phone', 'reviews', 'jumio', 'offlin...",t,t,,Warringah,,-33.70924,151.29878,Entire apartment,Entire home/apt,2,,1.0,1.0,"[""Smoke alarm"", ""Wifi"", ""Kitchen"", ""TV"", ""Cook...",$250.00,2,1125,2,2,1125,1125,2.0,1125.0,,t,27,57,87,268,2021-03-06,0,0,,,,,,,,,,,f,2,2,0,0,,2021-03-01
33226,48452861,https://www.airbnb.com/rooms/48452861,20210304191223,2021-03-06,"CHIC, CONTEMPORARY CAVENDISH - Hosted By L'Abode","DELUXE, NEWLY RENOVATED, MODERN AND SPACIOUS A...",Points of Interests: <br /><br />Want to feel ...,https://a0.muscache.com/pictures/c6a5ebd4-79fb...,7409213,https://www.airbnb.com/users/show/7409213,L'Abode Accommodation Specialist,2013-07-10,"Sydney, New South Wales, Australia",L'Abode Accommodation specialises in short and...,within an hour,98%,93%,f,https://z0.muscache.cn/im/pictures/user/118e20...,https://z0.muscache.cn/im/pictures/user/118e20...,Double Bay,185.0,185.0,"['email', 'phone', 'manual_online', 'reviews',...",t,t,"Double Bay, New South Wales, Australia",Woollahra,,-33.88109,151.24564,Entire apartment,Entire home/apt,4,,2.0,3.0,"[""Bed linens"", ""Laundromat nearby"", ""Microwave...",$240.00,1,365,3,7,365,365,3.4,365.0,,t,0,0,20,295,2021-03-06,0,0,,,,,,,,,,,t,130,130,0,0,,2021-03-01
33227,48453440,https://www.airbnb.com/rooms/48453440,20210304191223,2021-03-07,Luxury Double Room at Mascot with Great Location,Located in the heart of Sydney with fantastic ...,,https://a0.muscache.com/pictures/ffba58c6-27d4...,391061976,https://www.airbnb.com/users/show/391061976,Peter,2021-03-04,AU,,,,100%,f,https://a0.muscache.com/im/pictures/user/d6af5...,https://a0.muscache.com/im/pictures/user/d6af5...,,1.0,1.0,['phone'],t,t,,Botany Bay,,-33.92350,151.19160,Private room in apartment,Private room,2,,1.0,2.0,"[""TV"", ""Long term stays allowed"", ""Carbon mono...",$50.00,1,1125,1,1,1125,1125,1.0,1125.0,,t,0,0,0,0,2021-03-07,0,0,,,,,,,,,,,t,1,0,1,0,,2021-03-01


In [44]:
engine = sa.create_engine(conn_string)
query = 'SELECT schema_name FROM information_schema.schemata;'
pd.read_sql(con=engine,
            sql=query)

Unnamed: 0,schema_name
0,pg_toast
1,pg_catalog
2,public
3,information_schema
4,topology
5,cron
6,star
7,raw
8,data_mart


In [46]:
query = f"""
SELECT *
FROM pg_catalog.pg_tables
WHERE schemaname != 'pg_catalog' AND 
      schemaname = 'raw';
"""

pd.read_sql(con=engine,
            sql=query)

Unnamed: 0,schemaname,tablename,tableowner,tablespace,hasindexes,hasrules,hastriggers,rowsecurity


In [63]:
import logging
logging.basicConfig(filename='db.log')
logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)

In [64]:
query = f"""
SELECT MAX("Tot_P_M") FROM star."2016Census_G01_NSW_LGA"
"""

pd.read_sql(con=engine,
            sql=query)

[[34m2021-05-20 22:46:21,272[0m] {[34mbase.py:[0m132} INFO[0m - select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table_is_visible(c.oid) and relname=%(name)s[0m
[[34m2021-05-20 22:46:21,274[0m] {[34mbase.py:[0m132} INFO[0m - [cached since 2.585e+04s ago] {'name': '\nSELECT MAX("Tot_P_M") FROM star."2016Census_G01_NSW_LGA"\n'}[0m
[[34m2021-05-20 22:46:21,278[0m] {[34mbase.py:[0m132} INFO[0m - 
SELECT MAX("Tot_P_M") FROM star."2016Census_G01_NSW_LGA"
[0m
[[34m2021-05-20 22:46:21,279[0m] {[34mbase.py:[0m132} INFO[0m - [raw sql] {}[0m


Unnamed: 0,max
0,172327


In [65]:
result = engine.connect().execute('SELECT MAX("Tot_P_M") FROM star."2016Census_G01_NSW_LGA"')
for row in result:
    print(row)

[[34m2021-05-20 22:47:20,532[0m] {[34mbase.py:[0m132} INFO[0m - SELECT MAX("Tot_P_M") FROM star."2016Census_G01_NSW_LGA"[0m
[[34m2021-05-20 22:47:20,533[0m] {[34mbase.py:[0m132} INFO[0m - [raw sql] {}[0m
(172327,)


In [67]:
query = """
SELECT tablename
FROM pg_catalog.pg_tables
WHERE schemaname != 'pg_catalog' AND 
    schemaname != 'information_schema';
"""

pd.read_sql(con=engine,
            sql=query)

[[34m2021-05-21 00:02:01,141[0m] {[34mbase.py:[0m132} INFO[0m - select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table_is_visible(c.oid) and relname=%(name)s[0m
[[34m2021-05-21 00:02:01,142[0m] {[34mbase.py:[0m132} INFO[0m - [cached since 3.039e+04s ago] {'name': "\nSELECT tablename\nFROM pg_catalog.pg_tables\nWHERE schemaname != 'pg_catalog' AND \n    schemaname != 'information_schema';\n"}[0m
[[34m2021-05-21 00:02:01,144[0m] {[34mbase.py:[0m132} INFO[0m - 
SELECT tablename
FROM pg_catalog.pg_tables
WHERE schemaname != 'pg_catalog' AND 
    schemaname != 'information_schema';
[0m
[[34m2021-05-21 00:02:01,145[0m] {[34mbase.py:[0m132} INFO[0m - [raw sql] {}[0m


Unnamed: 0,tablename
0,import_error
1,spatial_ref_sys
2,alembic_version
3,ab_permission_view_role
4,connection
5,dag_pickle
6,dag
7,airbnb_listing_202104
8,topology
9,layer


In [69]:
from datetime import timedelta
timedelta(minutes=5)

datetime.timedelta(seconds=300)

In [71]:
path = raw_data_dir / '2020-05-09.gz'
df_202005 = pd.read_csv(path, compression='gzip')
path = raw_data_dir / '2021-04-10.gz'
df_202104 = pd.read_csv(path, compression='gzip')

In [72]:
set(df_202005.columns) - set(df_202104.columns)

{'Unnamed: 0',
 'access',
 'bed_type',
 'cancellation_policy',
 'city',
 'cleaning_fee',
 'country',
 'country_code',
 'experiences_offered',
 'extra_people',
 'guests_included',
 'house_rules',
 'interaction',
 'is_business_travel_ready',
 'is_location_exact',
 'jurisdiction_names',
 'market',
 'medium_url',
 'monthly_price',
 'notes',
 'require_guest_phone_verification',
 'require_guest_profile_picture',
 'requires_license',
 'security_deposit',
 'smart_location',
 'space',
 'square_feet',
 'state',
 'street',
 'summary',
 'thumbnail_url',
 'transit',
 'weekly_price',
 'xl_picture_url',
 'zipcode'}

In [73]:
set(df_202104.columns) - set(df_202005.columns)

{'bathrooms_text', 'number_of_reviews_l30d'}