# Introduction

This notebook performs exploratory data analysis (EDA) on the Inside Airbnb listings data for Sydney, NSW (snapshot of 2021-04-10): <http://data.insideairbnb.com/australia/nsw/sydney/2021-04-10/data/listings.csv.gz>

In [1]:
import os
import gzip
import shutil
import requests
import pandas as pd
from pathlib import Path
from dotenv import (
    load_dotenv,
    find_dotenv
)
from pandas_profiling import ProfileReport
import sweetviz as sv
import psycopg2

In [2]:
# Locate the project's .env file once, load its variables into the process
# environment, and derive the project directories from where it was found.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

project_dir = Path(dotenv_path).parent
data_dir = project_dir / 'data'
raw_data_dir = data_dir / 'raw'
interim_data_dir = data_dir / 'interim'
reports_dir = project_dir / 'reports'

# Later cells write files into these two directories; create them up front so
# a fresh checkout does not fail on the first write.
for output_dir in (interim_data_dir, reports_dir):
    output_dir.mkdir(parents=True, exist_ok=True)

# Load data

In [15]:
# Read the gzip-compressed listings CSV directly from the remote URL into a
# DataFrame.
url = 'http://data.insideairbnb.com/australia/nsw/sydney/2021-04-10/data/listings.csv.gz'
df = pd.read_csv(
    filepath_or_buffer=url,
    compression='gzip',
)

In [16]:
# Display the loaded listings frame as the cell output.
df

Unnamed: 0,id,listing_url,scrape_id,last_scraped,name,description,neighborhood_overview,picture_url,host_id,host_url,...,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
0,11156,https://www.airbnb.com/rooms/11156,20210410042103,2021-04-12,An Oasis in the City,Very central to the city which can be reached ...,"It is very close to everything and everywhere,...",https://a0.muscache.com/pictures/2797669/17895...,40855,https://www.airbnb.com/users/show/40855,...,10.0,10.0,10.0,,f,1,0,1,0,1.42
1,12351,https://www.airbnb.com/rooms/12351,20210410042103,2021-04-15,Sydney City & Harbour at the door,Come stay with Vinh & Stuart (Awarded as one o...,"Pyrmont is an inner-city village of Sydney, on...",https://a0.muscache.com/pictures/763ad5c8-c951...,17061,https://www.airbnb.com/users/show/17061,...,10.0,10.0,10.0,,f,2,0,2,0,4.03
2,14250,https://www.airbnb.com/rooms/14250,20210410042103,2021-04-14,Manly Harbour House,"Beautifully renovated, spacious and quiet, our...",Balgowlah Heights is one of the most prestigio...,https://a0.muscache.com/pictures/56935671/fdb8...,55948,https://www.airbnb.com/users/show/55948,...,8.0,9.0,8.0,,f,2,2,0,0,0.03
3,15253,https://www.airbnb.com/rooms/15253,20210410042103,2021-04-12,Unique Designer Rooftop Apartment in City Loca...,Penthouse living at it best ... You will be st...,The location is really central and there is nu...,https://a0.muscache.com/pictures/46dcb8a1-5d5b...,59850,https://www.airbnb.com/users/show/59850,...,10.0,10.0,9.0,,t,1,0,1,0,3.30
4,44545,https://www.airbnb.com/rooms/44545,20210410042103,2021-04-13,Sunny Darlinghurst Warehouse Apartment,Sunny warehouse/loft apartment in the heart of...,Darlinghurst is home to some of Sydney's best ...,https://a0.muscache.com/pictures/a88d8e14-4f63...,112237,https://www.airbnb.com/users/show/112237,...,10.0,10.0,10.0,,f,1,1,0,0,0.60
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32674,49118065,https://www.airbnb.com/rooms/49118065,20210410042103,2021-04-12,"Marvellous Marrickville apartment, 15 mins to ...",For those wanting easy access to inner Sydney'...,,https://a0.muscache.com/pictures/94a3bd3f-4ac2...,4358703,https://www.airbnb.com/users/show/4358703,...,,,,,t,3,3,0,0,
32675,49118280,https://www.airbnb.com/rooms/49118280,20210410042103,2021-04-12,Peaceful Pittwater Views from Eclectic Getaway,Immerse yourself in this north facing peaceful...,,https://a0.muscache.com/pictures/96d93fbd-590d...,95214788,https://www.airbnb.com/users/show/95214788,...,,,,,t,35,35,0,0,
32676,49118321,https://www.airbnb.com/rooms/49118321,20210410042103,2021-04-12,MQ13 Convinent 2 Bedroom Close MQ Shopping Centre,Brand new luxury 2 bedroom apartments in the h...,,https://a0.muscache.com/pictures/ad703b5e-e9d9...,382207272,https://www.airbnb.com/users/show/382207272,...,,,,,f,26,26,0,0,
32677,49118480,https://www.airbnb.com/rooms/49118480,20210410042103,2021-04-13,FD50 Newly Furnished 1 Bedroom in Five Dock,Five dock is a real gem in waterside centrally...,,https://a0.muscache.com/pictures/b6c19d4b-424f...,382207272,https://www.airbnb.com/users/show/382207272,...,,,,,f,26,26,0,0,


In [17]:
# Persist the listings frame (without its index) to the interim data directory
# in two formats that share the same base name: CSV and Parquet.
path = interim_data_dir / 'listings'
csv_path, parquet_path = (path.with_suffix(ext) for ext in ('.csv', '.parquet'))

df.to_csv(csv_path, index=False)
df.to_parquet(parquet_path, index=False)

# Create profile report

In [18]:
# Build the profile report for the listings frame and export it as an HTML
# file in the reports directory.
report_options = {
    'title': 'Airbnb listing profile report',
    'explorative': True,
}
profile_report = ProfileReport(df, **report_options)

path = reports_dir / 'profile_report_airbnb'
html_path = path.with_suffix('.html')
profile_report.to_file(html_path)

Summarize dataset:   0%|          | 0/87 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

# Create sweetviz report

In [19]:
# Analyse the listings frame with sweetviz and write the HTML report into the
# reports directory (the same place the profile report is saved) instead of
# the default SWEETVIZ_REPORT.html in the current working directory.
sweetviz_report = sv.analyze(df)

sweetviz_report_path = reports_dir / 'sweetviz_report_airbnb.html'
# Make sure the target directory exists before sweetviz writes the file.
sweetviz_report_path.parent.mkdir(parents=True, exist_ok=True)
sweetviz_report.show_html(filepath=str(sweetviz_report_path))

                                             |          | [  0%]   00:00 -> (? left)

Report SWEETVIZ_REPORT.html was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.


# Connect to Postgres

In [3]:
# Read the Postgres connection settings from environment variables.
# A variable that is not set yields None.
POSTGRES_HOST = os.getenv('POSTGRES_HOST')
POSTGRES_PORT = os.getenv('POSTGRES_PORT')
POSTGRES_USERNAME = os.getenv('POSTGRES_USERNAME')
POSTGRES_PASSWORD = os.getenv('POSTGRES_PASSWORD')
POSTGRES_DB = os.getenv('POSTGRES_DB')

In [4]:
# Display the username read from the environment as the cell output.
# NOTE(review): the displayed value is stored in the saved notebook output;
# avoid echoing POSTGRES_PASSWORD the same way.
POSTGRES_USERNAME

'airflow'

In [5]:
# Open a psycopg2 connection using the settings read from the environment,
# then display the connection object as the cell output.
connection_settings = {
    'host': POSTGRES_HOST,
    'database': POSTGRES_DB,
    'user': POSTGRES_USERNAME,
    'password': POSTGRES_PASSWORD,
    'port': POSTGRES_PORT,
}
conn = psycopg2.connect(**connection_settings)

conn

OperationalError: could not connect to server: Connection refused
	Is the server running on host "localhost" (127.0.0.1) and accepting
	TCP/IP connections on port 5432?
could not connect to server: Cannot assign requested address
	Is the server running on host "localhost" (::1) and accepting
	TCP/IP connections on port 5432?
