---
# Create the Data Reports
---

**Content**

- Data Report: listings.csv

- Data Report: features.csv


**Central Concepts**

- Dashboard

---
---


This script creates data reports\
for listings.csv and for features.csv.

Run the cells below one by one\
to create the report for listings.csv individually.

In [None]:
# install the required packages: pandas-profiling (with its notebook extras)
# and imblearn; all pip output is redirected to /dev/null to keep the cell quiet

!pip install -U pandas-profiling[notebook] &> /dev/null;
!pip install imblearn &> /dev/null;

In [None]:
from pathlib import Path
import pandas as pd
from pandas_profiling import ProfileReport

In [None]:
# reload ./utils.py
# re-executing the module picks up edits made to utils.py since it was
# first imported, without having to restart the kernel

import importlib
import utils
importlib.reload(utils)
# import the helper only after the reload so the name is bound to the fresh definition
from utils import get_dichotomous

In [None]:
# set up the folders used by this notebook (relative to the current directory):
#   data/  -> input csv files
#   plots/ -> generated html reports

cwd = Path()
opath, ppath = (cwd / folder for folder in ('data', 'plots'))

# create both folders if they are missing; existing folders are left untouched
for _directory in (opath, ppath):
    _directory.mkdir(exist_ok=True)

# input files that the reports are built from
ifname_listings = opath.joinpath('listings.csv')
ifname_features = opath.joinpath('features.csv')

In [None]:
# columns of listings.csv that go into the report

# column used as the DataFrame index
cindex = ['id']

# remaining columns to load, in their original order
cnames = [
    "space",
    "description",
    "host_since",
    "host_is_superhost",
    "neighbourhood_group_cleansed",
    "latitude",
    "longitude",
    "room_type",
    "bathrooms",
    "bedrooms",
    "beds",
    "amenities",
    "square_feet",
    "price",
    "cleaning_fee",
    "security_deposit",
    "minimum_nights",
    "number_of_reviews",
    "review_scores_rating",
    "review_scores_cleanliness",
    "review_scores_location",
    "instant_bookable",
    "host_id",
]

# Data report: listings.csv

In [None]:
# build the html profiling report for the raw listings;
# nothing is regenerated if the report file is already present

ifname = ifname_listings
stem = ifname.stem
ofname = ppath.joinpath(f'data_report_{stem}.html')

if not ifname.is_file():
    # the raw input is missing
    print(f'*** File {ifname} not found! Download the raw data')
elif ofname.is_file():
    # a report from an earlier run is kept as is
    print(f'*** File {ofname} already exists.')
else:
    # load only the selected columns, indexed by id, with columns sorted by name
    data = pd.read_csv(ifname, usecols=cnames+cindex, index_col=cindex)
    data = data.sort_index(axis=1)
    profile = ProfileReport(data, title=f'Report for the {stem.capitalize()}')
    profile.to_file(ofname)

# Data report: features.csv

- first, run the preprocessing script successfully (it creates features.csv)
- for a concise overview, all dichotomous variables are dropped from the report

In [None]:
# build the html profiling report for the preprocessed features,
# leaving out all dichotomous variables to keep the report concise

ifname = ifname_features
stem = ifname.stem
ofname = ppath / f'data_report_{stem}.html'

if ifname.is_file():
    data = pd.read_csv(ifname, index_col=cindex).sort_index(axis=1)

    # determine the dichotomous columns once and reuse the result for
    # both the drop and the summary printed below
    left_out_columns = get_dichotomous(data)
    data.drop(left_out_columns, axis=1, inplace=True)

    profile = ProfileReport(data, title=f'Report for the {stem.capitalize()}')
    profile.to_file(ofname)

    # report the omitted columns only when they were actually computed;
    # printing them unconditionally raised a NameError if the input was missing
    print('*** The following dichotomous features were left out:')
    print(left_out_columns)
else:
    print(f'*** File {ifname} not found! Run the preprocessing script')