In [None]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import os
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import datetime
import re

plt.style.use('seaborn')
sns.set_palette("husl")
%matplotlib inline
%config InlineBackend.figure_format = 'svg'

### Read in the data

In [None]:
# Load the listings CSV into the working frame `df`.
# The path is relative, so the file must sit in the kernel's working directory.
df = pd.read_csv('listings_september.csv')

In [None]:
# (rows, columns) of the loaded frame.
df.shape

In [None]:
# Per-column dtype and non-null count summary.
df.info()

In [None]:
# Column labels available in the loaded frame.
df.columns

### Clean Zipcode

### Clean Price

Cleaning removes the leading `$` sign from each price column and converts the column from `object` dtype to `int64`.

In [None]:
def clean_price(dataframe, list_of_columns):
    '''
    Convert currency-formatted columns of a dataframe to int64.

    For every column named in list_of_columns, dollar signs, thousands
    separators and whitespace are removed (so "$1,200.00" becomes 1200
    rather than being coerced to 0), the remainder is parsed as a number,
    and the result is cast to int64 (fractional parts are truncated).
    Missing or unparseable values become 0.

    The columns are overwritten in place on the passed dataframe, which
    is also returned so the call can be used in an assignment. The
    function is safe to run more than once: already-numeric columns pass
    through unchanged.
    '''

    for column in list_of_columns:
        # astype(str) lets the same code path handle raw strings, NaN and
        # columns that were already converted on a previous run.
        stripped = (
            dataframe[column]
            .astype(str)
            .str.replace(r'[$,\s]', '', regex=True)
        )
        dataframe[column] = (
            pd.to_numeric(stripped, errors='coerce')
            .fillna(0)
            .astype('int64')
        )
    return dataframe

In [None]:
# Convert every monetary column to int64 in one pass.
# NOTE(review): `df_clean2` is not created in any cell visible above this one;
# confirm it is defined earlier, otherwise this fails on a fresh kernel.
df_clean2 = clean_price(df_clean2, ['price', 'security_deposit', 'cleaning_fee', 'extra_people'])

### Rent as a proxy for location

In [None]:
# Load the rent table that is joined onto the listings by zipcode below.
df_rent = pd.read_csv("data/chicago-rent.csv")

In [None]:
# Attach the rent columns to each listing via the shared 'zipcode' key.
# pd.merge defaults to an inner join, so listings whose zipcode has no match
# in df_rent are dropped from df_new.
# NOTE(review): this joins the raw `df`, not the cleaned `df_clean2` — confirm
# that is intended, and that 'zipcode' has the same dtype in both frames.
df_new = pd.merge(df, df_rent, on ='zipcode')

### Datetime

In [None]:
def make_datetime_object(dataframe, list_of_columns, date_format='%m-%d-%Y'):
    '''
    Convert date-like columns of a dataframe to datetime64.

    For every column named in list_of_columns the values are first parsed
    with date_format. If the column does not follow that format, parsing
    falls back to pandas' own format inference, which also accepts ISO
    strings and datetime.date objects. Values that cannot be parsed at
    all raise instead of being left behind silently as strings.

    The columns are overwritten in place on the passed dataframe, which
    is also returned so the call can be used in an assignment.
    '''

    for column in list_of_columns:
        values = dataframe[column]
        try:
            parsed = pd.to_datetime(values, format=date_format)
        except (ValueError, TypeError):
            # Column is not in date_format; let pandas infer the format.
            # This replaces the deprecated errors='ignore' followed by a
            # unit-less astype('datetime64'), which pandas 2.x rejects.
            parsed = pd.to_datetime(values)
        dataframe[column] = parsed
    return dataframe

In [None]:
# Today's calendar date (local time), used below as the reference point
# for host-tenure calculations.
now = datetime.date.today()
print(now)

In [None]:
# Broadcast today's date into a column so it can be subtracted row-wise.
df_clean2['now'] = now

In [None]:
# Whole days between each listing's first and last review.
# pd.to_datetime makes the cell work whether or not the columns were already
# converted, and .dt.days replaces the day-unit astype that pandas 2.x no
# longer supports. Rows with a missing review date yield NaN.
df_clean2['review_period'] = (
    pd.to_datetime(df_clean2['last_review']) - pd.to_datetime(df_clean2['first_review'])
).dt.days

In [None]:
# Whole days from the host's sign-up date to the 'now' column.
# pd.to_datetime makes the cell work whether or not the columns were already
# converted, and .dt.days replaces the day-unit astype that pandas 2.x no
# longer supports. Rows with a missing host_since yield NaN.
df_clean2['Number_of_days_as_host'] = (
    pd.to_datetime(df_clean2['now']) - pd.to_datetime(df_clean2['host_since'])
).dt.days

In [None]:
# Convert the date columns to datetime64, then re-inspect the dtypes.
# NOTE(review): this conversion cell sits below the cells that subtract these
# columns; consider moving it above them so the notebook runs top to bottom.
df_clean2 = make_datetime_object(df_clean2, ['host_since', 'first_review', 'last_review', 'now'])
df_clean2.info()

### Fill empty cells with 0

In [None]:
def recode_empty_cells(dataframe, list_of_columns):
    '''
    Replace the empty cells of the given columns with 0.

    A cell counts as empty when it is missing (NaN/None) or, in a text
    column, when it holds an empty or whitespace-only string. Text that
    merely contains a space (e.g. "New York") is left untouched.

    The columns are overwritten in place on the passed dataframe, which
    is also returned so the call can be used in an assignment.
    '''

    for column in list_of_columns:
        values = dataframe[column]
        if pd.api.types.is_object_dtype(values) or pd.api.types.is_string_dtype(values):
            # Work on object dtype so the integer 0 can sit next to strings.
            values = values.astype(object)
            is_blank = values.map(
                lambda cell: isinstance(cell, str) and cell.strip() == ''
            ).astype(bool)
            values = values.mask(is_blank, np.nan)
        # The previous code assigned the None returned by
        # replace(..., inplace=True), which wiped out the whole column.
        dataframe[column] = values.fillna(0)

    return dataframe