In [4]:
import warnings
from glob import glob

import pandas as pd
import seaborn as sns
from category_encoders import OneHotEncoder
from IPython.display import VimeoVideo
from ipywidgets import Dropdown, FloatSlider, IntSlider, interact
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_absolute_error
from sklearn.pipeline import make_pipeline
from sklearn.utils.validation import check_is_fitted

# Ignore every warning in the FutureWarning category from this point on
# (presumably to keep library deprecation notices out of the cell outputs).
warnings.simplefilter(action="ignore", category=FutureWarning)

# Data Wrangling

We are interested in predicting the prices of apartments in Buenos Aires (`"Capital Federal"`) that cost less than $400,000. First, we will filter the data to extract only what we need.

We create a function to handle the data wrangling, since the data files have the same structure. We can then pass each file through the function to process it faster.

In [8]:
def wrangle(filepath):
    """Load a real-estate CSV and keep only the listings of interest.

    A row is kept when all three conditions hold:

    * ``property_type`` equals ``'apartment'``,
    * ``place_with_parent_names`` contains the text ``'Capital Federal'``,
    * ``price_aprox_usd`` is strictly below 400,000.

    Rows with a missing value in any of those three columns are dropped.

    Parameters
    ----------
    filepath : str or path-like
        Location of the CSV file; passed straight to ``pd.read_csv``.

    Returns
    -------
    pd.DataFrame
        The matching rows, with every original column and the original
        index labels preserved.
    """
    df = pd.read_csv(filepath)

    # Subset data: Apartment, Capital Federal, < $400,000
    mask_apartment = df['property_type'] == 'apartment'
    # na=False makes rows with a missing place name an explicit False instead
    # of NaN, so the mask is strictly boolean; regex=False matches the text
    # literally rather than compiling it as a regular expression.
    mask_capital = df['place_with_parent_names'].str.contains(
        'Capital Federal', na=False, regex=False
    )
    mask_price = df['price_aprox_usd'] < 400000

    return df[mask_apartment & mask_capital & mask_price]

In [9]:
# Run the first listings file through wrangle() and preview the first rows
# of the filtered result.
df = wrangle('data/buenos-aires-real-estate-1.csv')
df.head()

Unnamed: 0,operation,property_type,place_with_parent_names,lat-lon,price,currency,price_aprox_local_currency,price_aprox_usd,surface_total_in_m2,surface_covered_in_m2,price_usd_per_m2,price_per_m2,floor,rooms,expenses,properati_url
0,sell,apartment,|Argentina|Capital Federal|Villa Crespo|,"-34.6047834183,-58.4586812499",180000.0,USD,2729232.0,180000.0,120.0,110.0,1500.0,1636.363636,,4.0,,http://villa-crespo.properati.com.ar/12egq_ven...
4,sell,apartment,|Argentina|Capital Federal|Chacarita|,"-34.5846508988,-58.4546932614",129000.0,USD,1955949.6,129000.0,76.0,70.0,1697.368421,1842.857143,,,,http://chacarita.properati.com.ar/10qlv_venta_...
9,sell,apartment,|Argentina|Capital Federal|Villa Luro|,"-34.6389789,-58.500115",87000.0,USD,1319128.8,87000.0,48.0,42.0,1812.5,2071.428571,,,,http://villa-luro.properati.com.ar/12m82_venta...
11,sell,apartment,|Argentina|Capital Federal|Once|,"-34.6050060697,-58.4001162302",60000.0,USD,909744.0,60000.0,28.0,28.0,2142.857143,2142.857143,,1.0,,http://once.properati.com.ar/zz0q_venta_depart...
20,sell,apartment,|Argentina|Capital Federal|San Nicolás|,"-34.603898,-58.378617",69000.0,USD,1046205.6,69000.0,,22.0,,3136.363636,23.0,2.0,,http://san-nicolas.properati.com.ar/rnju_venta...
