## Data Preparation

In [122]:
import pandas as pd 
from pandas.io.json import json_normalize
import numpy as np 
from pymongo import MongoClient
import json, ast

### Database Connection

In [123]:
DB_USERNAME = 'anutibara'
DB_PASSWORD = 'anutibara'
DB_HOST = 'scraping-cluster-7dtgt.gcp.mongodb.net'
DB_NAME = 'scraping_db'
try:
    client = MongoClient(f'mongodb+srv://{DB_USERNAME}:{DB_PASSWORD}@{DB_HOST}/{DB_NAME}')
    print("Database connected successfully")
except Exception as e:
    print("Error to connect to database: ", e)
db = client.get_database('scraping_db')
properties = db.properties

Database connected successfully


### Database Queries

##### Total Documents

In [124]:
properties.count_documents({})

34

### Convert Mongo Collection to DataFrame

In [136]:
json_documents = list(properties.find({}))
df_general_info = pd.DataFrame(json_documents, columns=['_id', 'urlProperty', 'scrapingDate', 'scrapingHour', 'modifyDate', 'code', 'status', 'type', 'use', 'nameProject', 'description'])
df_location = json_normalize(json_documents, 'location')
df_owner_property = json_normalize(json_documents, 'ownerProperty')
df_features = json_normalize(json_documents, 'features')
df_more_features = json_normalize(json_documents, 'moreFeatures')
df_offers_type = json_normalize(json_documents, 'offersType')
df = pd.concat([df_general_info, df_location, df_owner_property, df_features, df_more_features, df_offers_type], axis=1)

### Rename DataFrame Columns 

In [140]:
df.columns = [
                'id_mongoose', 'id_property', 'scraping_date', 'scraping_hour', 'modify_date', 'code', 'active', 'type', 'new_property', 
                'name_project', 'description', 'address', 'city', 'country', 'department', 'latitude', 'longitude',
                'neighborhood', 'sector', 'contract_type_owner_property', 'financing_owner_property', 'id_owner_property',
                'name_owner_property', 'schedule_owner_property', 'admon_price', 'antiquity', 'bathrooms', 'condition',
                'construction_area', 'floors', 'garages', 'includes_administration', 'interior_floors', 'price', 'private_area',
                'rooms', 'square_meters', 'square_meters_price', 'stratum', 'weather', 'exterior_features', 'interior_features',
                'sector_features', 'area_offers_type', 'bathrooms_offers_type', 'transaction_offers_type', 'price_offers_type',
                'private_area_offers_type', 'property_offers_type', 'rooms_offers_type'
             ]

### Data Cleaning Methods

In [144]:
def clean_status(status):
    if(status == 'Active'):
        return True
    return False

def clean_use(use_status):
    if(use_status == 'Nuevo'):
        return True
    return False

## Data Cleaning

In [147]:
df['active'] = df['active'].apply(clean_status)
df['new_property'] = df['new_property'].apply(clean_use)
df.head(10)

Unnamed: 0,id_mongoose,id_property,scraping_date,scraping_hour,modify_date,code,active,type,new_property,name_project,...,exterior_features,interior_features,sector_features,area_offers_type,bathrooms_offers_type,transaction_offers_type,price_offers_type,private_area_offers_type,property_offers_type,rooms_offers_type
0,5db20932a9c69034d0d532f3,https://www.fincaraiz.com.co/parma-apartamento...,24/10/2019,15:27:17,8/30/2019 6:07:03 PM,4871548.0,False,Venta,False,PARMA APARTAMENTOS,...,"[Ascensor, Garaje / Parqueadero(s), Piscina, S...","[Balcón, Baño Auxiliar, Closet, Comedor]","[Cómodas vias de acceso, Seguridad, Trans. Púb...",91.95,2,Venta,685.000.000,,Apartamento,2
1,5db2093249f9baa043d532f3,https://www.fincaraiz.com.co/lote-en-venta/med...,24/10/2019,15:27:17,10/24/2019 8:08:16 PM,5026412.0,False,Venta,False,Lote en Venta - Medellín SurOriente,...,"[Pozo de agua natural, Rio / Quebrada cercano(...",[Con Vivienda],"[Área Rural, Cerca a sector comercial, Sobre v...",94.47,2,Venta,702.400.000,,Apartamento,2
2,5db209329d4584d42ed532f3,https://www.fincaraiz.com.co/apartamento-en-ve...,24/10/2019,15:27:17,10/24/2019 8:04:16 PM,4934670.0,False,Venta,False,Apartamento en Venta - Medellín La Tomatera,...,"[Acceso Pavimentado, Ascensor, Canchas Deporti...","[Balcón, Calentador, Citófono, Cocina Integral...","[Parques cercanos, Trans. Público cercano, Zon...",109.54,2,Venta,794.700.000,,Apartamento,2
3,5db2093292ea284128d532f3,https://www.fincaraiz.com.co/origen-reserva-se...,24/10/2019,15:27:17,7/22/2019 8:58:45 PM,3110462.0,False,Venta,False,ORIGEN RESERVA SERRAT,...,"[Ascensor, Canchas Deportivas, Garaje / Parque...","[Balcón, Baño Auxiliar, Estudio, Garaje Cubierto]","[Colegios / Universidades, Cómodas vias de acc...",111.0,3,Venta,840.300.000,,Apartamento,2
4,5db2093216520c635bd532f3,https://www.fincaraiz.com.co/palermo/medellin/...,24/10/2019,15:27:17,10/23/2019 1:33:28 PM,2393445.0,False,Venta,False,Palermo,...,"[Acceso Pavimentado, Corrales, Cuarto de Escol...","[Acceso para camiones, Acceso para tractomulas...","[Bombas de gasolina, Cerca a sector comercial,...",68.74,2,Venta,273.007.290,61.99,Apartamento,3
5,5db20932878c21fa7cd532f3,https://www.fincaraiz.com.co/apartamento-en-ve...,24/10/2019,15:27:17,10/24/2019 8:09:14 PM,4601041.0,False,Venta,False,Apartamento en Venta - Medellín Buenos Aires,...,"[Ascensor, Portería / Recepción, Vigilancia]","[Balcón, Citófono, Cocina Integral, Hall de Al...","[Colegios / Universidades, Parques cercanos, S...",76.97,2,Venta,310.685.745,68.9,Apartamento,3
6,5db20932ff12b96d69d532f3,https://www.fincaraiz.com.co/origen-reserva-se...,24/10/2019,15:27:17,7/22/2019 8:58:45 PM,3110462.0,False,Venta,False,ORIGEN RESERVA SERRAT,...,"[Ascensor, Canchas Deportivas, Garaje / Parque...","[Balcón, Baño Auxiliar, Estudio, Garaje Cubierto]","[Colegios / Universidades, Cómodas vias de acc...",76.42,2,Venta,303.008.570,68.8,Apartamento,3
7,5db209329b100dd025d532f3,https://www.fincaraiz.com.co/origen-reserva-se...,24/10/2019,15:27:17,7/22/2019 8:58:45 PM,3110462.0,False,Venta,False,ORIGEN RESERVA SERRAT,...,"[Ascensor, Canchas Deportivas, Garaje / Parque...","[Balcón, Baño Auxiliar, Estudio, Garaje Cubierto]","[Colegios / Universidades, Cómodas vias de acc...",62.0,2,Venta,250.727.000,56.13,Apartamento,3
8,5db2093262025965cad532f3,https://www.fincaraiz.com.co/reserva-serrat-se...,24/10/2019,15:27:17,8/23/2019 5:27:51 PM,4757533.0,False,Venta,False,Reserva Serrat Selva,...,"[Piscina, Salón Comunal, Zona de Camping, Zona...","[Ascensores Comunales, Balcón, Closet]","[Cómodas vias de acceso, Seguridad, Supermerca...",68.74,2,Venta,272.407.290,61.99,Apartamento,3
9,5db209322d35f4bde8d532f3,https://www.fincaraiz.com.co/casa-en-venta/med...,24/10/2019,15:27:17,10/24/2019 8:18:32 PM,4654459.0,False,Venta,False,Casa en Venta - Medellín La Visitación,...,"[Garaje(s), Piscina]","[Balcón, Baño Auxiliar, Baño de Servicio, Cale...",[],76.97,2,Venta,310.685.745,68.9,Apartamento,3
