In [1]:
import json
import logging
from urllib.parse import quote_plus

import pandas as pd
import pymongo

In [2]:
# One MongoClient per permission level, reused across calls so that every
# helper in this notebook does not open (and leak) its own connection pool.
_MONGO_CLIENTS = {}


def get_db(permission:str="read"):
    """Return a handle to the ``flats`` database on the MongoDB cluster.

    Credentials are read from ``../.db_creds/creds_mongo_mab.json``. The file
    is expected to hold one top-level key per permission, each mapping to an
    object with ``"username"`` and ``"password"`` entries.

    Args:
        permission: Key of the credential set to use (default ``"read"``).

    Returns:
        The ``flats`` database object of the connected cluster.

    Raises:
        FileNotFoundError: If the credentials file does not exist.
        KeyError: If ``permission`` (or username/password) is missing
            from the credentials file.

    Note:
        The client is cached for the lifetime of the kernel; re-run this cell
        (which resets the cache) after changing the credentials file.
    """
    client = _MONGO_CLIENTS.get(permission)
    if client is None:
        with open("../.db_creds/creds_mongo_mab.json", "r") as f:
            creds_mongo = json.load(f)[permission]
        # Reserved URI characters (@ : / % ...) in the credentials must be
        # percent-escaped, otherwise the connection string is mis-parsed.
        username = quote_plus(creds_mongo["username"])
        password = quote_plus(creds_mongo["password"])

        client = pymongo.MongoClient(
            f"mongodb+srv://{username}:{password}@cluster0.io0gaio.mongodb.net/?retryWrites=true&w=majority"
            )
        _MONGO_CLIENTS[permission] = client
    return client["flats"]

In [3]:
def find_penthouses():
    """Fetch every penthouse in the València province from the ``_flats`` collection.

    Returns:
        dict keyed by each flat's ``propertyCode``. Every value is a dict that
        holds the full document under ``'data'`` plus whichever of
        ``district``, ``neighborhood`` and ``size`` the document contains.
    """
    query = {"propertyType": "penthouse", 'province': 'València'}
    optional_fields = ("district", "neighborhood", "size")

    penthouses = {}
    for doc in get_db()["_flats"].find(query):
        # Copy only the optional fields that are actually present.
        entry = {field: doc[field] for field in optional_fields if field in doc}
        entry['data'] = doc
        penthouses[doc["propertyCode"]] = entry

    return penthouses

In [4]:
def get_latest_dates(procertyCodes:list) -> dict:
    """Return the date of the latest price update for each requested flat.

    Args:
        procertyCodes: Iterable of flat ``propertyCode`` values. A dict is
            accepted as well, in which case its keys are used.

    Returns:
        dict mapping ``propertyCode`` to the maximum ``date`` found among that
        flat's documents in the ``_prices`` collection. Codes without any
        price document are omitted.
    """
    wanted_codes = list(procertyCodes)
    if not wanted_codes:
        # Nothing to look up; avoid a pointless database round trip.
        return {}

    collection_prices = get_db()["_prices"]

    # Filter on the server so only the requested flats are grouped and
    # returned, instead of aggregating the whole collection and discarding
    # most of it client-side.
    latest_per_flat = collection_prices.aggregate([
        {'$match': {'propertyCode': {'$in': wanted_codes}}},
        {'$group': {'_id': '$propertyCode', 'date': {'$max': "$date"}}},
    ])

    return {row["_id"]: row["date"] for row in latest_per_flat}

In [5]:
def get_latest_price(ids_dates_dict: dict) -> dict:
    """Look up the price each flat had on its latest recorded date.

    Args:
        ids_dates_dict: Mapping of ``propertyCode`` to the date whose price
            should be fetched (as produced by ``get_latest_dates``).

    Returns:
        dict mapping ``propertyCode`` to
        ``{'latestPrice': <price>, 'latestDate': <date>}``. Flats for which no
        matching ``_prices`` document exists are omitted.
    """
    if not ids_dates_dict:
        # Nothing to look up; avoid a pointless database round trip.
        return {}

    collection_prices = get_db()["_prices"]
    results = {}

    for property_code, date in ids_dates_dict.items():
        # find_one returns None when nothing matches, whereas indexing a
        # cursor with [0] raises IndexError on an empty result.
        record = collection_prices.find_one({"propertyCode": property_code, "date": date})
        if record is None:
            continue
        results[record['propertyCode']] = {'latestPrice': record['price'], 'latestDate': record['date']}

    return results

In [6]:
def get_latest_price_distr(dist, path="../output/avg_district_prices_pent.json"):
    """Return the most recent average penthouse price recorded for a district.

    The JSON file maps each district name to an object whose keys identify
    observations and whose values are objects with a ``"price"`` entry. The
    observation with the greatest key (string comparison) is used.
    NOTE(review): this assumes the keys are dates in a format that sorts
    chronologically as text, e.g. ISO ``YYYY-MM-DD`` - confirm.

    Args:
        dist: District name to look up.
        path: Location of the district price JSON file. Defaults to the
            notebook's standard output file.

    Returns:
        The ``"price"`` value of the latest observation for ``dist``.

    Raises:
        KeyError: If ``dist`` is not present in the file.
        ValueError: If the district has no observations.
    """
    with open(path, "r") as f:
        distr_prices_pent_json = json.load(f)
    distr_prices = distr_prices_pent_json[dist]
    latest_key = max(distr_prices)
    return distr_prices[latest_key]["price"]
    

In [7]:
def get_latest_price_neigh(neighborhood, path="../output/avg_neighborhood_prices_pent.json"):
    """Return the most recent average penthouse price recorded for a neighborhood.

    The JSON file maps each neighborhood name to an object whose keys identify
    observations and whose values are objects with a ``"price"`` entry. The
    observation with the greatest key (string comparison) is used.
    NOTE(review): this assumes the keys are dates in a format that sorts
    chronologically as text, e.g. ISO ``YYYY-MM-DD`` - confirm.

    Args:
        neighborhood: Neighborhood name to look up.
        path: Location of the neighborhood price JSON file. Defaults to the
            notebook's standard output file.

    Returns:
        The ``"price"`` value of the latest observation for ``neighborhood``.

    Raises:
        KeyError: If ``neighborhood`` is not present in the file.
        ValueError: If the neighborhood has no observations.
    """
    with open(path, "r") as f:
        neighborhood_prices_pent_json = json.load(f)
    neigh_prices = neighborhood_prices_pent_json[neighborhood]
    latest_key = max(neigh_prices)
    return neigh_prices[latest_key]["price"]

In [8]:
# Pipeline: penthouses -> latest price date per flat -> price on that date.
penthouses_data = find_penthouses()
# Only the property codes (the dict keys) are needed for the date lookup.
penhouse_latest_dates = get_latest_dates(list(penthouses_data))
results = get_latest_price(penhouse_latest_dates)

In [9]:
# logger.info(penthouses_data)

In [None]:
# sample_penthouse = penthouses_data['98964731']

# Nothing else in this notebook defines `logger`, so the function below would
# fail with NameError on a fresh kernel. INFO records are only shown once a
# handler/level is configured (e.g. logging.basicConfig(level=logging.INFO)).
logger = logging.getLogger(__name__)


def get_price_data(id, flat_data, results):
    """Collect reference prices for one penthouse and return them.

    Args:
        id: The flat's ``propertyCode``.
        flat_data: Entry from ``find_penthouses`` for this flat; may contain
            ``'size'``, ``'district'`` and ``'neighborhood'``.
        results: Mapping of ``propertyCode`` to a dict with ``'latestPrice'``
            (as produced by ``get_latest_price``).

    Returns:
        list with, in this order and each only when available:
        size * latest district price, size * latest neighborhood price, and
        the flat's own latest price. The multiplication suggests the area
        prices are per unit of size - TODO confirm units.
    """
    logger.info(flat_data)
    res = []
    # `size` is optional in the data returned by find_penthouses.
    flat_size = flat_data.get('size')

    if flat_size is None:
        logger.warning("Flat %s has no 'size'; skipping area-based estimates", id)
    else:
        if 'district' in flat_data:
            district_price = get_latest_price_distr(flat_data['district'])
            res.append(flat_size * district_price)

        if 'neighborhood' in flat_data:
            neighborhood_price = get_latest_price_neigh(flat_data['neighborhood'])
            res.append(flat_size * neighborhood_price)

    # Flats without any price record are absent from `results`.
    latest = results.get(id)
    if latest is None:
        logger.warning("Flat %s has no recorded price", id)
    else:
        res.append(latest['latestPrice'])

    logger.info(res)
    return res


# Run the price comparison for every penthouse fetched above.
for penthouse_id in penthouses_data:
    penthouse_data = penthouses_data[penthouse_id]
    get_price_data(penthouse_id, penthouse_data, results)