In [1]:
%load_ext autoreload
%autoreload 2
from pymongo import MongoClient
import pandas as pd
from datetime import datetime
from datetime import time

# MongoClient is called without arguments, so PyMongo's default connection
# settings are used.
client = MongoClient()
# Handle to the `reality` database and its `sreality_all` collection, which is
# the collection queried by the aggregation below.
db = client['reality']
coll = db.sreality_all

# Prepare dataset

In [20]:
# Lower bound for `timeAdded`: midnight of the current day on the local clock.
# NOTE(review): PyMongo treats naive datetimes as UTC -- confirm `timeAdded`
# was written with the same naive-local convention, otherwise the window is
# shifted by the UTC offset.
today_start = datetime.combine(datetime.now().date(), time(0, 0, 0))

# Build one row per listing (`hash_id`) from the snapshots added today.
dataset = coll.aggregate([
    {'$match': {
        'seo.category_main_cb': 1,  # Apartments
        'seo.category_type_cb': 1,  # Sale
        'seo.locality': {'$regex': '^praha'},  # locality slug starts with "praha"
        'timeAdded': {'$gt': today_start},
    }},
    # `$first` / `$last` are only well-defined on ordered input. Without this
    # stage the snapshot each accumulator picks is arbitrary. Ascending order
    # makes `$first` the earliest and `$last` the latest snapshot of the day.
    {'$sort': {'timeAdded': 1}},
    {'$group': {
        '_id': '$hash_id',
        'labelsAll': {'$first': '$labelsAll'},
        'price': {'$first': '$price'},
        'name': {'$first': '$name'},
        'locality': {'$first': '$seo.locality'},
        'totalFloorArea': {'$first': '$totalFloorArea'},
        'public_transport_distance': {'$last': '$closestPublicTransportStop.distance'},
        'layout': {'$first': '$seo.category_sub_cb'},
    }},
])
# Materialise the cursor into a DataFrame; each grouped document becomes a row.
df_original = pd.DataFrame(dataset)

In [68]:
import joblib
from reality_prepare_rent_dataset import prepareDataset
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
# Load the persisted model object; the relative path is resolved against the
# current working directory. Later cells use its `features`, `features_num`
# and `predict` members.
# NOTE: joblib.load unpickles the file and unpickling can execute arbitrary
# code -- only load artifacts that come from a trusted source.
model = joblib.load('rent-model.pipeline')

In [69]:
# Run the project's preparation step on a copy, so that `df_original` itself is
# not modified by it.
df_prepared = prepareDataset(df_original.copy())
# Conform the columns to `model.features`: columns not listed there are
# dropped, listed columns that are absent are added and filled with 0, and the
# column order follows `model.features`.
df = df_prepared.reindex(columns=model.features, fill_value=0)

In [70]:
# Feature matrix for prediction (an alias of the prepared frame, not a copy).
X_pred = df

# Apply the persisted scaler first, then the persisted PCA, in that order.
# NOTE: joblib.load unpickles these files -- only load trusted artifacts.
rent_scaler = joblib.load('rent-model.scaler')
X_trans = rent_scaler.transform(X_pred)
rent_pca = joblib.load('rent-model.pca')
X_trans = rent_pca.transform(X_trans)

# Keep only the leading `model.features_num` columns of the transformed matrix.
n_kept = model.features_num
X_red = X_trans[:, :n_kept]

In [None]:
# Run the loaded model on the reduced feature matrix. The result is multiplied
# by the floor area in the next cell.
# NOTE(review): presumably one prediction per row of X_red -- confirm.
y_pred = model.predict(X_red)

# Testing

In [73]:
# Floor area cast to integers; the cast raises if a value cannot be converted.
floor_area_int = df_original['totalFloorArea'].astype('int')
# Scale each prediction by the listing's floor area and store it as a new
# column on `df_original`.
# NOTE(review): the product pairs `y_pred` with rows by position, so it
# presumably relies on prepareDataset keeping the number and order of rows of
# `df_original` -- confirm.
df_original['predicted_rent_price'] = y_pred * floor_area_int

In [86]:
# Inspect the predictions for the smaller flats of a single district.
DISTRICT = 'vinohrady'   # pattern searched for in the `locality` value
MAX_FLOOR_AREA = 100     # exclusive upper bound on `totalFloorArea`

# Build both masks on the full frame and combine them with `&`. Chaining
# df[mask_a][mask_b] applies a mask computed on the unfiltered frame to an
# already filtered one, which makes pandas emit the "Boolean Series key will be
# reindexed to match DataFrame index" warning.
in_district = df_original['locality'].str.contains(DISTRICT)
below_max_area = df_original['totalFloorArea'] < MAX_FLOOR_AREA

# Last expression of the cell, so the resulting table is displayed.
(
    df_original
    .loc[in_district & below_max_area,
         ['name', 'locality', 'totalFloorArea', 'price', 'predicted_rent_price']]
    .sort_values(by='totalFloorArea')
)

  """Entry point for launching an IPython kernel.


Unnamed: 0,name,locality,totalFloorArea,price,predicted_rent_price
529,Prodej bytu 1+kk 15 m²,praha-vinohrady-spanelska,15,2970000,4987.595616
528,Prodej bytu 1+kk 15 m²,praha-vinohrady-spanelska,15,2970000,4933.776996
574,Prodej bytu 1+kk 15 m²,praha-vinohrady-perucka,15,2280000,5599.923866
4395,Prodej bytu 1+kk 15 m²,praha-vinohrady-spanelska,15,2970000,5003.673029
3915,Prodej bytu 1+kk 16 m²,praha-vinohrady-spanelska,16,2970000,6938.546067
...,...,...,...,...,...
646,Prodej bytu 3+kk 96 m²,praha-vinohrady-belgicka,96,14510000,27864.193611
517,Prodej bytu 3+kk 97 m²,praha-vinohrady-velehradska,97,13990000,32253.118316
552,Prodej bytu 3+kk 98 m²,praha-vinohrady-safarikova,98,12950000,32585.624691
566,Prodej bytu 3+1 98 m²,praha-vinohrady-polska,98,14490000,25412.160349
