In [20]:
import pandas as pd
import numpy as np
from joblib import load
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction import DictVectorizer
from scipy.sparse import hstack
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.linear_model import LinearRegression, Lasso, LassoCV, Ridge, RidgeCV
#from sklearn.preprocessing import PolynomialFeatures, StandardScaler
#from sklearn.metrics import mean_squared_error, r2_score
from sklearn import preprocessing

In [85]:
# Importar Dataset con predicciones
df = pd.read_pickle('dataset_con_predicciones.pkl')
df['precio_mercado_usd'] = df['surface_total_in_m2'] * df['price_usd_per_m2_pred'].astype(int)
df['dif_val'] = df['price_aprox_usd'] - df['precio_mercado_usd']

In [86]:
# Limpiar Dataset
df = df[['property_type', 'place_name', 'state_name', 'price_aprox_usd', 'surface_total_in_m2', 'surface_covered_in_m2',
       'price_usd_per_m2', 'floor', 'rooms', 'expenses', 'zona', 'price_usd_per_m2_pred', 'dif_val','precio_mercado_usd']]

In [108]:
# Eliminar decimales para facilitar visualizacion
df = df.round(0)

In [109]:
# Seleccionar una muestra de 100 propiedades
portafolio = df.sample(n=100,random_state=100)

In [110]:
portafolio.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 100 entries, 14238 to 79710
Data columns (total 14 columns):
property_type            100 non-null object
place_name               100 non-null object
state_name               100 non-null object
price_aprox_usd          100 non-null float64
surface_total_in_m2      100 non-null float64
surface_covered_in_m2    100 non-null float64
price_usd_per_m2         100 non-null float64
floor                    100 non-null float64
rooms                    100 non-null float64
expenses                 100 non-null float64
zona                     100 non-null object
price_usd_per_m2_pred    100 non-null float64
dif_val                  100 non-null float64
precio_mercado_usd       100 non-null float64
dtypes: float64(10), object(4)
memory usage: 11.7+ KB


In [111]:
# Monto Cartera
valor_cartera_usd = portafolio.precio_mercado_usd.sum()
print(valor_cartera_usd)

21034022.0


In [112]:
# Ranking Subvaluadas en Portafolio
portafolio.sort_values(by='dif_val')

In [196]:
# Ranking Subvaluadas en Mercado - 1ras 100
df.sort_values(by='dif_val')

Unnamed: 0,property_type,place_name,state_name,price_aprox_usd,surface_total_in_m2,surface_covered_in_m2,price_usd_per_m2,floor,rooms,expenses,zona,price_usd_per_m2_pred,dif_val,precio_mercado_usd
52747,house,Capital Federal,Capital Federal,450000.0,2220.0,2220.0,203.0,0.0,1.0,0.0,Capital & GBA,1515.0,-2911080.0,3361080.0
21663,house,Los Cardales,Buenos Aires Interior,60000.0,2500.0,2500.0,24.0,0.0,1.0,0.0,Interior,1181.0,-2890000.0,2950000.0
116151,house,Maschwitz,Bs.As. G.B.A. Zona Norte,290000.0,2500.0,2500.0,116.0,0.0,4.0,0.0,Capital & GBA,1270.0,-2882500.0,3172500.0
17917,house,Bs.As. G.B.A. Zona Sur,Bs.As. G.B.A. Zona Sur,600000.0,5255.0,475.0,114.0,0.0,2.0,0.0,Capital & GBA,634.0,-2726415.0,3326415.0
13376,house,El Remanso,Buenos Aires Interior,180000.0,3800.0,3800.0,47.0,0.0,1.0,0.0,Interior,732.0,-2601600.0,2781600.0
5638,store,San Martín de los Andes,Neuquén,950000.0,2815.0,315.0,337.0,0.0,7.0,0.0,Interior,1226.0,-2501190.0,3451190.0
68851,store,Florida,Bs.As. G.B.A. Zona Norte,1400000.0,1809.0,1809.0,774.0,0.0,1.0,0.0,Capital & GBA,1983.0,-2185438.0,3585438.0
71861,house,Palermo Chico,Capital Federal,1607767.0,1200.0,1100.0,1340.0,0.0,3.0,0.0,Capital & GBA,3150.0,-2171033.0,3778800.0
94953,store,Berazategui,Bs.As. G.B.A. Zona Sur,240000.0,2000.0,2000.0,120.0,0.0,1.0,0.0,Capital & GBA,1204.0,-2166000.0,2406000.0
35469,store,Tigre,Bs.As. G.B.A. Zona Norte,250000.0,1575.0,250.0,159.0,1.0,1.0,0.0,Capital & GBA,1483.0,-2084150.0,2334150.0


In [231]:
# Cartera Ideal por diferencia de valuacion
c = -1
i = 1
lista = []
while i < valor_cartera_usd:
  c += 1
  i += df.sort_values(by='dif_val')[['precio_mercado_usd']].iloc[c].values[0]
  print(\
        df.sort_values(by='dif_val')[['property_type']].iloc[c].values[0],\
        ' - ',\
        df.sort_values(by='dif_val')[['place_name']].iloc[c].values[0],\
        ' - ',\
        df.sort_values(by='dif_val')[['state_name']].iloc[c].values[0],\
        ' - ',\
        df.sort_values(by='dif_val')[['dif_val']].iloc[c].values[0],\
        ' - ',\
        df.sort_values(by='dif_val')[['precio_mercado_usd']].iloc[c].values[0])

house  -  Capital Federal  -  Capital Federal  -  -2911080.0  -  3361080.0
house  -  Los Cardales  -  Buenos Aires Interior  -  -2890000.0  -  2950000.0
house  -  Maschwitz  -  Bs.As. G.B.A. Zona Norte  -  -2882500.0  -  3172500.0
house  -  Bs.As. G.B.A. Zona Sur  -  Bs.As. G.B.A. Zona Sur  -  -2726415.0  -  3326415.0
house  -  El Remanso  -  Buenos Aires Interior  -  -2601600.0  -  2781600.0
store  -  San Martín de los Andes  -  Neuquén  -  -2501190.0  -  3451190.0
store  -  Florida  -  Bs.As. G.B.A. Zona Norte  -  -2185438.0  -  3585438.0
