In [212]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

In [213]:
df = pd.read_csv('../data/ar_properties.csv')
df

Unnamed: 0,id,ad_type,start_date,end_date,created_on,lat,lon,l1,l2,l3,...,bathrooms,surface_total,surface_covered,price,currency,price_period,title,description,property_type,operation_type
0,7LoZFkSIPOTox0r32ck42Q==,Propiedad,2019-09-15,2019-10-26,2019-09-15,-26.815439,-65.312393,Argentina,Tucumán,Yerba Buena,...,,,,,,Mensual,Terreno - Yerba Buena,Accesos pavimentados.<br>Red subterránea de ag...,Lote,Venta
1,QsZD4OxZInNd5po5LQDRmg==,Propiedad,2019-09-15,9999-12-31,2019-09-15,-26.839469,-65.212790,Argentina,Tucumán,,...,1.0,55.0,41.0,,,Mensual,Departamento - Capital,Detalles constructivos:<br><br>Pisos de porcel...,Departamento,Venta
2,qDtysoUgbnHLp0W9We+8fg==,Propiedad,2019-09-15,2019-09-25,2019-09-15,-31.424820,-64.181225,Argentina,Córdoba,Córdoba,...,1.0,45.0,45.0,,,Mensual,1 DORMITORIO | San Lorenzo al 400,Alquiler temporario de Departamento 1 DORMITOR...,Departamento,Alquiler temporal
3,olj51zV0HFurmaZ78U0ssg==,Propiedad,2019-09-15,2019-09-25,2019-09-15,-31.421242,-64.190798,Argentina,Córdoba,Córdoba,...,1.0,48.0,48.0,,,Mensual,1 DORMITORIO | M. T. de Alvear al 500,Alquiler temporario de Departamento 1 DORMITOR...,Departamento,Alquiler temporal
4,OcS1SAA5oAzjZ3Mzg3XHyg==,Propiedad,2019-09-15,2019-09-25,2019-09-15,-31.426064,-64.180042,Argentina,Córdoba,Córdoba,...,1.0,45.0,45.0,,,Mensual,1 DORMITORIO | Av. Poeta Lugones al 200,Alquiler temporario de Departamento 1 DORMITOR...,Departamento,Alquiler temporal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
999995,ml17bb3zNa945e9dZWrJ6Q==,Propiedad,2020-02-20,2020-04-28,2020-02-20,-38.011919,-57.535854,Argentina,Buenos Aires Costa Atlántica,Mar del Plata,...,5.0,,,750000.0,USD,,CHALET STELLA MARIS,Chalet en dos plantas sobre lote de 1100 m2 y ...,Otro,Venta
999996,O54TyNw4kMogEjaJqWhtLw==,Propiedad,2020-02-20,2020-04-28,2020-02-20,-37.995254,-57.553228,Argentina,Buenos Aires Costa Atlántica,Mar del Plata,...,5.0,,,180000.0,USD,,Venta Dto 4 Amb con Dep Macrocentro,En av. Luro esquina Salta encontramos este am...,Otro,Venta
999997,Y1K0Rl31ib5DwJL5qqCiqg==,Propiedad,2020-02-20,2020-04-28,2020-02-20,-37.983723,-57.544284,Argentina,Buenos Aires Costa Atlántica,Mar del Plata,...,5.0,,,450000.0,USD,,UNICA - HISTORICA - destino comercial,IDEAL DESARROLLO COMERCIAL - Pegado al Unzue -...,Otro,Venta
999998,xZ+W9ufh1Ugj2jPPqWwN8A==,Propiedad,2020-02-20,2020-04-28,2020-02-20,-32.983590,-68.879191,Argentina,Mendoza,Cuadro Benegas,...,5.0,,,500000.0,USD,,CASA EN VENTA-CHACRAS DE CORIA,"Importante propiedad en calle Italia, Chacras ...",Otro,Venta


In [214]:
df.columns

Index(['id', 'ad_type', 'start_date', 'end_date', 'created_on', 'lat', 'lon',
       'l1', 'l2', 'l3', 'l4', 'l5', 'l6', 'rooms', 'bedrooms', 'bathrooms',
       'surface_total', 'surface_covered', 'price', 'currency', 'price_period',
       'title', 'description', 'property_type', 'operation_type'],
      dtype='object')

In [215]:
def detect_column_types(df, categorical_threshold=10):
    numeric_cols = df.select_dtypes(include=['number']).columns
    categorical_cols = [col for col in df.select_dtypes(include=['object']).columns
                        if df[col].nunique() < categorical_threshold]
    date_cols = []
    for col in df.select_dtypes(include=['object']).columns:
        try:
            pd.to_datetime(df[col])
            date_cols.append(col)
        except (ValueError, TypeError):
            continue
    
    return numeric_cols, categorical_cols, date_cols

In [216]:
from sklearn.base import BaseEstimator, TransformerMixin

class MultiDateFeatureExtractor(BaseEstimator, TransformerMixin):
    def __init__(self, columns=None):
        self.columns = columns
    
    def fit(self, X, y=None):
        return self

    def transform(self, X):
        for column in self.columns:
            # Ensure each date column is a datetime type
            X[column] = pd.to_datetime(X[column])
            # Extract features for each date column
            X[f'{column}_year'] = X[column].dt.year
            X[f'{column}_month'] = X[column].dt.month
            X[f'{column}_day'] = X[column].dt.day
            X[f'{column}_day_of_week'] = X[column].dt.dayofweek

        return X.drop(columns=self.columns)


In [217]:
df

Unnamed: 0,id,ad_type,start_date,end_date,created_on,lat,lon,l1,l2,l3,...,bathrooms,surface_total,surface_covered,price,currency,price_period,title,description,property_type,operation_type
0,7LoZFkSIPOTox0r32ck42Q==,Propiedad,2019-09-15,2019-10-26,2019-09-15,-26.815439,-65.312393,Argentina,Tucumán,Yerba Buena,...,,,,,,Mensual,Terreno - Yerba Buena,Accesos pavimentados.<br>Red subterránea de ag...,Lote,Venta
1,QsZD4OxZInNd5po5LQDRmg==,Propiedad,2019-09-15,9999-12-31,2019-09-15,-26.839469,-65.212790,Argentina,Tucumán,,...,1.0,55.0,41.0,,,Mensual,Departamento - Capital,Detalles constructivos:<br><br>Pisos de porcel...,Departamento,Venta
2,qDtysoUgbnHLp0W9We+8fg==,Propiedad,2019-09-15,2019-09-25,2019-09-15,-31.424820,-64.181225,Argentina,Córdoba,Córdoba,...,1.0,45.0,45.0,,,Mensual,1 DORMITORIO | San Lorenzo al 400,Alquiler temporario de Departamento 1 DORMITOR...,Departamento,Alquiler temporal
3,olj51zV0HFurmaZ78U0ssg==,Propiedad,2019-09-15,2019-09-25,2019-09-15,-31.421242,-64.190798,Argentina,Córdoba,Córdoba,...,1.0,48.0,48.0,,,Mensual,1 DORMITORIO | M. T. de Alvear al 500,Alquiler temporario de Departamento 1 DORMITOR...,Departamento,Alquiler temporal
4,OcS1SAA5oAzjZ3Mzg3XHyg==,Propiedad,2019-09-15,2019-09-25,2019-09-15,-31.426064,-64.180042,Argentina,Córdoba,Córdoba,...,1.0,45.0,45.0,,,Mensual,1 DORMITORIO | Av. Poeta Lugones al 200,Alquiler temporario de Departamento 1 DORMITOR...,Departamento,Alquiler temporal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
999995,ml17bb3zNa945e9dZWrJ6Q==,Propiedad,2020-02-20,2020-04-28,2020-02-20,-38.011919,-57.535854,Argentina,Buenos Aires Costa Atlántica,Mar del Plata,...,5.0,,,750000.0,USD,,CHALET STELLA MARIS,Chalet en dos plantas sobre lote de 1100 m2 y ...,Otro,Venta
999996,O54TyNw4kMogEjaJqWhtLw==,Propiedad,2020-02-20,2020-04-28,2020-02-20,-37.995254,-57.553228,Argentina,Buenos Aires Costa Atlántica,Mar del Plata,...,5.0,,,180000.0,USD,,Venta Dto 4 Amb con Dep Macrocentro,En av. Luro esquina Salta encontramos este am...,Otro,Venta
999997,Y1K0Rl31ib5DwJL5qqCiqg==,Propiedad,2020-02-20,2020-04-28,2020-02-20,-37.983723,-57.544284,Argentina,Buenos Aires Costa Atlántica,Mar del Plata,...,5.0,,,450000.0,USD,,UNICA - HISTORICA - destino comercial,IDEAL DESARROLLO COMERCIAL - Pegado al Unzue -...,Otro,Venta
999998,xZ+W9ufh1Ugj2jPPqWwN8A==,Propiedad,2020-02-20,2020-04-28,2020-02-20,-32.983590,-68.879191,Argentina,Mendoza,Cuadro Benegas,...,5.0,,,500000.0,USD,,CASA EN VENTA-CHACRAS DE CORIA,"Importante propiedad en calle Italia, Chacras ...",Otro,Venta


In [227]:
df1 = df[:]
df1 = df1[df1['price_period']=='Mensual']
df1 = df1[['id', 'ad_type', 'start_date', 'end_date', 'created_on', 'lat', 'lon',
       'l1', 'l2', 'l3', 'l4', 'l5', 'l6', 'rooms', 'bedrooms', 'bathrooms',
       'surface_total', 'surface_covered', 'price', 'currency', 'price_period',
       'title', 'description', 'property_type', 'operation_type']]
df1.drop('price_period', inplace=True, axis=1)
df1

Unnamed: 0,id,ad_type,start_date,end_date,created_on,lat,lon,l1,l2,l3,...,bedrooms,bathrooms,surface_total,surface_covered,price,currency,title,description,property_type,operation_type
0,7LoZFkSIPOTox0r32ck42Q==,Propiedad,2019-09-15,2019-10-26,2019-09-15,-26.815439,-65.312393,Argentina,Tucumán,Yerba Buena,...,,,,,,,Terreno - Yerba Buena,Accesos pavimentados.<br>Red subterránea de ag...,Lote,Venta
1,QsZD4OxZInNd5po5LQDRmg==,Propiedad,2019-09-15,9999-12-31,2019-09-15,-26.839469,-65.212790,Argentina,Tucumán,,...,,1.0,55.0,41.0,,,Departamento - Capital,Detalles constructivos:<br><br>Pisos de porcel...,Departamento,Venta
2,qDtysoUgbnHLp0W9We+8fg==,Propiedad,2019-09-15,2019-09-25,2019-09-15,-31.424820,-64.181225,Argentina,Córdoba,Córdoba,...,,1.0,45.0,45.0,,,1 DORMITORIO | San Lorenzo al 400,Alquiler temporario de Departamento 1 DORMITOR...,Departamento,Alquiler temporal
3,olj51zV0HFurmaZ78U0ssg==,Propiedad,2019-09-15,2019-09-25,2019-09-15,-31.421242,-64.190798,Argentina,Córdoba,Córdoba,...,,1.0,48.0,48.0,,,1 DORMITORIO | M. T. de Alvear al 500,Alquiler temporario de Departamento 1 DORMITOR...,Departamento,Alquiler temporal
4,OcS1SAA5oAzjZ3Mzg3XHyg==,Propiedad,2019-09-15,2019-09-25,2019-09-15,-31.426064,-64.180042,Argentina,Córdoba,Córdoba,...,,1.0,45.0,45.0,,,1 DORMITORIO | Av. Poeta Lugones al 200,Alquiler temporario de Departamento 1 DORMITOR...,Departamento,Alquiler temporal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
999961,AK9MeeUjjWmqYjlDZrbDCg==,Propiedad,2020-02-20,2020-02-20,2020-02-20,-34.553085,-58.439111,Argentina,Capital Federal,Belgrano,...,4.0,5.0,257.0,227.0,6000.0,USD,ESPECTACULAR TORRE FORUM ALCORTA PISO ALTO - P...,Increíble Departamento de 5 Ambientes con Balc...,Departamento,Alquiler
999962,hxECKr/KuZYItA9PitlIpA==,Propiedad,2020-02-20,2020-02-20,2020-02-20,-34.562910,-58.447737,Argentina,Capital Federal,Belgrano,...,3.0,5.0,161.0,151.0,3500.0,USD,TOWN HOUSE BARRANCAS - ÚNICO - PARQUE 3700 M2 !!!,TORRE ÚNICA E IRREPETIBLE!!! - EN EL CORAZÓN ...,Departamento,Alquiler
999963,n0aAlxYzsgk0AZXK9m4+4Q==,Propiedad,2020-02-20,2020-02-20,2020-02-20,-34.567504,-58.458643,Argentina,Capital Federal,Belgrano,...,4.0,5.0,351.0,230.0,680000.0,USD,VENTA - 5 AMB. CON 3 COCH. BAULERA Y PATIO - B...,******ESPECTACULAR DÚPLEX EN EXCLUSIVA ZONA RE...,Departamento,Venta
999964,unb7EtLHHs/tr5gLETZSxg==,Propiedad,2020-02-20,2020-02-21,2020-02-20,-34.618343,-58.370165,Argentina,Capital Federal,San Telmo,...,,5.0,,,1550000.0,USD,Venta en San Telmo - Balcarce 900,Casa de 7 ambientes en San Telmo Living comedo...,Otro,Venta


In [228]:
df1.isna().sum().sort_values(ascending=False)

l6                 418757
l5                 416952
bedrooms           312571
l4                 309336
rooms              186291
surface_total      161180
surface_covered    158552
bathrooms           94969
lon                 46715
lat                 46688
currency            44046
price               38215
l3                  22578
title                  28
description            21
property_type           0
id                      0
ad_type                 0
l2                      0
l1                      0
created_on              0
end_date                0
start_date              0
operation_type          0
dtype: int64

In [229]:
df1 = df1.drop(['l6', 'l5', 'bedrooms', 'l4'], axis=1)

In [230]:
df1 = df1[df1['currency']=='USD']
df1 = df1.dropna()

In [231]:
X = df1.drop("price", axis=1)
y = df1['price']

In [232]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.pipeline import Pipeline

numerical_features, categorical_features, date_columns = detect_column_types(X)
print("Numeric Columns:", numeric)
print("Categorical Columns:", categorical)
print("Date Columns:", date)

preprocessor = ColumnTransformer(
    transformers=[
        ('num', MinMaxScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
        ('date', MultiDateFeatureExtractor(columns=date_columns), date_columns)
    ])


Numeric Columns: Index(['lat', 'lon', 'l6', 'rooms', 'bedrooms', 'bathrooms', 'surface_total',
       'surface_covered', 'price'],
      dtype='object')
Categorical Columns: ['ad_type', 'l1', 'currency', 'price_period', 'operation_type']
Date Columns: ['start_date', 'created_on']


In [233]:
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('estimator', LinearRegression())])

In [234]:
from sklearn.model_selection import train_test_split


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

In [235]:
pipeline.fit(X_train, y_train)

In [236]:
y_pred = pipeline.predict(X_test)

In [237]:
from sklearn.metrics import mean_squared_error, r2_score
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
print("R² Score:", r2_score(y_test, y_pred))

Mean Squared Error: 63152863007.52336
R² Score: 0.29710572173437433
