In [2]:
import pandas as pd
import numpy as np
import time
from app.load_data import load_listings
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.feature_extraction.text import CountVectorizer



In [14]:
# FunctionTransformer's documented home is sklearn.preprocessing; sklearn.pipeline
# only happens to import it internally.
from sklearn.preprocessing import FunctionTransformer


def _scale_features(x, factor):
    """Multiply a feature matrix (dense or sparse) by the scalar ``factor``."""
    return x * factor


def train_recommender(data, n_neighbors=4, weight1=1, numeric_weight=20):
    """Fit a nearest-neighbour recommender on listing features.

    The columns ``price``, ``latitude``, ``longitude``, ``room_type`` and
    ``amenities`` are selected from ``data`` and preprocessed as follows:

    * numeric columns: mean-imputed, standardised, then multiplied by
      ``numeric_weight``;
    * ``room_type``: one-hot encoded (unknown categories are ignored at
      transform time), then multiplied by ``weight1``;
    * ``amenities``: split on ``;`` into binary presence indicators.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the five feature columns listed above.
    n_neighbors : int, default 4
        Default number of neighbours returned by the fitted ``NearestNeighbors``.
    weight1 : float, default 1
        Multiplier applied to the one-hot ``room_type`` block.
    numeric_weight : float, default 20
        Multiplier applied to the standardised numeric block. The default keeps
        the value that was previously hard-coded.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Fitted pipeline with the steps ``'preprocessor'`` (ColumnTransformer)
        and ``'knn'`` (NearestNeighbors).

    Raises
    ------
    KeyError
        If any of the feature columns is missing from ``data``.
    """
    features = ['price', 'latitude', 'longitude', 'room_type', 'amenities']
    X = data[features]

    # The weighting function lives at module level (not nested in this function)
    # so that the fitted pipeline does not hold a local object pickle cannot handle.
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
        ('weight', FunctionTransformer(_scale_features,
                                       kw_args={'factor': numeric_weight})),
    ])

    roomtype_transformer = Pipeline(steps=[
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
        # accept_sparse=True: the one-hot output is passed through as a sparse matrix.
        ('weight', FunctionTransformer(_scale_features,
                                       kw_args={'factor': weight1},
                                       accept_sparse=True)),
    ])

    # Every run of non-';' characters is one amenity token; binary=True records
    # presence rather than counts.
    # NOTE(review): CountVectorizer rejects NaN documents; this assumes the
    # 'amenities' column contains only strings -- confirm against load_listings.
    amenities_transformer = CountVectorizer(token_pattern=r'[^;]+', binary=True)

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, ['price', 'latitude', 'longitude']),
            ('room', roomtype_transformer, ['room_type']),
            # A bare string (not a list) hands CountVectorizer a 1-D column.
            ('amen', amenities_transformer, 'amenities'),
        ],
        remainder='drop'
    )

    model = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('knn', NearestNeighbors(n_neighbors=n_neighbors)),
    ])
    model.fit(X)
    return model

In [15]:


# Experiment: how much do nearest-neighbour distances change when the
# room-type weight goes from 1 to 100 on the same listings?
# (The stray `from turtle import distance` auto-import was dropped: it was
# unused here and importing turtle requires tkinter.)
listings = load_listings('Singapore, Singapore, Singapore')

# Baseline model: room-type weight 1.
model = train_recommender(listings, 4, 1)
# Named features_* rather than `input` so the builtin input() is not shadowed
# for the rest of the kernel session.
features_w1 = model.named_steps['preprocessor'].transform(listings)
distances, indices_w1 = model.named_steps['knn'].kneighbors(features_w1)

# Comparison model: room-type weight 100.
model1 = train_recommender(listings, 4, 100)
features_w100 = model1.named_steps['preprocessor'].transform(listings)
# `indices` ends up bound to this second query's result, as before; the
# baseline neighbours are kept separately in `indices_w1`.
distances1, indices = model1.named_steps['knn'].kneighbors(features_w100)

# Distinct element-wise differences between the two distance matrices.
np.unique(distances - distances1)

array([-6.07553118e+01, -5.87094583e+01, -5.77043403e+01, ...,
        5.30018520e-10,  6.37794548e-10,  7.05268712e-10], shape=(5291,))