# Setting up the Notebook

In [1]:
import numpy as np
import pandas as pd
import math

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
# import recommendation engines from external .py file
from property_recommendation import FEATURES_FOR_DISPLAY, FEATURES_WITH_LISTING_ID, PairwiseItemRecEngine, UserItemRecEngine

## Load the Data

In [4]:
items = pd.read_csv("df_task2_onehot.csv")
items.head()

Unnamed: 0,listing_id,title,address,property_name,property_type,tenure,built_year,num_beds,num_baths,size_sqft,...,property_type_landed,property_type_semi_detached_house,property_type_shophouse,property_type_terraced_house,property_type_townhouse,property_type_walk_up,tenure_nan,tenure_freehold,tenure_99-110_year,tenure_999+year
0,122881,hdb flat for sale in 866 yishun street 81,sembawang / yishun (d27),866 yishun street 81,hdb,,1988,3,2,1115,...,0,0,0,0,0,0,1,0,0,0
1,259374,hdb flat for sale in 506b serangoon north aven...,hougang / punggol / sengkang (d19),hdb-serangoon estate,hdb,99-year leasehold,1992,4,2,1575,...,0,0,0,0,0,0,0,1,0,0
2,665422,4 bed condo for sale in meyerhouse,128 meyer road,meyerhouse,condo,freehold,2022,4,6,3070,...,0,0,0,0,0,0,0,0,0,1
3,857699,3 bed condo for sale in leedon green,26 leedon heights,leedon green,condo,freehold,2023,3,2,958,...,0,0,0,0,0,0,0,0,0,1
4,216061,2 bed condo for sale in one bernam,1 bernam street,one bernam,condo,99-year leasehold,2026,2,1,732,...,0,0,0,0,0,0,0,1,0,0


In [5]:
user_profiles = pd.read_csv("user_profile.csv")
user_profiles.head()

Unnamed: 0,profile_id,listing_id,view_time,index,num_beds,price,property_type,planning_area,subzone,name_of_nearest_mrt,num_baths,tenure,built_year,size_sqft,per_price
0,0,243265,4,0,"6.0,7.0,5.0,4.0,8.0",,"condo,hdb",,,,,,19902029.0,2105,16469547
1,0,844619,2,0,"6.0,7.0,5.0,4.0,8.0",,"condo,hdb",,,,,,19902029.0,2105,16469547
2,0,887175,5,0,"6.0,7.0,5.0,4.0,8.0",,"condo,hdb",,,,,,19902029.0,2105,16469547
3,0,761559,2,0,"6.0,7.0,5.0,4.0,8.0",,"condo,hdb",,,,,,19902029.0,2105,16469547
4,0,211308,2,0,"6.0,7.0,5.0,4.0,8.0",,"condo,hdb",,,,,,19902029.0,2105,16469547


# Computing the Top Recommendations

## Scenario 1: Recommend based on the item last viewed/currently being viewed

In [6]:
k = 3
pairwiseItemRecEngine = PairwiseItemRecEngine(items)
get_top_recommendations = pairwiseItemRecEngine.get_top_recommendations

In [7]:
for profile_id in range(5): # adjust to see more examples
    print("======================================== user {} ========================================".format(profile_id))
    
    # last viewed/currently viewed listing id
    reference_item_index = user_profiles[user_profiles['profile_id'] == profile_id]['view_time'].idxmax()
    view_history = user_profiles.iloc[[reference_item_index]]
    reference_lids = view_history['listing_id'].tolist()
    print("reference_item:")
    print(items[items['listing_id'].isin(reference_lids)][FEATURES_FOR_DISPLAY])
    
    # recommended listing ids
    most_similar_lids = get_top_recommendations(view_history, k)
    print("recommendations:")
    print(items[items['listing_id'].isin(most_similar_lids)][FEATURES_FOR_DISPLAY])

reference_item:
       listing_id property_type  num_beds  num_baths       lat         lng  \
16019      524387         condo         4          4  1.317717  103.829904   

       size_sqft  dist_to_nearest_important_mrt_rounded    price  built_year  \
16019       2077                                      1  6300000        2014   

         tenure  
16019  freehold  
recommendations:
       listing_id property_type  num_beds  num_baths       lat         lng  \
9704       184136         condo         4          4  1.311239  103.828154   
15433      660576         condo         4          4  1.317717  103.829904   
19380      851641         condo         3          2  1.340464  103.883733   

       size_sqft  dist_to_nearest_important_mrt_rounded    price  built_year  \
9704        2476                                      1  7875000        2014   
15433       2077                                      1  6300000        2014   
19380        947                                      2  193

## Scenario 2: Recommend based on view history
### Approach 1: Pairwise item-item similarity 
with filtering based on search criteria

In [8]:
k = 3
pairwiseItemRecEngine = PairwiseItemRecEngine(items)
get_top_recommendations = pairwiseItemRecEngine.get_top_recommendations_based_on_view_history

In [9]:
for profile_id in range(5): # adjust to see more examples
    print("======================================== user {} ========================================".format(profile_id))
    
    # last 15 viewed listing ids
    view_history = user_profiles[user_profiles['profile_id'] == profile_id]
    reference_lids = view_history['listing_id'].to_list()
    print("view history:")
    print(items[items['listing_id'].isin(reference_lids)][FEATURES_FOR_DISPLAY])
    
    # recommended listing ids
    most_similar_lids = pairwiseItemRecEngine.get_top_recommendations_based_on_view_history(view_history, k)
    print("recommendations:")
    print(items[items['listing_id'].isin(most_similar_lids)][FEATURES_FOR_DISPLAY])

view history:
       listing_id property_type  num_beds  num_baths       lat         lng  \
250        802295         condo         4          3  1.313186  103.899693   
4295       211308         condo         4          4  1.298416  103.857178   
4618       844619         condo         4          3  1.330709  103.868391   
5991       593601         condo         4          4  1.344334  103.878690   
6573       243265         condo         4          3  1.314664  103.831084   
7457       397248         condo         4          4  1.283016  103.839887   
7991       182886         condo         5          6  1.312402  103.841172   
9239       350347         condo         4          3  1.312364  103.803271   
10079      867420         condo         4          4  1.312076  103.804055   
12686      190211         condo         4          3  1.310184  103.835914   
13103      761559         condo         4          4  1.294106  103.836735   
14440      777580         condo         5         

### Approach 2: User-item similarity 
with filtering based on search criteria

In [10]:
k = 3
userItemRecEngine = UserItemRecEngine(items)
get_top_recommendations = userItemRecEngine.get_top_recommendations_based_on_view_history

In [11]:
for profile_id in range(5): # adjust to see more examples
    print("======================================== user {} ========================================".format(profile_id))
    
    # last 15 viewed listing ids
    userItemRecEngine = UserItemRecEngine(items)
    view_history = user_profiles[user_profiles['profile_id'] == profile_id]
    reference_lids = view_history['listing_id'].to_list()
    print("view history:")
    print(items[items['listing_id'].isin(reference_lids)][FEATURES_FOR_DISPLAY])

    most_similar_lids = userItemRecEngine.get_top_recommendations_based_on_view_history(view_history, k)
    print("recommendations:")
    print(items[items['listing_id'].isin(most_similar_lids)][FEATURES_FOR_DISPLAY])

view history:
       listing_id property_type  num_beds  num_baths       lat         lng  \
250        802295         condo         4          3  1.313186  103.899693   
4295       211308         condo         4          4  1.298416  103.857178   
4618       844619         condo         4          3  1.330709  103.868391   
5991       593601         condo         4          4  1.344334  103.878690   
6573       243265         condo         4          3  1.314664  103.831084   
7457       397248         condo         4          4  1.283016  103.839887   
7991       182886         condo         5          6  1.312402  103.841172   
9239       350347         condo         4          3  1.312364  103.803271   
10079      867420         condo         4          4  1.312076  103.804055   
12686      190211         condo         4          3  1.310184  103.835914   
13103      761559         condo         4          4  1.294106  103.836735   
14440      777580         condo         5         

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  view_history['view_score'] = view_history['view_time']/last_view_time
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  view_history['view_score'] = view_history['view_time']/last_view_time
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  view_history['view_score'] = view_history['view_time']/last_view_

recommendations:
       listing_id property_type  num_beds  num_baths       lat         lng  \
5634       185317         condo         3          2  1.286177  103.839550   
8292       702681         condo         1          1  1.298416  103.857178   
11439      204049         condo         3          3  1.310429  103.802821   

       size_sqft  dist_to_nearest_important_mrt_rounded    price  built_year  \
5634        1076                                      1  2205000        2025   
8292         409                                      0  1561400        2024   
11439       1055                                      1  2955600        2025   

                  tenure  
5634   99-year leasehold  
8292   99-year leasehold  
11439           freehold  
view history:
       listing_id property_type  num_beds  num_baths       lat         lng  \
588        237551         condo         4          4  1.315961  103.836848   
821        297697         condo         5          5  1.312402  103.841

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  view_history['view_score'] = view_history['view_time']/last_view_time
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  view_history['view_score'] = view_history['view_time']/last_view_time
