### Script
- **Input:** scripts to run traditional recommendation using content-based, and collaborative filtering approaches.
- **Output:** Sample recommendations.

### Import libraries

In [2]:
!pip install gensim
import gensim
import pandas as pd
from gensim.models.doc2vec import Doc2Vec, Word2Vec
from gensim.test.utils import get_tmpfile

Collecting gensim
  Using cached gensim-4.1.2-cp38-cp38-macosx_10_9_x86_64.whl (24.0 MB)
Collecting smart-open>=1.8.1
  Using cached smart_open-5.2.1-py3-none-any.whl (58 kB)
Installing collected packages: smart-open, gensim
Successfully installed gensim-4.1.2 smart-open-5.2.1


### Load data

In [3]:
root = './RASA_realtime_recommendation/data/'
model_root='./RASA_realtime_recommendation/offline_models/'
processed = './Data/processing/Processed_Airbnb/'
raw = './Data/raw/'

### Recommend by user review comments
- https://towardsdatascience.com/detecting-document-similarity-with-doc2vec-f8289a9a7db7

In [4]:
reviews = pd.read_csv(processed+'ratings_filter.csv', sep=',')
vector_file_name = root+"embeddings/review_embeddings"
def recommendListingByUserComments(reviews,vector_file_name,topK):
    result=[]
    
    comment_corpus_model = Doc2Vec.load(vector_file_name) 
    new_doc = gensim.parsing.preprocessing.preprocess_string("private room dishwasher safety")
    test_doc_vector = comment_corpus_model.infer_vector(new_doc)
    sims = comment_corpus_model.dv.most_similar(positive = [test_doc_vector])
    for s in sims[:topK+1]:
        try:
            result.append(reviews['listing_id'].iloc[s[0]])
        except:
            print()
    return result

print('Top-5 recommended listing ids:')
print(recommendListingByUserComments(reviews,vector_file_name,5))

Top-5 recommended listing ids:


FileNotFoundError: [Errno 2] No such file or directory: './RASA_realtime_recommendation/data/embeddings/review_embeddings.dv.vectors.npy'

### Recommend by listing vector properties

In [5]:
listings = pd.read_csv(raw+'listings.csv.gz', sep=',')
vector_file_name = root+"embeddings/list_embeddings"
def recommendListingByProperties(listings,vector_file_name,topK):
    result=[]
    
    comment_corpus_model = Doc2Vec.load(vector_file_name) 
    new_doc = gensim.parsing.preprocessing.preprocess_string("private room dishwasher safety")
    test_doc_vector = comment_corpus_model.infer_vector(new_doc)
    sims = comment_corpus_model.docvecs.most_similar(positive = [test_doc_vector])
    for s in sims[:topK]:
        result.append(reviews['listing_id'].iloc[s[0]])
    return result

print('Top-5 recommended listing ids:')
print(recommendListingByUserComments(reviews,vector_file_name,5))

Top-5 recommended listing ids:
['80635', '82482', '82482', '169356', '118097', '118097']


### Recommend by listings - ContentBased filtering

In [7]:
model = Word2Vec.load(model_root+'ContentBasedFilter')
watch = listings[["id","listing_url","name"]]

# create movie id and tittle dictionary
watch_dict = watch.groupby('id')['id'].apply(list).to_dict()

def similar_watch(v, n = 5):

    # extract most similar movies for the input vector
    ms = model.wv.similar_by_vector(v, topn= n+1)[1:]
    # extract name and similarity score of the similar movies
    
    return ms  

topK=5
ls = similar_watch('2818',topK)
print('Top-'+str(topK)+' recommendations:')
for j in ls:
    print(j[0])
   
        

Top-5 recommendations:
1383369
505650
18882385
28170875
23332938


### Recommend by listings - Collaborative filtering

In [5]:
# https://sigdelta.com/blog/how-to-install-pyspark-locally/

In [11]:
from pyspark import SparkContext
from pyspark.sql import SparkSession
from pyspark.ml.recommendation import ALSModel

def recommendByALSCollabFilter(topK,user_id):

    spark = SparkSession.builder.getOrCreate()
    sc = spark.sparkContext
    alsmodel = ALSModel.load("/Users/sudhavijayakumar/Documents/299/299A-SMARTRec/RASA/offline_models/als_model")
    # convert this into a dataframe so that it can be passed into the recommendForUserSubset
    functiondf = sc.parallelize(user_id).toDF(['reviewer_id'])

    recommendations = alsmodel.recommendForUserSubset(functiondf , topK)
    recommendations.collect()

    recommended_listings = [recommendations.collect()[0]['recommendations'][x]['listing_id'] for x in range(0,topK)]
    return recommended_listings


user_id = [[164729]]
sims = recommendByALSCollabFilter(5,user_id)

# Get first 5 matches
listingss=[]
for s in sims:
	listingss.append('https://www.airbnb.com/rooms/'+str(s))
listingss

['https://www.airbnb.com/rooms/29456931',
 'https://www.airbnb.com/rooms/22789981',
 'https://www.airbnb.com/rooms/38056203',
 'https://www.airbnb.com/rooms/24534357',
 'https://www.airbnb.com/rooms/13152028']