# References:
-

### Import libraries

In [12]:
import gensim
import pandas as pd
from gensim.models.doc2vec import Doc2Vec, Word2Vec
from gensim.test.utils import get_tmpfile

### Load data

In [13]:
root = './RASA/data/'
model_root='./RASA/offline_models/'
processed = './Data/processing/Processed_Airbnb/'
raw = './Data/raw/'

### Recommend by user review comments
- https://towardsdatascience.com/detecting-document-similarity-with-doc2vec-f8289a9a7db7

In [29]:
reviews = pd.read_csv(processed+'ratings_filter.csv', sep=',')
vector_file_name = root+"embeddings/review_embeddings"
def recommendListingByUserComments(reviews,vector_file_name,topK):
    result=[]
    
    comment_corpus_model = Doc2Vec.load(vector_file_name) 
    new_doc = gensim.parsing.preprocessing.preprocess_string("private room dishwasher safety")
    test_doc_vector = comment_corpus_model.infer_vector(new_doc)
    sims = comment_corpus_model.dv.most_similar(positive = [test_doc_vector])
    for s in sims[:topK+1]:
        try:
            result.append(reviews['listing_id'].iloc[s[0]])
        except:
            print()
    return result

print('Top-5 recommended listing ids:')
print(recommendListingByUserComments(reviews,vector_file_name,5))

Top-5 recommended listing ids:

['31979911', '35927687', '35927687', '31945057', '37528056']


### Recommend by listing vector properties

In [15]:
listings = pd.read_csv(raw+'listings.csv.gz', sep=',')
vector_file_name = root+"embeddings/list_embeddings"
def recommendListingByProperties(listings,vector_file_name,topK):
    result=[]
    
    comment_corpus_model = Doc2Vec.load(vector_file_name) 
    new_doc = gensim.parsing.preprocessing.preprocess_string("private room dishwasher safety")
    test_doc_vector = comment_corpus_model.infer_vector(new_doc)
    sims = comment_corpus_model.docvecs.most_similar(positive = [test_doc_vector])
    for s in sims[:topK]:
        result.append(reviews['listing_id'].iloc[s[0]])
    return result

print('Top-5 recommended listing ids:')
print(recommendListingByUserComments(reviews,vector_file_name,5))

Top-5 recommended listing ids:
[70598.0, 118097.0, 118097.0, 55621.0, 118097.0]


  sims = comment_corpus_model.docvecs.most_similar(positive = [test_doc_vector])


### Recommend by listings - ContentBased filtering

In [27]:
model = Word2Vec.load(model_root+'ContentBasedFilter')
watch = listings[["listing_id","listing_url","name"]]

# create movie id and tittle dictionary
watch_dict = watch.groupby('listing_id')['listing_id'].apply(list).to_dict()

def similar_watch(v, n = 5):

    # extract most similar movies for the input vector
    ms = model.wv.similar_by_vector(v, topn= n+1)[1:]
    # extract name and similarity score of the similar movies
    
    return ms  

topK=5
ls = similar_watch('2818',topK)
print('Top-'+str(topK)+' recommendations:')
for j in ls:
    print(j[0])
   
        

Top-5 recommendations:
1383369
505650
18882385
28170875
23332938


### Recommend by listings - Collaborative filtering

In [5]:
# https://sigdelta.com/blog/how-to-install-pyspark-locally/

In [11]:
from pyspark import SparkContext
from pyspark.sql import SparkSession
from pyspark.ml.recommendation import ALSModel

def recommendByALSCollabFilter(topK,user_id):

    spark = SparkSession.builder.getOrCreate()
    sc = spark.sparkContext
    alsmodel = ALSModel.load("/Users/sudhavijayakumar/Documents/299/299A-SMARTRec/RASA/offline_models/als_model")
    # convert this into a dataframe so that it can be passed into the recommendForUserSubset
    functiondf = sc.parallelize(user_id).toDF(['reviewer_id'])

    recommendations = alsmodel.recommendForUserSubset(functiondf , topK)
    recommendations.collect()

    recommended_listings = [recommendations.collect()[0]['recommendations'][x]['listing_id'] for x in range(0,topK)]
    return recommended_listings


user_id = [[164729]]
sims = recommendByALSCollabFilter(5,user_id)

# Get first 5 matches
listingss=[]
for s in sims:
	listingss.append('https://www.airbnb.com/rooms/'+str(s))
listingss

['https://www.airbnb.com/rooms/29456931',
 'https://www.airbnb.com/rooms/22789981',
 'https://www.airbnb.com/rooms/38056203',
 'https://www.airbnb.com/rooms/24534357',
 'https://www.airbnb.com/rooms/13152028']