# Load packages

In [1]:
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from matplotlib.pyplot import figure
from PIL import Image
import requests
from io import BytesIO
import os
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import Normalizer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
from scipy import sparse
import re
import warnings
warnings.simplefilter("ignore")

# Define functions

## Content-based

In [None]:
def create_url(designer, display_name):
  """Build the renttherunway.com designer-shop URL for a product.

  Each name part is turned into a slug the same way: spaces become
  underscores, every character that is not an ASCII letter, digit or
  underscore is removed, and the result is lower-cased.

  Args:
    designer: Designer name (str).
    display_name: Product display name (str).

  Returns:
    str: 'https://www.renttherunway.com/shop/designers/<designer>/<product>'.
  """
  def _slug(text):
    # Spaces are mapped to underscores first so they survive the filter.
    return re.sub(r'[^a-zA-Z0-9_]', '', text.replace(' ', '_')).lower()

  base = 'https://www.renttherunway.com/shop/designers/'
  designer_slug = _slug(designer)
  product_slug = _slug(display_name)
  return base + designer_slug + '/' + product_slug

In [None]:
# Get top 5 similar products for a given product
def content_based_recommendation(item_id):
  """Show a product next to its most similar products and print their shop links.

  Takes the `item_id` column of the global `cs_matrix_content`, sorts it in
  descending order and keeps the first six rows. The first row is drawn as the
  "given" product (presumably `item_id` itself, since self-similarity should
  rank highest -- TODO confirm), followed by an arrow image and then the
  remaining rows as recommendations. Image link, display name and designer
  are joined in from the global `p_data` on the index.

  A product without a usable image link is drawn with a placeholder image but
  is otherwise treated like any other product (title, similarity label,
  printed link), so panel positions and recommendation numbers stay aligned.

  Args:
    item_id: Column label of `cs_matrix_content`. The index labels of that
      frame are concatenated into plot titles, so they must be strings.

  Returns:
    None. Renders a matplotlib figure and prints one URL per product.
  """
  n_products = 5
  arrow_img_link = 'https://making-the-web.com/sites/default/files/clipart/129282/arrow-right-129282-3496286.jpg'
  missing_img_link = 'https://www.wildhareboca.com/wp-content/uploads/sites/310/2018/03/image-not-available-200x300.jpg'

  def _draw_remote_image(ax, url):
    # Download `url` and render it on `ax`. The timeout keeps a stalled host
    # from blocking the notebook forever.
    response = requests.get(url, timeout=30)
    ax.imshow(np.asarray(Image.open(BytesIO(response.content))))

  top_5 = cs_matrix_content[[item_id]].sort_values(ascending = False, by = item_id)[0:n_products+1]
  top_5_links = top_5.join(p_data[['product_img_link', 'displayName', 'designer_name']])

  # One panel per product plus one extra panel for the arrow.
  fig, axs = plt.subplots(1, n_products+2, figsize=(20, 6))
  rows = zip(top_5_links.index,
             top_5_links[item_id].values,
             top_5_links['product_img_link'].values,
             top_5_links['displayName'].values,
             top_5_links['designer_name'].values)
  i = 0
  for id_, cosine, link, display_name, designer in rows:
    product_link = create_url(designer, display_name)

    # A non-string link means there is no image to fetch (presumably NaN
    # from the join); fall back to the placeholder instead of branching away
    # from the normal title/link handling.
    _draw_remote_image(axs[i], link if isinstance(link, str) else missing_img_link)

    if i == 0:
      axs[i].set_title('Given Product: ' + id_ + '\n' + display_name)
      print('Given product:', product_link)
      # The arrow always occupies the panel right after the given product.
      i += 1
      _draw_remote_image(axs[i], arrow_img_link)
      axs[i].axis('off')
    else:
      # i counts panels including the arrow, hence i-1 for the label.
      axs[i].set_title('Recommended' + '_' + str(i-1) + ': ' + id_ + '\n' + display_name)
      axs[i].set_xlabel('Cosine similarity: ' + str(round(cosine,2)))
      print('Recommended' + '_' + str(i-1) + ':', product_link)
    i += 1

  # Hide panels that stayed empty because fewer rows than expected exist.
  for unused_ax in axs[i:]:
    unused_ax.axis('off')
  plt.show()

## Collaborative

In [1]:
def get_similarities(user1, user2):
  """Collect the variables on which both users have a non-zero value.

  The same lookup is applied to two global long-format frames, each with
  `user_id`, `variable` and `value` columns: `user_item_rating` (ratings)
  and `review_data_nodup_melt` (demographics). For each frame the non-zero
  rows of both users are inner-joined on `variable`.

  Args:
    user1: First user id; its values end up in column 'value_<user1>'.
    user2: Second user id; its values end up in column 'value_<user2>'.

  Returns:
    pandas.DataFrame: rating matches followed by demographic matches, with
    columns 'variable', 'value_<user1>' and 'value_<user2>'. The index is
    not reset, so labels can repeat across the two parts.
  """
  def _nonzero_rows(frame, user):
    # (variable, value) pairs of `user` whose value is not 0.
    keep = (frame['user_id'] == user) & (frame['value'] != 0)
    return frame.loc[keep, ['variable', 'value']]

  column_suffixes = ('_'+str(user1), '_'+str(user2))

  matches = []
  # Ratings first, demographics second: this fixes the row order of the result.
  for source in (user_item_rating, review_data_nodup_melt):
    first_user_rows = _nonzero_rows(source, user1)
    second_user_rows = _nonzero_rows(source, user2)
    matches.append(first_user_rows.merge(second_user_rows,
                                         left_on = 'variable',
                                         right_on = 'variable',
                                         how='inner',
                                         suffixes=column_suffixes))

  return pd.concat(matches)

In [2]:
def same_products_links(similarities_df):
  """Print shop links for the products two users have in common.

  Keeps the rows of `similarities_df` whose `variable` starts with
  'Product', takes the text between the first and second underscore as the
  product id, and looks up designer, display name and image link in the
  global `all_product_data_list` (joined on its index).

  Args:
    similarities_df: Frame with a string `variable` column, e.g. the result
      of `get_similarities`. It is not modified.

  Returns:
    pandas.DataFrame with columns 'product', 'displayName' and
    'product_img_link', one row per shared product.
  """
  is_product = similarities_df['variable'].apply(lambda name: name.startswith('Product'))
  # Work on an explicit copy so adding a column is not a chained assignment
  # on a slice of the caller's frame.
  shared_products = similarities_df.loc[is_product, :].copy()

  # NOTE(review): only the segment after the first underscore is kept; ids
  # that themselves contain '_' would be truncated -- confirm the id format.
  shared_products['product'] = shared_products['variable'].apply(lambda name: name.split('_')[1])
  shared_products = shared_products.join(
      all_product_data_list[['designer_name', 'displayName', 'product_img_link']],
      on='product')

  name_pairs = zip(shared_products['designer_name'], shared_products['displayName'])
  for position, (designer, display_name) in enumerate(name_pairs, start=1):
    product_link = create_url(designer, display_name)
    print('Same product', str(position), product_link)

  # Columns are selected by name, so the result does not depend on how many
  # value columns the input carried.
  return shared_products[['product', 'displayName', 'product_img_link']]

In [3]:
def get_recommendations_two_users(user1, user2, min_rating=8, n_recommendations=5):
  """Recommend to `user1` the items `user2` rated highly and `user1` has not rated.

  Reads the global long-format frame `user_item_rating` (`user_id`,
  `variable`, `value`); a value of 0 is treated as "not rated".

  Args:
    user1: User receiving the recommendations.
    user2: User whose ratings are used as the source.
    min_rating: Lowest `user2` rating that still qualifies an item.
    n_recommendations: Maximum number of items to return.

  Returns:
    tuple: (recommended, unrated)
      recommended: pandas.Series of `variable` names, best rated first, at
        most `n_recommendations` long.
      unrated: pandas.DataFrame of every item rated by `user2` but not by
        `user1`, sorted by `user2`'s rating in descending order, with
        columns 'variable', 'value_<user2>' and 'value_<user1>'.
  """
  def _rated_by(user):
    # (variable, value) pairs the user actually rated (non-zero value).
    rated = (user_item_rating['user_id'] == user) & (user_item_rating['value'] != 0)
    return user_item_rating.loc[rated, ['variable', 'value']]

  user1_col = 'value_' + str(user1)
  user2_col = 'value_' + str(user2)

  # Left join from user2's side: items user1 never rated get NaN in user1_col.
  joined_rating_df = _rated_by(user2).merge(_rated_by(user1),
                                            left_on = 'variable',
                                            right_on = 'variable',
                                            how='left',
                                            suffixes=('_'+str(user2), '_'+str(user1)))
  unrated_products_df = joined_rating_df.loc[joined_rating_df[user1_col].isna(), :]
  # A stable sort keeps tied ratings in their original order, so repeated runs
  # pick the same items when several share a rating.
  unrated_products_df = unrated_products_df.sort_values(by=user2_col, ascending=False, kind='mergesort')

  recommended_products = unrated_products_df.loc[unrated_products_df[user2_col] >= min_rating, 'variable']
  recommended_products = recommended_products.iloc[:n_recommendations]
  return recommended_products, unrated_products_df

In [4]:
def create_url(designer, display_name):
  """Return the renttherunway.com shop link for a designer/product pair.

  Both inputs are normalised identically: spaces are replaced by
  underscores, characters other than ASCII letters, digits and underscores
  are stripped, and the text is lower-cased.

  Args:
    designer: Designer name (str).
    display_name: Product display name (str).

  Returns:
    str: 'https://www.renttherunway.com/shop/designers/<designer>/<product>'.
  """
  # NOTE(review): a function with the same name and logic is also defined in
  # the "Content based" section; whichever cell runs last is the one in effect.
  slugs = [re.sub(r'[^a-zA-Z0-9_]', '', part.replace(' ', '_')).lower()
           for part in (designer, display_name)]
  link = 'https://www.renttherunway.com/shop/designers/' + slugs[0] + '/' + slugs[1]
  return link

In [5]:
def print_images(df, text_to_write):
  """Draw up to five product images side by side.

  Args:
    df: Frame with columns 'product_img_link', 'displayName' and 'product'.
      Only the first five rows are drawn. A non-string image link is
      replaced by a placeholder image.
    text_to_write: Prefix for each panel title, e.g. 'Same Product '.

  Returns:
    None. Renders a matplotlib figure; nothing is drawn for an empty frame.
  """
  max_images = 5
  missing_img_link = 'https://www.wildhareboca.com/wp-content/uploads/sites/310/2018/03/image-not-available-200x300.jpg'

  df = df.iloc[:max_images, :]
  n = df.shape[0]
  if n == 0:
    # A figure cannot be created with zero columns, and an empty frame is a
    # normal outcome (no shared products, nothing to recommend), so skip it.
    return

  # squeeze=False always yields a 2-D array of Axes, so one and several
  # images go through the same code path.
  fig, axs = plt.subplots(1, n, figsize=(n*4, 5), squeeze=False)
  rows = zip(df['product_img_link'].values, df['displayName'].values, df['product'].values)
  for i, (link, display_name, id_) in enumerate(rows):
    ax = axs[0, i]
    if isinstance(link, str):
      # The timeout keeps a stalled host from blocking the notebook forever.
      response = requests.get(link, timeout=30)
      img = Image.open(BytesIO(response.content))
      ax.imshow(np.asarray(img))
      ax.set_title(text_to_write + str(i+1) + ': ' + id_ + '\n' + display_name)
    else:
      response = requests.get(missing_img_link, timeout=30)
      img = Image.open(BytesIO(response.content))
      ax.imshow(np.asarray(img))
      # Only the id is shown here; the display name may be missing as well.
      ax.set_title(id_)
  plt.show()

In [6]:
def collaborative_recommendations(user_id):
  """Walk through the most similar users until five products are recommended.

  Users are ranked by the `user_id` column of the global
  `cs_matrix_collaborative`, in descending order. Position 0 is skipped
  (presumably the user themself, since self-similarity should rank highest --
  TODO confirm). For each following user the function shows what both users
  have in common, then recommends products that user rated highly and
  `user_id` has not rated. It stops after five recommendations or when no
  similar users are left. A product already recommended from an earlier
  similar user is not recommended again.

  Relies on the notebook-level helpers `get_similarities`,
  `same_products_links`, `get_recommendations_two_users`, `print_images`,
  `create_url`, the global `all_product_data_list` and IPython's `display`.

  Args:
    user_id: Column label of `cs_matrix_collaborative`.

  Returns:
    None. Prints progress and links and renders frames and images.
  """
  n_target = 5
  separator = '\n---------------------------------------------------\n'

  # The ranking does not depend on the iteration, so sort once up front.
  ranked_users = cs_matrix_collaborative[[user_id]].sort_values(ascending = False, by = user_id)

  n_recommended = 0
  already_recommended = set()
  n_similar_user = 1
  # The second condition stops the loop once every similar user was tried,
  # instead of indexing past the end of the ranking.
  while n_recommended < n_target and n_similar_user < len(ranked_users):
    similar_user = ranked_users.index[n_similar_user]

    print(separator)
    print('Finding similar user')
    print('Iteration:', n_similar_user)
    print(separator)

    print('Top Similar User:', similar_user)
    print('cosine:', ranked_users.values[n_similar_user])
    print(separator)

    # Get similarities between the users
    similarities_df = get_similarities(user_id, similar_user)
    print('Similarites:')
    display(similarities_df)

    # Get product links for same products that the users bought
    same_products_img_links = same_products_links(similarities_df)
    print(separator)
    if len(same_products_img_links) > 0:
      print_images(same_products_img_links, 'Same Product ')
    print(separator)

    # Get recommendations and unrated products based on the similar user
    recos, unrated_products = get_recommendations_two_users(user_id, similar_user)

    # Skip products already recommended via an earlier similar user, then
    # keep only as many as are still needed.
    recos = recos[~recos.isin(already_recommended)]
    recos = recos[:n_target - n_recommended]
    print('Products rated by similar user but not by given user:')
    display(unrated_products)
    print(separator)

    if len(recos) > 0:
      # Get product details for the recommendations
      product_ids = recos.apply(lambda x: x.split('_')[1]).values
      recos_df = all_product_data_list.loc[product_ids, ['displayName', 'designer_name', 'product_img_link']]
      reco_products_img_links = recos_df.reset_index().drop(columns=['designer_name'])
      reco_products_img_links.columns = ['product', 'displayName', 'product_img_link']
      print_images(reco_products_img_links, 'Recommended Product ')

    print('Recommended products:')
    if len(recos) > 0:
      for designer, display_name in zip(recos_df['designer_name'], recos_df['displayName']):
        n_recommended += 1
        product_link = create_url(designer, display_name)
        print('Recommendation', str(n_recommended), product_link)
      already_recommended.update(recos)

    # Go to the next similar user if fewer than five products were found
    n_similar_user += 1

  if n_recommended < n_target:
    print(separator)
    print('No more similar users to check; recommendations found:', n_recommended)

In [None]:
# Index labels of the two similarity matrices as plain lists: presumably the
# valid product ids for content_based_recommendation and the valid user ids
# for collaborative_recommendations -- TODO confirm.
product_list = [*cs_matrix_content.index]
user_list = [*cs_matrix_collaborative.index]