In [232]:
import pandas as pd
import numpy as np
import dask.dataframe as dd
# from dask_ml.feature_extraction import HashingVectorizer
import os
import seaborn as sns
from collections import Counter
import plotly.express as px
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import warnings
from scipy.sparse import csr_matrix
warnings.filterwarnings("ignore")

---

## Item Profiles based on TF-IDF of tags column

In [236]:
ratings = pd.read_csv('./clean_data/interactions_TRAIN.csv').drop('Unnamed: 0', axis=1)
recipes = pd.read_csv('./clean_data/recipes.csv').drop('Unnamed: 0', axis=1)

# ratings_TRAIN_dask = dd.from_pandas(ratings_TRAIN, 1).reset_index().drop('index', axis=1)
recipes_dask = dd.read_csv('./clean_data/recipes.csv').drop('Unnamed: 0', axis=1)

In [237]:
class TFIDF_Recommendations():
    
    def __init__(self, recipes, ratings):
        self.recipes = recipes
        self.recipes['tags'] = self.recipes['tags'].map(eval).map(lambda x: ' '.join(x))
        self.cosine_matrix = None
        self.flag = False
        self.indices = pd.Series(self.recipes.index, index = self.recipes['recipe_id']).drop_duplicates()
        
        rated_recipes = ratings['recipe_id'].unique()
        self.recipes = self.recipes[self.recipes['recipe_id'].isin(rated_recipes)]
        
        self.ratings = ratings.merge(self.recipes, on = 'recipe_id')
        self.ratings.drop(['date', 'minutes', 'contributor_id', 'submitted', 'n_steps', 'description', 'ingredients', 'n_ingredients', 'Calories', 'Total_fat_PDV', 'Sugar_PDV', 'Sodium_PDV', 'Protein_PDV', 'Saturated_fat_PDV', 'Carbohydrates_PDV', 'steps'], axis=1)
        self.ratings['mean_user_rating'] = self.ratings.groupby('user_id')['rating'].transform('mean')
        self.ratings['weighted_rating'] = self.ratings['rating'] - self.ratings['mean_user_rating']
        
        self.tfidf = TfidfVectorizer(stop_words='english', token_pattern=r"(?u)\S\S+")
        self.tfidf_ratings_matrix = tfidf.fit_transform(self.ratings['tags'])
        self.tfidf_recipes_matrix = tfidf.fit_transform(self.recipes['tags'])
    
    def display_tag_distribution(self):
        
        tags_count = self.recipes['tags'].map(Counter).sum().compute().most_common()
        fig = px.bar(tags_count,
                     x = 0,
                     y = 1,
                     log_y = True,
                     title = "Count Distribution of Tags Column")
        fig.update_layout(
            xaxis_title="Tag Names",
            yaxis_title="Counts",
            yaxis = dict(
                tickmode = 'linear',
                tick0 = 0,
                dtick = 1
    ))
    
        return fig
    
    def display_cosine_matrix(self, n=5):
        
        try:
            fig = px.imshow(self.cosine_matrix[:n, :n],
                            labels = dict(x = "Recipe Index", y = "Recipe Index", color = "Cosine Similarity"),
                            title = "Cosine similarity of Recipes",
                            text_auto = True)
        except:
            raise Exception("Generate a cosine matrix first by calling generate_cosine_sim_matrix()")
            
        return fig
    
    def generate_user_profile(self, user_id):
        
        user_profile = 0
        for index in self.ratings[self.ratings['user_id'] == user_id].index:
            user_profile += self.tfidf_ratings_matrix[index] * self.ratings.loc[index]['weighted_rating']

        return user_profile
        
    
    def generate_recommendations(self, user_id, n):
        
        similar_recipes = []
        toReturn = []
        curr_similarity = 0
        user_profile = self.generate_user_profile(user_id)
        for i in range(0, self.tfidf_recipes_matrix.shape[0]):
            curr_similarity = cosine_similarity(self.tfidf_recipes_matrix[i], user_profile)
            similar_recipes.append((curr_similarity[0][0], i))
        
        similar_recipes = sorted(similar_recipes, key = lambda x: x[0])[:n]
        
        for sim, idx in similar_recipes:
            recipe = self.recipes.loc[idx]['name']
            toReturn.append(recipe)
            
        return toReturn

In [238]:
obj = TFIDF_Recommendations(recipes, ratings)

In [239]:
obj.generate_recommendations(104295, 10)

['the bestest cheezie sauce',
 'crispy blue cheese potatoes',
 'chilli con carne  21 day wonder diet   day 2',
 'galbi  kalbi  grilled korean short ribs  flanken cut',
 'cinnamon crackers',
 'paper lined sponge cake',
 'parsnip croquettes',
 'easy homemade almond roca',
 'salad beatrice',
 'grilled lemon trout']