In [None]:
import pandas as pd

# Load the recipe table and keep only the three columns the notebook uses,
# in the order name, ingredients, id.
recipes_df = pd.read_csv('fruit_veggie_recipes.csv')[['name', 'ingredients', 'id']]

In [None]:
# Preview the first rows only; rendering the whole frame bloats the saved notebook.
recipes_df.head(10)

# 1. Content-based recommendation system
- The get_recommendations function takes a list of user preferences (ingredients) as input.
- It finds recipes that contain all of the user's preferred ingredients.
- It calculates the similarity between the user preferences and recipes.
- It recommends the top 10 recipes that are most similar to the user's preferences and contain all of the preferred ingredients.
- If the user is new and hasn't provided any preferences, the function recommends popular recipes by randomly selecting 10 recipes from the dataset.

- Input: raw ingredients
- Output: a list of recommended recipes for the user, based on those ingredients

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Represent every recipe's ingredient text as a TF-IDF vector,
# ignoring common English stop words.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(recipes_df['ingredients'])

# Recipe-to-recipe cosine similarity; with a single argument the matrix is
# compared against itself, so entry [i, j] relates row i and row j of recipes_df.
cosine_sim = cosine_similarity(tfidf_matrix)

# Function to get recipe recommendations
def get_recommendations(user_preferences=None, cosine_sim=cosine_sim, recipes_df=recipes_df, top_n=10):
    """Recommend recipe names for a list of preferred ingredients.

    Only recipes whose ingredient text contains *every* preferred ingredient
    are considered. Those candidates are ranked by their summed cosine
    similarity to the other candidates, and the best ``top_n`` are returned.

    Parameters
    ----------
    user_preferences : list of str, str or None
        Preferred ingredients. A single string is treated as one ingredient.
        Matching is a case-insensitive, literal substring test (so 'onion'
        also matches 'green onions'). When empty or None, a random sample of
        recipes is returned instead.
    cosine_sim : 2-D array
        Pairwise recipe similarity; row/column ``i`` must correspond to row
        ``i`` (by position) of ``recipes_df``.
    recipes_df : pandas.DataFrame
        Recipe table with at least 'name' and 'ingredients' columns.
    top_n : int
        Maximum number of recipe names to return.

    Returns
    -------
    str
        Recipe names joined by newlines, best match first. An empty string
        when no recipe contains all of the preferred ingredients.
    """
    # Normalise the input: None -> no preferences, a bare string -> one ingredient.
    if user_preferences is None:
        user_preferences = []
    elif isinstance(user_preferences, str):
        user_preferences = [user_preferences]
    wanted = [str(item).strip().lower() for item in user_preferences]
    wanted = [item for item in wanted if item]

    if not wanted:
        # New user without preferences: fall back to a random selection.
        # Cap the sample size so small tables do not make sample() raise.
        sample_size = max(0, min(top_n, len(recipes_df)))
        popular_recipes = recipes_df.sample(n=sample_size)['name']
        return '\n'.join(popular_recipes.astype(str))

    # Missing ingredient text is treated as "contains nothing".
    ingredient_text = recipes_df['ingredients'].fillna('').astype(str).str.lower()

    # A recipe is a candidate only if it contains ALL preferred ingredients.
    # regex=False keeps characters such as '(' or '+' in an ingredient literal.
    matches_all = None
    for ingredient in wanted:
        hits = ingredient_text.str.contains(ingredient, regex=False).astype(bool)
        matches_all = hits if matches_all is None else (matches_all & hits)

    # Work with row positions, because cosine_sim is indexed by position and
    # recipes_df may carry a non-default index.
    candidate_positions = [pos for pos, hit in enumerate(matches_all) if hit]
    if not candidate_positions:
        return ''

    # Score every candidate by its total similarity to the candidate set.
    candidate_scores = cosine_sim[candidate_positions][:, candidate_positions].sum(axis=0)

    # sorted() is stable, so ties keep their original table order.
    ranked = sorted(
        zip(candidate_positions, candidate_scores),
        key=lambda pair: pair[1],
        reverse=True,
    )
    top_positions = [pos for pos, _ in ranked[:max(0, top_n)]]

    recommended_recipes = recipes_df.iloc[top_positions]
    return '\n'.join(recommended_recipes['name'].astype(str))

### Testing on sample data

In [None]:
# Ask for recipes that use both onion and potato; the result is a
# newline-separated string of recipe names.
user_preferences = ['onion', 'potato']
recommendations = get_recommendations(user_preferences=user_preferences)
print(recommendations)

# 2.1 Classification-based recommendation system
- We train a Support Vector Machine (SVM) classifier to predict whether a user will like a recipe or not based on the recipe ID

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Load the raw interactions and keep a reproducible 1000-row sample,
# re-indexed from 0.
interactions_df = (
    pd.read_csv("RAW_interactions.csv")
    .sample(n=1000, random_state=42)
    .reset_index(drop=True)
)

# Features are the two ID columns; the target is True for ratings above 3
# ("liked") and False otherwise ("not liked").
X = interactions_df.loc[:, ['user_id', 'recipe_id']]
y = interactions_df['rating'].gt(3)

# Hold out 20% of the sample for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Vectorize the recipe IDs as text: fit on the training IDs, then reuse the
# fitted vocabulary for the test IDs.
# NOTE: this rebinds the module-level name `tfidf`, which the content-based
# section earlier assigned to the ingredient vectorizer.
# NOTE(review): each ID is a single token, so test IDs absent from the training
# vocabulary presumably become all-zero rows - confirm this is intended.
tfidf = TfidfVectorizer()
X_train_tfidf = tfidf.fit_transform(X_train['recipe_id'].astype(str))
X_test_tfidf = tfidf.transform(X_test['recipe_id'].astype(str))

# Linear-kernel SVM trained on the vectorized recipe IDs
clf = SVC(kernel='linear')
clf.fit(X_train_tfidf, y_train)

# Score the held-out rows
y_pred = clf.predict(X_test_tfidf)
print("Accuracy:", accuracy_score(y_test, y_pred))

Accuracy: 0.88


# 2.2 Classification-based recommendation system

In [19]:
from scipy.sparse import hstack
from sklearn.preprocessing import OneHotEncoder

# One-hot encoders for the two ID columns. They are fitted on the full X
# (train and test rows together) so that every ID seen at transform time is known.
user_encoder = OneHotEncoder().fit(X[['user_id']])
recipe_encoder = OneHotEncoder().fit(X[['recipe_id']])

# Encode the train and test partitions with the fitted encoders
X_train_user, X_test_user = (
    user_encoder.transform(part[['user_id']]) for part in (X_train, X_test)
)
X_train_recipe, X_test_recipe = (
    recipe_encoder.transform(part[['recipe_id']]) for part in (X_train, X_test)
)

# Stack the recipe-ID TF-IDF columns from the SVM section next to the
# one-hot user and recipe columns
X_train_combined = hstack([X_train_tfidf, X_train_user, X_train_recipe])
X_test_combined = hstack([X_test_tfidf, X_test_user, X_test_recipe])

# Gradient-boosted classifier with default settings.
# NOTE(review): XGBClassifier is not imported anywhere in the visible notebook;
# on a fresh kernel this line needs `from xgboost import XGBClassifier` first.
xgb_clf = XGBClassifier()
xgb_clf.fit(X_train_combined, y_train)

# Predict on the held-out rows and report the accuracy
y_pred_xgb_combined = xgb_clf.predict(X_test_combined)
accuracy = accuracy_score(y_test, y_pred_xgb_combined)
print("Accuracy:", accuracy)

Accuracy: 0.885


# Displaying recipe image

In [None]:
import os
import matplotlib.pyplot as plt

# Function to display images of recipes
def display_recipe_images(recipe_names, image_dir="", extension=".jpeg"):
    """Show the image of each recipe, one matplotlib figure per recipe.

    The image for a recipe is looked up at
    ``<image_dir>/<recipe_name><extension>``; with the defaults that is
    ``"<recipe_name>.jpeg"`` in the current working directory.

    Parameters
    ----------
    recipe_names : iterable of str
        Recipe names; each one is used both as the file stem and as the
        figure title.
    image_dir : str
        Directory that holds the image files. Empty means the current
        working directory.
    extension : str
        File extension, including the leading dot.

    Returns
    -------
    None
        Figures are shown as a side effect. For a recipe without an image
        file, a "not found" message is printed instead.
    """
    for recipe_name in recipe_names:
        # Derive the path from the recipe name so each recipe shows its own image.
        image_path = os.path.join(image_dir, f"{recipe_name}{extension}")
        if os.path.isfile(image_path):
            img = plt.imread(image_path)
            plt.imshow(img)
            plt.title(recipe_name)
            plt.axis('off')
            plt.show()
        else:
            print(f"Image not found for {recipe_name}")


# Example usage: try to show the image for a single recipe name
recipe_names = ["tomato basil pasta"]
display_recipe_images(recipe_names=recipe_names)

In [None]:
# Count how many interaction rows each user_id has; value_counts() returns
# the counts sorted from most to least frequent.
user_ratings_df = pd.read_csv('RAW_interactions.csv')
user_recipe_count = user_ratings_df['user_id'].value_counts()

# Keep the ten users with the highest counts
top_users = user_recipe_count.iloc[:10]

# Draw the counts as a bar chart on a 10x6 inch figure
plt.figure(figsize=(10, 6))
ax = top_users.plot.bar()
ax.set_xlabel('User ID')
ax.set_ylabel('Number of Recipes Saved')
ax.set_title('Top 10 Users with the Most Saved Recipes')
plt.show()