In [None]:
import pandas as pd
import numpy as np
import re
import string

from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix

In [None]:
# Load the food catalogue and the user ratings from the Colab upload folder.
FOOD_CSV_PATH = '/content/1662574418893344 (1).csv'
RATINGS_CSV_PATH = '/content/ratings (1).csv'

df = pd.read_csv(FOOD_CSV_PATH)
ratings = pd.read_csv(RATINGS_CSV_PATH)

In [None]:
# Preprocessing functions
def preprocess_text(text):
    """Clean and normalize a piece of free text.

    The text is lower-cased, every character in ``string.punctuation`` is
    removed, runs of whitespace are collapsed into a single space, and
    leading/trailing whitespace is stripped.

    Missing values (``None`` or a float NaN, which is what pandas produces for
    an empty CSV cell) yield an empty string instead of raising
    ``AttributeError``. Any other non-string value is converted with ``str``
    before cleaning.

    Args:
        text: The value to clean; normally a ``str``.

    Returns:
        str: The cleaned text (possibly empty).
    """
    # NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    if not isinstance(text, str):
        text = str(text)

    text = text.lower()
    text = re.sub(f'[{re.escape(string.punctuation)}]', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

In [None]:
# Normalize every description in place (lower-case, no punctuation, single spaces).
df['Describe'] = df['Describe'].map(preprocess_text)

In [None]:
# Content-Based Filtering (TF-IDF)
def create_tfidf_matrix(df, column='Describe'):
    """Vectorize one text column of ``df`` with TF-IDF.

    A fresh ``TfidfVectorizer`` (English stop words removed) is fitted on
    ``df[column]`` and the transformed matrix is returned. The fitted
    vectorizer itself is discarded.

    Args:
        df: DataFrame holding the text column.
        column: Name of the column to vectorize.

    Returns:
        The matrix produced by ``TfidfVectorizer.fit_transform``, one row per
        row of ``df``.
    """
    vectorizer = TfidfVectorizer(stop_words='english')
    return vectorizer.fit_transform(df[column])

In [None]:
def get_content_based_recommendations(title, df, cosine_sim, only_food, top_n=5):
    """Return the names of the items most similar to ``title``.

    Args:
        title: Name of the query item; looked up in ``only_food``.
        df: DataFrame with a ``'Name'`` column. Rows are selected by position
            (``iloc``), so the row order must match ``cosine_sim``.
        cosine_sim: Square similarity matrix; row ``i`` holds the similarity
            of item ``i`` to every item.
        only_food: Mapping (typically a Series) from item name to row number.
            The value is used both as a row of ``cosine_sim`` and as an
            ``iloc`` position in ``df``.
        top_n: Maximum number of recommendations to return (default 5).

    Returns:
        pandas.Series: Up to ``top_n`` names from ``df['Name']``, ordered from
        most to least similar. The query item itself is never included.

    Raises:
        KeyError: If ``title`` is not present in ``only_food``.
    """
    idx = only_food[title]
    # A name that occurs more than once in the lookup yields a Series rather
    # than a scalar; fall back to its first occurrence.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    scores = np.asarray(cosine_sim[idx]).ravel()
    # Stable descending sort: ties keep ascending row order.
    ranked = np.argsort(-scores, kind='stable')
    # Drop the query item explicitly instead of assuming it ranks first;
    # another row with an identical description ties at 1.0 and may precede it.
    ranked = ranked[ranked != idx][:top_n]
    return df['Name'].iloc[ranked]

In [None]:
# Advanced Content-Based Filtering
def create_advanced_content_matrix(df, features):
    """Build a bag-of-words matrix from several columns joined together.

    For every row the values of ``features`` are joined with single spaces
    into one string, which is then vectorized with ``CountVectorizer``
    (English stop words removed).

    Values are converted with ``str`` and missing values (NaN/None) are
    skipped, so a numeric or empty cell no longer makes the join raise
    ``TypeError``.

    Side effect: the joined text is stored in ``df['combined_features']``,
    creating or overwriting that column on the caller's DataFrame.

    Args:
        df: DataFrame containing the feature columns; modified in place.
        features: List of column names to combine.

    Returns:
        The matrix produced by ``CountVectorizer.fit_transform``, one row per
        row of ``df``.
    """
    df['combined_features'] = df[features].apply(
        lambda row: ' '.join(str(value) for value in row if pd.notna(value)),
        axis=1,
    )
    count = CountVectorizer(stop_words='english')
    count_matrix = count.fit_transform(df['combined_features'])
    return count_matrix

In [None]:
# Collaborative Filtering
def prepare_collaborative_data(ratings, max_rows=511):
    """Build the item-by-user rating matrix used for collaborative filtering.

    Rows containing any missing value are dropped, the first ``max_rows``
    remaining rows are kept, and the result is pivoted so that each row is a
    ``Food_ID``, each column a ``User_ID`` and each cell the rating (mean of
    duplicates, as ``pivot_table`` aggregates by default). Unrated cells are
    filled with 0.

    Args:
        ratings: DataFrame with ``'Food_ID'``, ``'User_ID'`` and ``'Rating'``
            columns. It is not modified.
        max_rows: Number of rows to keep after dropping missing values. The
            default of 511 preserves the previous hard-coded limit; pass
            ``None`` to use every row.

    Returns:
        tuple: ``(rating_matrix, csr_rating_matrix)`` where ``rating_matrix``
        is the dense pivoted DataFrame and ``csr_rating_matrix`` is the same
        values as a ``scipy.sparse.csr_matrix``.
    """
    # .iloc[:None] keeps all rows, so None naturally means "no limit".
    ratings = ratings.dropna().iloc[:max_rows]
    rating_matrix = ratings.pivot_table(index='Food_ID', columns='User_ID', values='Rating').fillna(0)
    csr_rating_matrix = csr_matrix(rating_matrix.values)
    return rating_matrix, csr_rating_matrix


In [None]:
def train_collaborative_filtering_model(csr_rating_matrix):
    """Fit a cosine-distance nearest-neighbours model on the rating matrix.

    Args:
        csr_rating_matrix: Rating matrix to fit on; each row is treated as one
            sample by ``NearestNeighbors``.

    Returns:
        The fitted ``NearestNeighbors`` estimator.
    """
    # fit() returns the estimator itself, so construction and fitting chain.
    return NearestNeighbors(metric='cosine', algorithm='auto').fit(csr_rating_matrix)


In [None]:
def get_collaborative_recommendations(title, df, recommender, rating_matrix, top_n=5):
    """Recommend items whose rating pattern is closest to that of ``title``.

    Args:
        title: Name of the query item; matched exactly against ``df['Name']``.
        df: DataFrame with ``'Name'`` and ``'Food_ID'`` columns.
        recommender: Object exposing ``kneighbors(X, n_neighbors=...)`` that
            returns ``(distances, indices)``; the indices are used as row
            positions of ``rating_matrix``.
        rating_matrix: DataFrame indexed by ``Food_ID`` whose rows are the
            rating vectors.
        top_n: Maximum number of recommendations to return (default 5).

    Returns:
        pandas.Series: Names of up to ``top_n`` neighbouring items, nearest
        first, excluding the query item. A neighbour whose ``Food_ID`` is
        absent from ``df`` appears as NaN (left merge).

    Raises:
        IndexError: If ``title`` is not in ``df`` or its ``Food_ID`` has no
            row in ``rating_matrix``.
    """
    matches = df.loc[df['Name'] == title, 'Food_ID']
    if matches.empty:
        raise IndexError(f"No item named {title!r} found in df")
    food_id = int(matches.iloc[0])

    positions = np.where(rating_matrix.index == food_id)[0]
    if positions.size == 0:
        raise IndexError(
            f"Food_ID {food_id} ({title!r}) has no row in rating_matrix"
        )
    item_position = positions[0]

    reshaped = rating_matrix.iloc[item_position].values.reshape(1, -1)
    # Ask for one extra neighbour because the query item is normally among
    # the results. Cap at the matrix size, assuming the recommender was
    # fitted on this matrix (verify against caller).
    n_neighbors = min(top_n + 1, len(rating_matrix))
    distances, indices = recommender.kneighbors(reshaped, n_neighbors=n_neighbors)

    neighbor_ids = rating_matrix.index[indices[0]]
    # Remove the query item by id rather than by position: with tied
    # distances it is not guaranteed to be the first neighbour.
    neighbor_ids = neighbor_ids[neighbor_ids != food_id][:top_n]

    nearest_neighbors = pd.DataFrame({'Food_ID': neighbor_ids})
    result = pd.merge(nearest_neighbors, df, on='Food_ID', how='left')

    return result['Name']

In [None]:
# Initialize matrices and models
# TF-IDF vectors of the descriptions and their pairwise cosine similarity.
tfidf_matrix = create_tfidf_matrix(df)
# With a single argument the matrix is compared against itself.
cosine_sim = cosine_similarity(tfidf_matrix)


In [None]:
# Lookup from item name to row label of df.
# De-duplicate on the *names* (the index): the values are df's row labels, so
# drop_duplicates() on them never removed a repeated name, and a repeated
# name makes only_food[title] return a Series instead of a single row number.
only_food = pd.Series(df.index, index=df['Name'])
only_food = only_food[~only_food.index.duplicated(keep='first')]


In [None]:
# Bag-of-words over cuisine type, veg/non-veg flag and description, plus the
# pairwise cosine similarity between items.
ADVANCED_FEATURES = ['C_Type', 'Veg_Non', 'Describe']
count_matrix = create_advanced_content_matrix(df, features=ADVANCED_FEATURES)
# With a single argument the matrix is compared against itself.
cosine_sim2 = cosine_similarity(count_matrix)


In [None]:
# Pivot the ratings into an item-by-user matrix, then fit the neighbour model on its sparse form.
rating_matrix, csr_rating_matrix = prepare_collaborative_data(ratings=ratings)
recommender = train_collaborative_filtering_model(csr_rating_matrix=csr_rating_matrix)


In [None]:
# Smoke-test the TF-IDF based recommender on one known item.
print("Content-based Recommendations (Simple):")
simple_recommendations = get_content_based_recommendations(
    'christmas cake', df, cosine_sim, only_food
)
print(simple_recommendations)


Content-based Recommendations (Simple):
378      Grilled Chicken with Almond and Garlic Sauce
234                                  whole wheat cake
393    Fig and Sesame Tart with Cardamom Orange Cream
227                         chocolate chip cheesecake
250                            lemon poppy seed cake 
Name: Name, dtype: object


In [None]:
# Same query against the combined-feature (CountVectorizer) similarity matrix.
print("\nContent-based Recommendations (Advanced):")
advanced_recommendations = get_content_based_recommendations(
    'christmas cake', df, cosine_sim2, only_food
)
print(advanced_recommendations)



Content-based Recommendations (Advanced):
250    lemon poppy seed cake 
228       chocolate lava cake
198     lemon poppy seed cake
235                 plum cake
233     cinnamon star cookies
Name: Name, dtype: object


In [None]:
# Same query against the rating-based nearest-neighbour model.
print("\nCollaborative Filtering Recommendations:")
collaborative_recommendations = get_collaborative_recommendations(
    'christmas cake', df, recommender, rating_matrix
)
print(collaborative_recommendations)


Collaborative Filtering Recommendations:
0                                  french pork chop
1                                  egg in a blanket
2                                    chicken paella
3    couscous with ratatouille - tangy tomato sauce
4                                 prawn potato soup
Name: Name, dtype: object


In [None]:
import pickle

# Persist the TF-IDF matrix and its similarity matrix to the working directory.
for filename, artifact in (
    ('tfidf_matrix.pkl', tfidf_matrix),
    ('cosine_sim.pkl', cosine_sim),
):
    with open(filename, 'wb') as file:
        pickle.dump(artifact, file)

In [None]:
# Persist the count matrix and its similarity matrix to the working directory.
for filename, artifact in (
    ('count_matrix.pkl', count_matrix),
    ('cosine_sim2.pkl', cosine_sim2),
):
    with open(filename, 'wb') as file:
        pickle.dump(artifact, file)

In [None]:
# Persist the fitted nearest-neighbour recommender to the working directory.
model_path = 'collaborative_model.pkl'
with open(model_path, 'wb') as file:
    pickle.dump(recommender, file)