In [1]:
import ast
import os

import joblib
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

In [2]:
class RecipeRecommenderModelBuilder:
    """Build and persist TF-IDF artifacts computed from recipe ingredients.

    The methods are meant to be called in order: ``prepare_data``,
    ``create_vectorizer_and_vectors``, then ``save_model``.

    Attributes:
        df: DataFrame read from the CSV; replaced by the cleaned frame
            (with an added ``ingredient_list`` column) by ``prepare_data``.
        vectorizer: ``TfidfVectorizer`` fitted on ``ingredient_list``; only
            set by ``create_vectorizer_and_vectors``.
        ingredient_vectors: Matrix returned by ``fit_transform``; only set
            by ``create_vectorizer_and_vectors``.
    """

    # Column holding each recipe's ingredients. Cells are expected to be the
    # text of a Python literal sequence of strings, e.g. "['salt', 'flour']".
    INGREDIENTS_COLUMN = 'ingredients'

    def __init__(self, file_path):
        """Load the raw recipes CSV.

        Args:
            file_path: Path to the CSV file to read.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
        """
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"The file {file_path} does not exist.")
        self.df = pd.read_csv(file_path)

    @staticmethod
    def _join_ingredients(raw):
        """Turn one raw ingredients cell into a space-separated string.

        Args:
            raw: Either the text of a Python literal sequence of strings,
                or an already-parsed iterable of strings.

        Returns:
            The ingredients joined with single spaces.

        Raises:
            ValueError: If ``raw`` is a string that is not a valid Python
                literal.
        """
        if isinstance(raw, str):
            try:
                # literal_eval only accepts Python literals, so content coming
                # from the CSV can never execute code (eval would run it).
                items = ast.literal_eval(raw)
            except (ValueError, SyntaxError) as exc:
                raise ValueError(
                    f"Cannot parse ingredients entry: {raw!r}"
                ) from exc
        else:
            items = raw
        return ' '.join(items)

    def prepare_data(self):
        """Clean the recipes and derive the ``ingredient_list`` text column.

        Rows with a missing ingredients value are dropped, rows repeating an
        earlier ingredients value are dropped, and only rows whose raw
        ingredients text contains at least two commas are kept. ``self.df``
        is replaced by the cleaned frame.

        Raises:
            ValueError: If an ingredients entry cannot be parsed.
        """
        column = self.INGREDIENTS_COLUMN
        cleaned = (
            self.df
            .dropna(subset=[column])
            .drop_duplicates(subset=column)
        )
        # Comma count on the raw text is a cheap proxy for "has at least
        # three ingredients" that avoids parsing rows about to be discarded.
        has_enough_commas = cleaned[column].str.count(',') >= 2
        # Explicit copy so adding a column below writes to an independent
        # frame instead of a slice of the original (SettingWithCopyWarning).
        cleaned = cleaned[has_enough_commas].copy()
        cleaned['ingredient_list'] = cleaned[column].apply(self._join_ingredients)
        self.df = cleaned

    def create_vectorizer_and_vectors(self):
        """Fit a TF-IDF vectorizer on ``ingredient_list`` and keep the result.

        Requires ``prepare_data`` to have run, since that is what creates the
        ``ingredient_list`` column.
        """
        self.vectorizer = TfidfVectorizer()
        self.ingredient_vectors = self.vectorizer.fit_transform(self.df['ingredient_list'])

    def save_model(self):
        """Write the processed frame, vectorizer and vectors to disk.

        Creates ``processed_recipes.csv``, ``tfidf_vectorizer.joblib`` and
        ``ingredient_vectors.joblib`` relative to the current working
        directory. Requires ``create_vectorizer_and_vectors`` to have run,
        since that is what sets ``vectorizer`` and ``ingredient_vectors``.
        """
        self.df.to_csv('processed_recipes.csv', index=False)
        joblib.dump(self.vectorizer, 'tfidf_vectorizer.joblib')
        joblib.dump(self.ingredient_vectors, 'ingredient_vectors.joblib')

In [3]:
# Build the recommender artifacts from the raw recipe CSV.
file_path = 'RAW_recipes.csv'
builder = RecipeRecommenderModelBuilder(file_path)

# Stages must run in this order: clean the data, fit the vectorizer, persist.
for run_stage in (
    builder.prepare_data,
    builder.create_vectorizer_and_vectors,
    builder.save_model,
):
    run_stage()