<a href="https://colab.research.google.com/github/kvgopichand1/food_reciepe/blob/main/food_recipe.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

# Step 1: Generate Synthetic Dataset
# Each record is (title, ingredients, instructions) for one hand-written recipe.
_base_recipes = [
    (
        "Classic Pizza",
        "flour, water, yeast, salt, tomatoes, cheese",
        "Mix flour, water, yeast, and salt to make dough. Let it rise. Add tomatoes and cheese. Bake at 200°C for 15 minutes.",
    ),
    (
        "Margherita Pizza",
        "flour, water, yeast, salt, tomatoes, basil, mozzarella",
        "Prepare dough with flour, water, yeast, and salt. Add tomatoes, basil, and mozzarella. Bake at 200°C for 15 minutes.",
    ),
    (
        "Pepperoni Pizza",
        "flour, water, yeast, salt, pepperoni, cheese, tomatoes",
        "Make dough with flour, water, yeast, and salt. Add pepperoni, cheese, and tomatoes. Bake at 200°C for 15 minutes.",
    ),
    (
        "Veggie Pizza",
        "flour, water, yeast, salt, bell peppers, onions, mushrooms, tomatoes, cheese",
        "Prepare dough with flour, water, yeast, and salt. Add bell peppers, onions, mushrooms, tomatoes, and cheese. Bake at 200°C for 15 minutes.",
    ),
    (
        "Cheese Pizza",
        "flour, water, yeast, salt, cheese, tomatoes",
        "Mix flour, water, yeast, and salt to make dough. Add cheese and tomatoes. Bake at 200°C for 15 minutes.",
    ),
]
_columns = ("title", "ingredients", "instructions")

# Transpose the records into columns and repeat every column 50 times,
# giving 250 rows that cycle through the five recipes in order.
data = {
    column: list(values) * 50
    for column, values in zip(_columns, zip(*_base_recipes))
}

# Persist the table so the next step can load it from disk.
df = pd.DataFrame(data)
df.to_csv('pizza_recipes.csv', index=False)

# Step 2: Load and Preprocess the Dataset
df = pd.read_csv('pizza_recipes.csv')
specific_food = "Pizza"

# Keep rows whose title mentions the requested food (case-insensitive; rows
# with a missing title count as non-matching), then drop rows that have no
# instructions text.
_title_matches = df['title'].str.contains(specific_food, case=False, na=False)
filtered_df = df[_title_matches].dropna(subset=['instructions'])

# Step 3: Tokenize and Prepare the Data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(filtered_df['instructions'])
sequences = tokenizer.texts_to_sequences(filtered_df['instructions'])
word_index = tokenizer.word_index
print(f"Found {len(word_index)} unique tokens.")

max_sequence_len = 100
# Tokenizer ids start at 1; id 0 is reserved for padding, hence the +1.
vocab_size = len(word_index) + 1

# Prepare input and output data.
# Every recipe is expanded into (prefix -> next word) pairs. Taking the last
# column of a post-padded matrix as the label would make every label the
# padding id 0, because the recipes are much shorter than max_sequence_len;
# the model would then have a single output class and learn nothing.
prefixes = []
targets = []
for token_ids in sequences:
    token_ids = token_ids[:max_sequence_len]
    for end in range(1, len(token_ids)):
        prefixes.append(token_ids[:end])
        targets.append(token_ids[end])

# Inputs are post-padded to max_sequence_len - 1, the same layout that
# generate_recipe feeds to the model at inference time.
padded_sequences = pad_sequences(prefixes, maxlen=max_sequence_len - 1, padding='post')
X = padded_sequences              # Input sequences (recipe prefixes)
y = np.array(targets)             # Output (integer id of the next word)

# Split the data into training and testing sets
# NOTE: the dataset is five recipes repeated 50 times, so the test split
# contains the same samples as the training split; the reported accuracy
# measures memorisation, not generalisation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Build the Model
model = Sequential()
# mask_zero=True makes the LSTM skip the trailing padding positions.
model.add(Embedding(input_dim=vocab_size, output_dim=100, input_length=max_sequence_len-1, mask_zero=True))
model.add(LSTM(150, return_sequences=False))
model.add(Dropout(0.2))
# One output unit per vocabulary id so the argmax maps directly to a word id.
model.add(Dense(vocab_size, activation='softmax'))
# Sparse loss takes the integer word ids directly; no one-hot matrix needed.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Step 5: Train the Model
history = model.fit(X_train, y_train, epochs=20, batch_size=64, validation_data=(X_test, y_test))

# Step 6: Generate Recipes
def generate_recipe(seed_text, next_words, max_sequence_len):
    """Extend *seed_text* by greedily predicting one word at a time.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        seed_text: Text the generation starts from.
        next_words: Maximum number of words to append.
        max_sequence_len: Sequence length used when preparing the training
            data; the model input is padded to ``max_sequence_len - 1``.

    Returns:
        The seed text followed by the generated words, separated by single
        spaces. Generation stops early when the seed contains no known
        tokens or the model predicts an id that maps to no word (such as
        the padding id 0), so no blank words are appended.
    """
    # Reverse vocabulary (id -> word) maintained by the Keras tokenizer;
    # avoids scanning word_index for every generated word.
    index_to_word = tokenizer.index_word

    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        if not token_list:
            # No known tokens: the model has nothing to condition on.
            break
        padded = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='post')
        probabilities = model.predict(padded, verbose=0)
        # Reduce the (1, vocab) prediction to a plain int word id.
        predicted = int(np.argmax(probabilities, axis=-1)[0])
        output_word = index_to_word.get(predicted)
        if output_word is None:
            break
        seed_text += " " + output_word
    return seed_text

# Generate a sample recipe of up to 50 additional words from a short prompt.
seed_text = "Mix flour and water"
generated_recipe = generate_recipe(seed_text, 50, max_sequence_len)
print(generated_recipe)

# Step 7: Evaluate the Model
# evaluate() returns the loss followed by the compiled metrics (accuracy).
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

# Save the model
model.save('recipe_generator_model.h5')

Found 30 unique tokens.
Epoch 1/20


  return self.fn(y_true, y_pred, **self._fn_kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 633ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00
Epoch 2/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 239ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00
Epoch 3/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 233ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00
Epoch 4/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 235ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00
Epoch 5/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 233ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00
Epoch 6/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 231ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00
Epo



Mix flour and water                                                  
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - accuracy: 1.0000 - loss: 0.0000e+00




Test Accuracy: 100.00%
