In [2]:
import tensorflow as tf 
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Embedding, LSTM, Dense 
from tensorflow.keras.preprocessing.text import Tokenizer 
from tensorflow.keras.preprocessing.sequence import pad_sequences 
import numpy as np 
import regex as re 


In [2]:
def file_to_sentence_list(file_path): 
	with open(file_path, 'r') as file: 
		text = file.read() 

	# Splitting the text into sentences using 
	# delimiters like '.', '?', and '!' 
	sentences = [sentence.strip() for sentence in re.split( 
		r'(?<=[.!?])\s+', text) if sentence.strip()] 

	return sentences 

file_path = 'Indian_Dishes.txt'
text_data = file_to_sentence_list(file_path) 

# Tokenize the text data 
tokenizer = Tokenizer() 
tokenizer.fit_on_texts(text_data) 
total_words = len(tokenizer.word_index) + 1

# Create input sequences 
input_sequences = [] 
for line in text_data: 
	token_list = tokenizer.texts_to_sequences([line])[0] 
	for i in range(1, len(token_list)): 
		n_gram_sequence = token_list[:i+1] 
		input_sequences.append(n_gram_sequence) 

# Pad sequences and split into predictors and label 
max_sequence_len = max([len(seq) for seq in input_sequences]) 
input_sequences = np.array(pad_sequences( 
	input_sequences, maxlen=max_sequence_len, padding='pre')) 
X, y = input_sequences[:, :-1], input_sequences[:, -1] 

# Convert target data to one-hot encoding 
y = tf.keras.utils.to_categorical(y, num_classes=total_words) 


In [6]:
model = Sequential([
    Embedding(total_words, 10),
    LSTM(128),
    Dense(total_words, activation='softmax')
])


In [8]:

model.compile(loss='categorical_crossentropy', 
              optimizer='adam', metrics=['accuracy']) 

In [9]:
# Train the model 

model.fit(X, y, epochs=500, verbose=1) 


Epoch 1/500
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 63ms/step - accuracy: 0.0040 - loss: 4.7700  
Epoch 2/500
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - accuracy: 0.0462 - loss: 4.7597
Epoch 3/500
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - accuracy: 0.0536 - loss: 4.7016
Epoch 4/500
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - accuracy: 0.0513 - loss: 4.5427
Epoch 5/500
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - accuracy: 0.0518 - loss: 4.5535
Epoch 6/500
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - accuracy: 0.0715 - loss: 4.5184
Epoch 7/500
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - accuracy: 0.0545 - loss: 4.4937
Epoch 8/500
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - accuracy: 0.0462 - loss: 4.5014
Epoch 9/500
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

<keras.src.callbacks.history.History at 0x30e33c620>

In [13]:

# Generate next word predictions 
seed_text = "Chi"
next_words = 5
  
for _ in range(next_words): 
    token_list = tokenizer.texts_to_sequences([seed_text])[0] 
    token_list = pad_sequences( 
        [token_list], maxlen=max_sequence_len-1, padding='pre') 
    predicted_probs = model.predict(token_list) 
    predicted_word = tokenizer.index_word[np.argmax(predicted_probs)] 
    seed_text += " " + predicted_word 
  
print("Next predicted words:", seed_text) 



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Next predicted words: Chi chicken biryani chicken salad garlic


In [15]:
# Save the Keras model as HDF5 file
model.save('model.h5')

# Load the Keras model
from tensorflow.keras.models import load_model
loaded_model = load_model('model.h5')




In [4]:
import joblib
data = [
    "Chicken Biryani", "Chicken Salad", "Garlic Mushroom", "Fruit Ice Cream", "Paneer Biryani",
    "Vegetable Salad", "Rogan Josh", "Butter Chicken", "Palak Paneer", "Chole Bhature",
    "Masala Dosa", "Vada Pav", "Pani Puri", "Aloo Gobi", "Samosa", "Dal Makhani",
    "Tandoori Chicken", "Naan", "Rajma", "Mutton Biryani", "Fish Curry", "Kheer",
    "Jalebi", "Rasmalai", "Gulab Jamun", "Pav Bhaji", "Bhindi Masala", "Aloo Paratha",
    "Tandoori Roti", "Mango Lassi", "Hyderabadi Biryani", "Lamb Vindaloo", "Keema Naan",
    "Gajar Ka Halwa", "Chicken Tikka Masala", "Malai Kofta", "Vegetable Pulao", "Kadhai Paneer",
    "Prawns Masala", "Baingan Bharta", "Saffron Rice", "Tomato Soup", "Paneer Tikka",
    "Chicken 65", "Korma", "Bisi Bele Bath", "Fish Fry", "Goan Fish Curry", "Ladoo",
    "Dahi Vada", "Lamb Curry", "Paneer Butter Masala", "Shahi Paneer", "Bhatura",
    "Moong Dal Halwa", "Kulfi", "Egg Curry", "Beef Curry", "Coconut Rice", "Kashmiri Pulao",
    "Carrot Halwa", "Chicken Lollipop", "Dum Aloo", "Rasam", "Idli", "Sambhar", "Chicken Masala",
    "Mix Veg Curry", "Kofta", "Puliyogare", "Kadhi Pakoda", "Aloo Matar", "Methi Matar Malai",
    "Uttapam", "Masoor Dal", "Buttermilk", "Papad", "Green Salad", "Kadala Curry", "Paneer Kulcha",
    "Lassi", "Amritsari Fish", "Chana Dal", "Rava Idli", "Paneer Pasanda", "Lamb Rogan Josh",
    "Kesar Peda", "Besan Ladoo", "Dahi Puri", "Stuffed Paratha", "Chicken Do Pyaza",
    "Mushroom Masala", "Tawa Paneer", "Masala Papad", "Paneer Lababdar", "Paneer Jalfrezi",
    "Chapati", "Dal Tadka", "Sev Puri", "Bhel Puri", "Mushroom Biryani", "Egg Biryani", "Paneer Biryani"
]

# Create a tokenizer and fit on texts
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data)
joblib.dump(tokenizer, 'tokenizer.pkl')

['tokenizer.pkl']