In [1]:
import numpy as np
import pickle
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model

In [2]:
# Restore the trained network from its saved HDF5 file.
model = load_model("saved_model/simple_rnn_model.h5")

# The pickle file is unpacked into a (tokenizer, label_encoder) pair.
# NOTE(review): pickle.load can execute arbitrary code while deserializing —
# only load artifact files that come from a trusted source.
with open("saved_model/tokenizer.pkl", "rb") as artifact_file:
    artifacts = pickle.load(artifact_file)
tokenizer, label_encoder = artifacts

# Show the class labels known to the encoder as a quick sanity check.
print("Classes:", label_encoder.classes_)



Classes: ['Arabic' 'Danish' 'Dutch' 'English' 'French' 'German' 'Greek' 'Hindi'
 'Italian' 'Kannada' 'Malayalam' 'Portugeese' 'Russian' 'Spanish'
 'Sweedish' 'Tamil' 'Turkish']


In [3]:
def predict_language(text, maxlen=80):
    """Predict the language of a single text string.

    Args:
        text: Raw input string to classify.
        maxlen: Length every token sequence is padded/truncated to before
            inference. Defaults to 80; this is assumed to match the value
            used when the model was trained — TODO confirm against the
            training notebook.

    Returns:
        A ``(label, scores)`` tuple. ``label`` is the decoded class label of
        the highest-scoring class; ``scores`` is the model's output row for
        this text, which callers index in the same order as
        ``label_encoder.classes_``.
    """
    # The tokenizer works on batches, so wrap the single text in a list.
    seq = tokenizer.texts_to_sequences([text])

    # Bring the sequence to the fixed length passed as `maxlen`.
    padded = pad_sequences(seq, maxlen=maxlen)

    # One input row in, one output row out: keep only that row.
    scores = model.predict(padded)[0]

    # argmax over this sample's row only. Calling argmax on the whole 2-D
    # batch output would return an index into the flattened array, which is
    # correct only by accident when the batch holds exactly one sample.
    class_index = np.argmax(scores)

    # Map the numeric class index back to its label.
    label = label_encoder.inverse_transform([class_index])[0]
    return label, scores

In [4]:
text = "यह एक अच्छा दिन है"

# Classify the sample sentence; `probs` holds one score per class.
lang, probs = predict_language(text)

print("Text:", text)
print("Predicted Language:", lang)
print("Top 3 probabilities:")

# Rank class indices from highest to lowest score, then keep the first three.
ranked = np.argsort(probs)[::-1]
top3 = ranked[:3]
for i in top3:
    class_name = label_encoder.classes_[i]
    print(class_name, ":", round(probs[i], 3))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 186ms/step
Text: यह एक अच्छा दिन है
Predicted Language: Hindi
Top 3 probabilities:
Hindi : 0.839
Portugeese : 0.064
Spanish : 0.03


In [5]:
# Sample sentences to run through the classifier, one per script/language.
texts = [
    "This is a beautiful day",
    "Ceci est une belle journée",
    "Este es un buen día",
    "यह एक अच्छा दिन है",
    "ഇത് ഒരു നല്ല ദിവസമാണ്"
]

for text in texts:
    lang, probs = predict_language(text)

    print(f"Text: {text}")
    print(f"Predicted Language: {lang}")

    # Indices of the three highest scores, best first.
    top3 = np.argsort(probs)[-3:][::-1]
    for i in top3:
        # Use an explicit fixed-point format. round() on a NumPy float scalar
        # inside an f-string printed the full float expansion
        # (e.g. 0.9990000128746033) instead of three decimals.
        print(f"  {label_encoder.classes_[i]} : {float(probs[i]):.3f}")

    print("-" * 60)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
Text: This is a beautiful day
Predicted Language: English
  English : 0.9990000128746033
  Dutch : 0.0010000000474974513
  Portugeese : 0.0
------------------------------------------------------------
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
Text: Ceci est une belle journée
Predicted Language: French
  French : 0.9950000047683716
  Danish : 0.003000000026077032
  Sweedish : 0.0020000000949949026
------------------------------------------------------------
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
Text: Este es un buen día
Predicted Language: Spanish
  Spanish : 0.9990000128746033
  Turkish : 0.0
  Italian : 0.0
------------------------------------------------------------
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
Text: यह एक अच्छा दिन है
Predicted Language: Hindi
  Hindi : 0.8389999866485596
  Portugeese : 0.06400000303983688
