In [1]:
from transformers import AutoTokenizer, AutoModel
import torch
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Single source of truth for the checkpoint so the tokenizer and the model
# can never be loaded from two different vocabularies by accident.
MODEL_NAME = "bert-base-uncased"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME)

In [3]:
# Candidate FAQ questions; an incoming query is matched against these by
# embedding similarity in the cells below.
faq_sentences = [
 "How can I reset my password?",
 "Where is the library located?",
 "What is Artificial Intelligence?",
 "How to apply for a scholarship?",
 "What are the cafeteria opening hours?"
]

In [5]:
def get_embedding(sentence):
    """Embed text by mean-pooling the model's final hidden states.

    The average is weighted by the tokenizer's attention mask, so padding
    positions (which only appear when a list of differently sized strings is
    passed) do not dilute the result. For a single string the mask is all
    ones and this reduces to a plain mean over the token axis.

    Args:
        sentence: A string, or a list of strings embedded as one padded batch.

    Returns:
        A 2-D numpy array with one row per input string.
    """
    inputs = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():  # inference only; skip building the autograd graph
        hidden = model(**inputs).last_hidden_state
        # Broadcast the mask over the hidden axis and match the float dtype.
        mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
        # clamp guards against division by zero for a fully masked row.
        token_counts = mask.sum(dim=1).clamp(min=1)
        pooled = (hidden * mask).sum(dim=1) / token_counts
    return pooled.numpy()

In [6]:
# One embedding per FAQ entry, kept in the same order as faq_sentences.
faq_embeddings = list(map(get_embedding, faq_sentences))

In [7]:
# Free-text question to look up among the FAQ entries.
query = "what is AI"
# Embedded with the same function as the FAQ entries so the vectors are comparable.
query_embedding = get_embedding(query)

In [10]:
# cosine_similarity returns a matrix; each call here compares one query row
# with one FAQ row, so the single score is read from position [0, 0].
similarities = [
    cosine_similarity(query_embedding, faq_embedding)[0, 0]
    for faq_embedding in faq_embeddings
]

In [11]:
# Position of the highest score; on a tie the earliest FAQ entry wins.
best_match_index = max(range(len(similarities)), key=similarities.__getitem__)
print(f"Student Question: {query}")
print(f"Most Similar FAQ: {faq_sentences[best_match_index]}")

Student Question: what is AI
Most Similar FAQ: What is Artificial Intelligence?


In [12]:
# Reference mapping of cricketer name -> country.
# NOTE(review): not referenced by any of the cells visible in this notebook.
cricketers = {
 "MS Dhoni": "India",
 "Virat Kohli": "India",
 "Ricky Ponting": "Australia",
 "Steve Smith": "Australia",
 "Kane Williamson": "New Zealand",
 "Joe Root": "England"
}

In [13]:
# NOTE: a function with this name is also defined earlier in the notebook;
# this later definition is the one in effect for the cells that follow.
def get_embedding(text):
    """Embed text by mean-pooling the model's final hidden states.

    The average is weighted by the tokenizer's attention mask, so padding
    positions (which only appear when a list of differently sized strings is
    passed) do not dilute the result. For a single string the mask is all
    ones and this reduces to a plain mean over the token axis.

    Args:
        text: A string, or a list of strings embedded as one padded batch.

    Returns:
        A 2-D numpy array with one row per input string.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():  # inference only; skip building the autograd graph
        hidden = model(**inputs).last_hidden_state
        # Broadcast the mask over the hidden axis and match the float dtype.
        mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
        # clamp guards against division by zero for a fully masked row.
        token_counts = mask.sum(dim=1).clamp(min=1)
        pooled = (hidden * mask).sum(dim=1) / token_counts
    return pooled.numpy()

In [14]:
def check_country_similarity(player, country):
    """Return the cosine similarity between the embeddings of two strings.

    Args:
        player: Text embedded first (a player name in this notebook).
        country: Text embedded second (a country name in this notebook).

    Returns:
        The single similarity score taken from the result matrix.
    """
    # Embed in (player, country) order, then unpack as the two operands.
    embeddings = [get_embedding(name) for name in (player, country)]
    return cosine_similarity(*embeddings)[0][0]

In [15]:
# (player, country) pairs to score in the loop below.
test_cases = [
 ("MS Dhoni", "India"),
 ("MS Dhoni", "Australia"),
 ("Ricky Ponting", "Australia"),
 ("Virat Kohli", "India"),
 ("Kane Williamson", "New Zealand"),
 ("Joe Root", "India")
]

In [19]:
threshold = 0.53  # You can adjust this
for player, country in test_cases:
    score = check_country_similarity(player, country)
    # The comparison uses the unrounded score while the printout rounds to two
    # decimals, so a line can display a value equal to the threshold and still
    # be reported as a non-match.
    if score >= threshold:
        match = "MATCH "
    else:
        match = "NO MATCH "
    print(f"{player} - {country} | Similarity: {score:.2f} | {match}")

MS Dhoni - India | Similarity: 0.64 | MATCH 
MS Dhoni - Australia | Similarity: 0.58 | MATCH 
Ricky Ponting - Australia | Similarity: 0.55 | MATCH 
Virat Kohli - India | Similarity: 0.53 | NO MATCH 
Kane Williamson - New Zealand | Similarity: 0.55 | MATCH 
Joe Root - India | Similarity: 0.68 | MATCH 
