In [1]:
!pip install faiss-cpu



In [2]:
import pandas as pd
import numpy as np
import faiss
from sklearn.feature_extraction.text import TfidfVectorizer

filename = 'names_dataset.csv'


def load_names(source):
    """Read the names CSV and return the cleaned frame plus the list of names.

    Rows whose ``Name`` cell is missing are dropped *before* the column is cast
    to ``str``. Casting first would turn every missing value into the literal
    string ``'nan'``, which would then sit in the returned list as if it were a
    real name. The frame index is reset afterwards so that row ``i`` of the
    frame corresponds to element ``i`` of the returned list.

    Args:
        source: Path or file-like object accepted by ``pd.read_csv``. The data
            must contain a ``Name`` column.

    Returns:
        tuple: ``(frame, names)`` where ``frame`` is the cleaned DataFrame and
        ``names`` is ``frame['Name']`` as a plain list of ``str``.

    Raises:
        FileNotFoundError: If ``source`` is a path that does not exist.
        KeyError: If the data has no ``Name`` column.
    """
    frame = pd.read_csv(source)
    frame = frame.dropna(subset=['Name']).reset_index(drop=True)
    frame['Name'] = frame['Name'].astype(str)
    return frame, frame['Name'].tolist()


try:
    df, names_list = load_names(filename)
except FileNotFoundError:
    # Best-effort by design: report the problem and let the notebook continue.
    print(f"‚ùå Error: File '{filename}' not found.")
    print("Please upload 'names_dataset.csv' to the Colab Files section.")
else:
    print(f"‚úÖ Successfully loaded '{filename}'")
    print(f"üìä Total Names in Database: {len(names_list)}")
    print(df.head())

‚úÖ Successfully loaded 'names_dataset.csv'
üìä Total Names in Database: 100
   ID    Name
0   1  Geetha
1   2    Gita
2   3   Geeta
3   4    Gitu
4   5   Githa


In [3]:
# Character n-grams of length 2-4 (analyzer 'char_wb') turn each name into a
# TF-IDF vector, so spelling variants that share substrings share features.
vectorizer = TfidfVectorizer(ngram_range=(2, 4), analyzer='char_wb')
name_vectors = vectorizer.fit_transform(names_list)

# The index is fed a dense float32 matrix. normalize_L2 rescales the rows in
# place, so the inner product computed by IndexFlatIP is a cosine similarity.
dataset_vectors = name_vectors.astype('float32').toarray()
faiss.normalize_L2(dataset_vectors)

# One index dimension per n-gram feature learned by the vectorizer.
dimension = dataset_vectors.shape[-1]
index = faiss.IndexFlatIP(dimension)
index.add(dataset_vectors)

print(f"‚úÖ FAISS Index built with {index.ntotal} vectors of dimension {dimension}.")

‚úÖ FAISS Index built with 100 vectors of dimension 676.


In [4]:
def search_name(query, k=5):
    """Look up the names most similar to ``query`` in the FAISS index.

    The query is vectorized with the same fitted ``vectorizer`` that produced
    the indexed vectors, L2-normalized, and searched against ``index``.

    Args:
        query: The name to search for.
        k: Maximum number of neighbours to request from the index.

    Returns:
        tuple: ``(scores, ids)`` as two 1-D NumPy arrays of equal length, in
        the order returned by ``index.search``. ``ids`` are row positions in
        the indexed data. Fewer than ``k`` entries are returned when the index
        holds fewer than ``k`` vectors.
    """
    query_vec = vectorizer.transform([query]).toarray().astype('float32')
    faiss.normalize_L2(query_vec)

    distances, ids = index.search(query_vec, k)
    top_scores, top_ids = distances[0], ids[0]

    # FAISS pads the result with id -1 when it cannot supply k neighbours.
    # Drop that padding so a caller never evaluates names_list[-1] by accident
    # (Python would silently return the last name instead of failing).
    found = top_ids >= 0
    return top_scores[found], top_ids[found]


# Interactive lookup: prompt for a name, query the index and print a ranked table.
# The guard below only proceeds when the index-building cell has defined `index`.
if 'index' not in locals():
    print("‚ö†Ô∏è Please run Cell 3 first to build the index.")
else:
    user_input = input("Enter a name to search: ")

    if not user_input.strip():
        print("‚ö†Ô∏è Please enter a valid name.")
    else:
        scores, indices = search_name(user_input, k=5)

        banner = "=" * 50
        print("\n" + banner)
        print(f"üîç FAISS SEARCH RESULTS FOR: '{user_input}'")
        print(banner)

        # The first hit is treated as the best one; below 0.1 it is reported
        # as "no match" rather than shown.
        if scores[0] < 0.1:
            print("‚ö†Ô∏è No relevant matches found.")
        else:
            best_score, best_idx = scores[0], indices[0]
            best_name = names_list[best_idx]

            print("\nüåü BEST MATCH:")
            print(f"   Name:  {best_name}")
            print(f"   Relevance Score: {best_score:.4f}")

            divider = "   " + "-" * 40
            print("\nüìã RELEVANT NAMES (Decreasing Order):")
            print(divider)
            print(f"   {'Rank':<5} | {'Name':<20} | {'Score'}")
            print(divider)

            # Only hits with a strictly positive score are listed.
            for rank, (score, idx) in enumerate(zip(scores, indices), start=1):
                if score > 0:
                    found_name = names_list[idx]
                    print(f"   {rank:<5} | {found_name:<20} | {score:.4f}")

Enter a name to search: rambo

üîç FAISS SEARCH RESULTS FOR: 'rambo'

üåü BEST MATCH:
   Name:  Rajes
   Relevance Score: 0.3021

üìã RELEVANT NAMES (Decreasing Order):
   ----------------------------------------
   Rank  | Name                 | Score
   ----------------------------------------
   1     | Rajes                | 0.3021
   2     | Rajesh               | 0.2841
   3     | Rajish               | 0.2839
   4     | Rajeev               | 0.2744
   5     | Rajiev               | 0.2669
