<a href="https://colab.research.google.com/github/raj-027/Sanskrit-NLP/blob/main/Metric_Learning_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import json
import unicodedata
from tqdm import tqdm
import torch
import torch.nn as nn


In [None]:

def load_phoneme_vectors(csv_path):
    """Read a phoneme feature table from a CSV file.

    The first column of the table is used as the phoneme symbol and every
    remaining column as one feature; each row's feature values are cast to
    ``int``.

    Args:
        csv_path: Location of the CSV, passed straight to ``pandas.read_csv``.

    Returns:
        tuple: ``(phoneme2vec, feature_dim)`` where ``phoneme2vec`` maps each
        phoneme symbol to a 1-D integer NumPy array and ``feature_dim`` is the
        number of feature columns.
    """
    table = pd.read_csv(csv_path)

    symbol_column = table.columns[0]
    feature_columns = table.columns[1:]

    # If a symbol appears on several rows, the last row wins.
    phoneme2vec = {
        record[symbol_column]: record[feature_columns].astype(int).values
        for _, record in table.iterrows()
    }

    return phoneme2vec, len(feature_columns)


In [None]:
# Load the phoneme -> feature-vector table; FEATURE_DIM is the number of
# feature columns and is passed to PhoneticEncoder as its input size below.
# NOTE(review): the path assumes Google Drive is mounted at /content/drive;
# no mount cell is visible in this notebook -- confirm.
phoneme2vec, FEATURE_DIM = load_phoneme_vectors("/content/drive/MyDrive/Sanskrit NLP/sanskrit_phoneme_vectors (1).csv")


In [None]:
def load_words(txt_path):
    """Return the non-blank lines of a UTF-8 text file.

    Each line is stripped of leading/trailing whitespace; lines that are
    empty after stripping are dropped.

    Args:
        txt_path: Path of the text file to read.

    Returns:
        list[str]: The stripped, non-empty lines in file order.
    """
    words = []
    with open(txt_path, encoding="utf-8") as handle:
        for raw_line in handle:
            entry = raw_line.strip()
            if entry:
                words.append(entry)
    return words

# Read the term list (one entry per non-blank line) and report how many
# entries were loaded.
words = load_words("/content/drive/MyDrive/Sanskrit NLP/161656_RV_Terms.txt") # Changed from .gdoc to .txt
print("Total words:", len(words))

Total words: 161656


In [None]:
def normalize_word(word):
    """Apply NFC normalisation followed by a fixed set of character edits.

    After ``unicodedata.normalize("NFC", ...)``:
      * ``-`` is removed,
      * ``^`` is replaced by ``0``,
      * the Latin letters ``t``, ``r`` and ``e`` are removed.

    NOTE(review): why ``t``/``r``/``e`` are dropped and ``^`` becomes ``0``
    is not evident from this notebook -- confirm the intent before relying
    on it.

    Args:
        word: Input string.

    Returns:
        str: The edited string.
    """
    # Every edit targets a single character and no replacement produces a
    # character that another edit consumes, so one translate pass suffices.
    edits = str.maketrans({"-": None, "^": "0", "t": None, "r": None, "e": None})
    return unicodedata.normalize("NFC", word).translate(edits)

In [None]:
import re

# The tables below are written with \u escapes (the same way HALANT and NUKTA
# are written in this notebook) so they do not depend on how the file is
# encoded or rendered.
VEDIC_MARKS = {
    "\u0952",  # DEVANAGARI STRESS SIGN ANUDATTA
    "\u0951",  # DEVANAGARI STRESS SIGN UDATTA
}
REMOVE_CHARS = {
    "\u094D",  # virama (halant)
    "\u093D",  # avagraha
}
NASAL_MAP = {
    "\u0901": "\u0902",  # candrabindu -> anusvara
}

# ASCII digits plus the Devanagari digits U+0966..U+096F.
_DIGIT_CHARS = "0123456789" + "".join(chr(cp) for cp in range(0x0966, 0x0970))

# One translation table covering every single-code-point edit performed by
# normalize_sanskrit. None of the replacement outputs is itself a removal
# target, so a single pass gives the same result as applying the edits one
# after another.
_SANSKRIT_TRANSLATION = {ord(ch): None for ch in VEDIC_MARKS | REMOVE_CHARS}
_SANSKRIT_TRANSLATION.update({ord(src): tgt for src, tgt in NASAL_MAP.items()})
_SANSKRIT_TRANSLATION.update({ord(digit): None for digit in _DIGIT_CHARS})


def normalize_sanskrit(word):
    """Strip accents, virama/avagraha and digits from a Devanagari string.

    The following edits are applied, then surrounding whitespace is stripped:
      * Vedic accent marks in ``VEDIC_MARKS`` are removed,
      * virama and avagraha (``REMOVE_CHARS``) are removed,
      * candrabindu is mapped to anusvara (``NASAL_MAP``),
      * ASCII and Devanagari digits are removed.

    Args:
        word: Input string.

    Returns:
        str: The normalised string (possibly empty).
    """
    return word.translate(_SANSKRIT_TRANSLATION).strip()


In [None]:
# All Devanagari code points are written as \u escapes so the tables do not
# depend on how the file is encoded or rendered.

# Dependent vowel sign (matra) -> corresponding independent vowel.
DEVANAGARI_MATRAS = {
    "\u093E": "\u0906",  # aa
    "\u093F": "\u0907",  # i
    "\u0940": "\u0908",  # ii
    "\u0941": "\u0909",  # u
    "\u0942": "\u090A",  # uu
    "\u0943": "\u090B",  # vocalic r
    "\u0944": "\u0960",  # vocalic rr
    "\u0962": "\u090C",  # vocalic l
    "\u0963": "\u0961",  # vocalic ll
    "\u0947": "\u090F",  # e
    "\u0948": "\u0910",  # ai
    "\u094B": "\u0913",  # o
    "\u094C": "\u0914",  # au
}

# Independent vowels: a aa i ii u uu, vocalic r rr l ll, e ai o au.
DEVANAGARI_VOWELS = set(
    "\u0905\u0906\u0907\u0908\u0909\u090A"
    "\u090B\u0960\u090C\u0961"
    "\u090F\u0910\u0913\u0914"
)

# Consonants. The previous list contained U+0931 (RRA) where U+0930 (RA) was
# needed, so RA fell through to the unknown-character fallback and a halant
# following it was emitted as a phoneme of its own. RA is added here; RRA is
# kept so existing behaviour for that letter is unchanged. Vedic LLA (U+0933)
# is added so that it gets the same nukta/halant/matra handling.
DEVANAGARI_CONSONANTS = set(
    "\u0915\u0916\u0917\u0918\u0919"        # ka kha ga gha nga
    "\u091A\u091B\u091C\u091D\u091E"        # ca cha ja jha nya
    "\u091F\u0920\u0921\u0922\u0923"        # tta ttha dda ddha nna
    "\u0924\u0925\u0926\u0927\u0928"        # ta tha da dha na
    "\u092A\u092B\u092C\u092D\u092E"        # pa pha ba bha ma
    "\u092F\u0930\u0931\u0932\u0933\u0935"  # ya ra rra la lla va
    "\u0936\u0937\u0938\u0939"              # sha ssa sa ha
)

# Halant, nukta, anusvara, visarga, candrabindu
HALANT = "\u094D"
NUKTA = "\u093C"
ANUSVARA = "\u0902"
VISARGA = "\u0903"
CANDRABINDU = "\u0901"

PHONETIC_MODIFIERS = {ANUSVARA, VISARGA, CANDRABINDU}


def words_to_phonemes(word):
    """Split a Devanagari word into a flat list of phoneme symbols.

    The word is stripped and NFC-normalised, then scanned left to right:
      * independent vowels and anusvara/visarga/candrabindu are emitted as-is;
      * a consonant (with an immediately following nukta, if any, attached)
        is emitted; a halant right after it is consumed silently, and a
        matra right after it is emitted as the matching independent vowel;
      * a matra that does not follow a consonant is emitted as the matching
        independent vowel;
      * any other character is emitted unchanged.

    Note that no inherent vowel is inserted: a consonant produces the same
    single symbol whether or not it is followed by a halant.

    Args:
        word: Input string.

    Returns:
        list[str]: Phoneme symbols in order (empty list for an empty word).
    """
    chars = list(unicodedata.normalize("NFC", word.strip()))
    total = len(chars)
    phonemes = []
    i = 0

    while i < total:
        ch = chars[i]
        i += 1

        # Independent vowels and nasal/visarga modifiers stand for themselves.
        if ch in DEVANAGARI_VOWELS or ch in PHONETIC_MODIFIERS:
            phonemes.append(ch)
            continue

        if ch in DEVANAGARI_CONSONANTS:
            base = ch
            # A nukta modifies the consonant, so keep it attached to the base.
            if i < total and chars[i] == NUKTA:
                base += chars[i]
                i += 1
            phonemes.append(base)

            if i < total:
                follower = chars[i]
                if follower == HALANT:
                    # Halant only marks the absence of a vowel; skip it.
                    i += 1
                elif follower in DEVANAGARI_MATRAS:
                    phonemes.append(DEVANAGARI_MATRAS[follower])
                    i += 1
            continue

        # Stray matra -> its independent vowel; anything else passes through.
        phonemes.append(DEVANAGARI_MATRAS.get(ch, ch))

    return phonemes

In [None]:
# Smoke-test the phonemizer on three sample words and print the resulting
# phoneme lists.
# NOTE(review): the string literals in this cell render as mis-encoded text in
# this copy of the notebook -- confirm the file is stored as UTF-8.
print(words_to_phonemes("‡§Ö‡§ó‡•ç‡§®‡§ø‡§Æ‡•ç"))

print(words_to_phonemes("‡§ß‡§∞‡•ç‡§Æ"))

print(words_to_phonemes("‡§ï‡§∞‡•ç‡§Æ"))


['‡§Ö', '‡§ó', '‡§®', '‡§á', '‡§Æ']
['‡§ß', '‡§∞', '‡•ç', '‡§Æ']
['‡§ï', '‡§∞', '‡•ç', '‡§Æ']


In [None]:
def phonemes_to_features(phonemes, phoneme2vec):
    """Stack the feature vectors of a phoneme sequence into one array.

    Args:
        phonemes: Sequence of phoneme symbols.
        phoneme2vec: Mapping from phoneme symbol to feature vector.

    Returns:
        ``None`` if any phoneme has no entry in ``phoneme2vec`` (the caller is
        expected to skip the word); otherwise ``np.stack`` of the looked-up
        vectors, one row per phoneme.
    """
    absent = object()  # sentinel that cannot collide with a stored vector
    looked_up = [phoneme2vec.get(symbol, absent) for symbol in phonemes]
    if any(vector is absent for vector in looked_up):
        return None
    return np.stack(looked_up)


In [None]:
from collections import Counter
import numpy as np
import json

def preprocess_dataset(words, phoneme2vec):
    """Turn raw words into phoneme/feature records, skipping unusable ones.

    Each word is normalised with ``normalize_sanskrit(normalize_word(word))``,
    split into phonemes with ``words_to_phonemes`` and mapped to a stacked
    feature array via ``phoneme2vec``. A word is skipped (and recorded) when
    the phonemizer raises, when nothing is left after normalisation, or when
    any phoneme is missing from ``phoneme2vec``. A summary of the 15 most
    common skip reasons is printed.

    Args:
        words: Iterable of raw word strings.
        phoneme2vec: Mapping from phoneme symbol to feature vector.

    Returns:
        tuple: ``(dataset, skipped_words)``. ``dataset`` is a list of dicts
        with keys ``"word"`` (the normalised word), ``"phonemes"`` and
        ``"features"`` (array with one row per phoneme). ``skipped_words`` is
        a list of dicts describing each skipped word and the reason.
    """
    dataset = []
    skipped_words = []
    skip_reasons = Counter()

    for original_word in words:
        word = normalize_sanskrit(normalize_word(original_word))

        # Phonemization
        try:
            phonemes = words_to_phonemes(word)
        except Exception as exc:
            skip_reasons["phonemizer_error"] += 1
            skipped_words.append({
                "word": original_word,
                "normalized": word,
                "reason": "phonemizer_error",
                "error": str(exc),
            })
            continue

        # Nothing left after normalisation: np.stack would raise on an empty
        # list, so record the word as skipped instead of aborting the run.
        if not phonemes:
            skip_reasons["empty_after_normalization"] += 1
            skipped_words.append({
                "word": original_word,
                "normalized": word,
                "phonemes": phonemes,
                "reason": "empty_after_normalization",
            })
            continue

        # Missing phoneme vectors (only the first missing symbol is reported)
        missing = [p for p in phonemes if p not in phoneme2vec]
        if missing:
            skip_reasons[f"missing_phoneme:{missing[0]}"] += 1
            skipped_words.append({
                "word": original_word,
                "normalized": word,
                "phonemes": phonemes,
                "missing_phoneme": missing[0],
                "reason": "missing_phoneme",
            })
            continue

        # Valid word
        dataset.append({
            "word": word,
            "phonemes": phonemes,
            "features": np.stack([phoneme2vec[p] for p in phonemes]),
        })

    print("\n SKIP SUMMARY ")
    print("Total skipped:", len(skipped_words))
    for reason, count in skip_reasons.most_common(15):
        print(f"{reason:25s} : {count}")

    return dataset, skipped_words


In [None]:
dataset, skipped_words_info = preprocess_dataset(words, phoneme2vec)

# json cannot encode NumPy arrays, so each record is copied with its
# "features" array replaced by a nested list; the records in `dataset`
# themselves keep their arrays.
serializable_dataset = [
    {**record, "features": record["features"].tolist()}
    for record in dataset
]

with open("sanskrit_metric_learning_dataset.json", "w", encoding="utf-8") as f:
    json.dump(serializable_dataset, f, ensure_ascii=False, indent=2)


==== SKIP SUMMARY ====
Total skipped: 3138
missing_phoneme:          : 3137
missing_phoneme:m         : 1


In [None]:
import numpy as np

def substitution_cost(v1, v2):
    """Cost of substituting one phoneme for another.

    Args:
        v1, v2: Feature vectors of shape ``(feature_dim,)``.

    Returns:
        The mean absolute element-wise difference between ``v1`` and ``v2``.
    """
    gap = np.abs(v1 - v2)
    return np.mean(gap)

def articulatory_distance(seq1, seq2):
    """Weighted edit distance between two phoneme feature sequences.

    Insertions and deletions cost 1; substituting one phoneme for another
    costs ``substitution_cost`` of their feature vectors.

    Args:
        seq1, seq2: Sequences of per-phoneme feature vectors.

    Returns:
        The minimum total edit cost, as a NumPy float.
    """
    rows, cols = len(seq1), len(seq2)

    # Only the previous DP row is needed to fill the current one.
    # Row 0: turning an empty prefix into seq2[:c] takes c insertions.
    previous = np.arange(cols + 1, dtype=float)

    for r in range(1, rows + 1):
        current = np.zeros(cols + 1)
        current[0] = r  # r deletions to reach an empty prefix of seq2
        for c in range(1, cols + 1):
            current[c] = min(
                previous[c] + 1,       # deletion
                current[c - 1] + 1,    # insertion
                previous[c - 1] + substitution_cost(seq1[r - 1], seq2[c - 1]),  # substitution
            )
        previous = current

    return previous[cols]



In [None]:
class PhoneticEncoder(nn.Module):
    """Single-layer LSTM encoder for padded phoneme-feature sequences.

    The final hidden state of the LSTM is projected linearly to the
    embedding size.

    Args:
        feature_dim: Size of each per-phoneme feature vector.
        hidden_dim: LSTM hidden size.
        emb_dim: Size of the output embedding.
    """

    def __init__(self, feature_dim, hidden_dim=128, emb_dim=64):
        super().__init__()

        self.lstm = nn.LSTM(
            input_size=feature_dim,
            hidden_size=hidden_dim,
            batch_first=True
        )
        self.proj = nn.Linear(hidden_dim, emb_dim)

    def forward(self, x, lengths):
        """Encode a batch.

        Args:
            x: Padded batch, batch-first (the LSTM is built with
                ``batch_first=True``).
            lengths: True length of each sequence, as a tensor or a list.

        Returns:
            Tensor with one ``emb_dim``-sized row per batch element.
        """
        # pack_padded_sequence requires tensor lengths to live on the CPU;
        # the training loop moves them to the model device, so bring them back.
        lengths = torch.as_tensor(lengths).cpu()

        packed = nn.utils.rnn.pack_padded_sequence(
            x, lengths, batch_first=True, enforce_sorted=False
        )
        _, (h_n, _) = self.lstm(packed)
        # h_n[-1] is the last layer's final hidden state for every sequence.
        return self.proj(h_n[-1])


In [None]:
def metric_learning_loss(emb_a, emb_b, art_dist):
    """Mean squared gap between embedding distance and target distance.

    emb_a, emb_b: (B, D) embeddings of the two words in each pair
    art_dist: (B,) target articulatory distance of each pair
    """
    residual = torch.norm(emb_a - emb_b, dim=1) - art_dist
    return residual.pow(2).mean()


In [None]:
class PhoneticEncoder(nn.Module):
    """Single-layer LSTM encoder for padded phoneme-feature sequences.

    The final hidden state of the LSTM is projected linearly to the
    embedding size. The defaults (128 / 64) match the values used at every
    construction site in this notebook.

    Args:
        feature_dim: Size of each per-phoneme feature vector.
        hidden_dim: LSTM hidden size.
        emb_dim: Size of the output embedding.
    """

    def __init__(self, feature_dim, hidden_dim=128, emb_dim=64):
        super().__init__()
        self.lstm = nn.LSTM(feature_dim, hidden_dim, batch_first=True)
        self.proj = nn.Linear(hidden_dim, emb_dim)

    def forward(self, x, lengths):
        """Encode a batch.

        Args:
            x: Padded batch, batch-first (the LSTM is built with
                ``batch_first=True``).
            lengths: True length of each sequence, as a tensor (on any
                device) or a plain list.

        Returns:
            Tensor with one ``emb_dim``-sized row per batch element.
        """
        # pack_padded_sequence requires tensor lengths to live on the CPU.
        # as_tensor also lets callers pass a plain list of ints.
        cpu_lengths = torch.as_tensor(lengths).cpu()
        packed = nn.utils.rnn.pack_padded_sequence(
            x, cpu_lengths, batch_first=True, enforce_sorted=False
        )
        _, (h_n, _) = self.lstm(packed)
        # h_n[-1] is the last layer's final hidden state for every sequence.
        emb = self.proj(h_n[-1])
        return emb


In [None]:
def metric_loss(emb1, emb2, art_dist):
    """Mean squared gap between each pair's embedding distance and its target.

    Args:
        emb1, emb2: Embeddings of the two words in each pair, one row per pair.
        art_dist: Target articulatory distance for each pair.

    Returns:
        Scalar tensor: mean over the batch of
        ``(||emb1 - emb2|| - art_dist) ** 2``.
    """
    gap = torch.norm(emb1 - emb2, dim=1) - art_dist
    return torch.mean(gap ** 2)


In [None]:
from torch.utils.data import Dataset
import random


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the preprocessed records for validation. The split is
# shuffled with a fixed random_state so it is the same on every run.
train_data, val_data = train_test_split(
    dataset,
    test_size=0.2,
    random_state=42,
    shuffle=True
)

print(f"Train size: {len(train_data)}")
print(f"Validation size: {len(val_data)}")


Train size: 126814
Validation size: 31704


In [None]:
class MetricDataset(Dataset):
    """Dataset of index pairs drawn from a list of records.

    Item ``idx`` is the pair ``(idx, j)`` where ``j`` is drawn uniformly at
    random from all record indices on every access, so the same ``idx`` can
    be paired with a different partner each time (and ``j`` may equal
    ``idx``). Only indices are returned; the records themselves are looked up
    by whoever consumes the pairs.
    """

    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        partner = random.randrange(len(self.data))
        return idx, partner


In [None]:
def make_collate_fn(data):
    """Build a DataLoader ``collate_fn`` bound to ``data``.

    Args:
        data: Indexable collection of records, each with a ``"features"``
            entry holding one feature vector per phoneme.

    Returns:
        A function that takes a batch of ``(index_a, index_b)`` pairs and
        returns ``(xa, la, xb, lb, art_dist)``: zero-padded float feature
        batches and true lengths for both sides of each pair, plus the
        articulatory distance of each pair as a float tensor.
    """

    def _padded_batch(indices):
        # Zero-pad every sequence up to the longest one in this batch.
        sequences = [
            torch.as_tensor(data[k]["features"]).float() for k in indices
        ]
        lengths = torch.tensor([len(seq) for seq in sequences])
        padded = torch.nn.utils.rnn.pad_sequence(sequences, batch_first=True)
        return padded, lengths

    def collate_fn(batch):
        first_ids, second_ids = zip(*batch)

        xa, la = _padded_batch(first_ids)
        xb, lb = _padded_batch(second_ids)

        # The regression target is computed on the fly for every pair.
        targets = [
            articulatory_distance(data[a]["features"], data[b]["features"])
            for a, b in zip(first_ids, second_ids)
        ]
        art_dist = torch.tensor(targets).float()

        return xa, la, xb, lb, art_dist

    return collate_fn


In [None]:
from torch.utils.data import DataLoader
from functools import lru_cache


In [None]:
@lru_cache(maxsize=100_000)
def cached_art_dist(a_id, b_id):
    """Memoised articulatory distance between two records of ``dataset``.

    The ids index the global ``dataset`` list (not the train/validation
    splits), and results are cached per ordered ``(a_id, b_id)`` pair.

    NOTE(review): nothing in the visible cells calls this function; the
    collate function computes distances directly -- confirm whether this
    helper is still needed.
    """
    first_features = dataset[a_id]["features"]
    second_features = dataset[b_id]["features"]
    return articulatory_distance(first_features, second_features)


In [None]:
# Each loader yields batches of 32 index pairs; the collate function bound to
# the same split turns them into padded feature tensors and target distances.
# Training batches are shuffled, validation batches are not.
train_loader = DataLoader(
    MetricDataset(train_data),
    batch_size=32,
    shuffle=True,
    collate_fn=make_collate_fn(train_data)
    )
# NOTE(review): MetricDataset draws the second index of every pair at random
# on each access, so the validation pairs differ from epoch to epoch even with
# shuffle=False -- confirm this is intended.
val_loader = DataLoader(
    MetricDataset(val_data),
    batch_size=32,
    shuffle=False,
    collate_fn=make_collate_fn(val_data)
    )

In [None]:
def train(
    model,
    train_loader,
    val_loader,
    epochs=100,
    patience=4,
    lr=1e-3
):
    """Train ``model`` with Adam and early stopping on validation loss.

    Every epoch runs one pass over ``train_loader`` (gradients clipped to a
    norm of 5.0) and one gradient-free pass over ``val_loader``, then prints
    both mean losses. Whenever the validation loss improves on the best seen
    so far, the model's ``state_dict`` is saved to
    ``sanskrit_metric_learning_model.pt``. Training stops early after
    ``patience`` consecutive epochs without improvement.

    Args:
        model: Module called as ``model(x, lengths)``.
        train_loader: Iterable of ``(xa, la, xb, lb, art_dist)`` batches.
        val_loader: Iterable of batches in the same format.
        epochs: Maximum number of epochs.
        patience: Number of non-improving epochs tolerated before stopping.
        lr: Adam learning rate.

    Returns:
        None. The best weights are only available from the saved file.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    def batch_loss(batch):
        # Move the whole batch to the model's device, then score both sides.
        xa, la, xb, lb, art_dist = (tensor.to(device) for tensor in batch)
        return metric_loss(model(xa, la), model(xb, lb), art_dist)

    best_val_loss = float("inf")
    stale_epochs = 0

    for epoch in range(1, epochs + 1):

        # TRAIN
        model.train()
        running_total = 0.0
        for batch in train_loader:
            loss = batch_loss(batch)

            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
            optimizer.step()

            running_total += loss.item()
        train_loss = running_total / len(train_loader)

        #  VALIDATION
        model.eval()
        running_total = 0.0
        with torch.no_grad():
            for batch in val_loader:
                running_total += batch_loss(batch).item()
        val_loss = running_total / len(val_loader)

        #  LOGGING
        print(
            f"Epoch {epoch:03d} | "
            f"Train Loss: {train_loss:.4f} | "
            f"Val Loss: {val_loss:.4f}"
        )

        #  EARLY STOPPING
        if val_loss < best_val_loss:
            # New best: reset the counter and checkpoint the weights.
            best_val_loss = val_loss
            stale_epochs = 0
            torch.save(
                model.state_dict(),
                "sanskrit_metric_learning_model.pt"
            )
            print(" Best model saved")
            continue

        stale_epochs += 1
        print(f"No improvement ({stale_epochs}/{patience})")

        if stale_epochs >= patience:
            print("Early stopping triggered (overfitting detected)")
            break


In [None]:
# Build a fresh encoder sized to the phoneme feature vectors and train it for
# at most 200 epochs, stopping early after 10 epochs without a better
# validation loss.
model = PhoneticEncoder(FEATURE_DIM, hidden_dim=128, emb_dim=64)
train(
    model,
    train_loader,
    val_loader,
    epochs=200,
    patience=10
)

Epoch 001 | Train Loss: 0.0952 | Val Loss: 0.0371
   ‚úì Best model saved
Epoch 002 | Train Loss: 0.0321 | Val Loss: 0.0263
   ‚úì Best model saved
Epoch 003 | Train Loss: 0.0255 | Val Loss: 0.0233
   ‚úì Best model saved
Epoch 004 | Train Loss: 0.0225 | Val Loss: 0.0202
   ‚úì Best model saved
Epoch 005 | Train Loss: 0.0210 | Val Loss: 0.0244
   ‚úó No improvement (1/10)
Epoch 006 | Train Loss: 0.0204 | Val Loss: 0.0209
   ‚úó No improvement (2/10)
Epoch 007 | Train Loss: 0.0196 | Val Loss: 0.0185
   ‚úì Best model saved
Epoch 008 | Train Loss: 0.0192 | Val Loss: 0.0257
   ‚úó No improvement (1/10)
Epoch 009 | Train Loss: 0.0186 | Val Loss: 0.0185
   ‚úó No improvement (2/10)
Epoch 010 | Train Loss: 0.0184 | Val Loss: 0.0173
   ‚úì Best model saved
Epoch 011 | Train Loss: 0.0179 | Val Loss: 0.0171
   ‚úì Best model saved
Epoch 012 | Train Loss: 0.0176 | Val Loss: 0.0170
   ‚úì Best model saved
Epoch 013 | Train Loss: 0.0177 | Val Loss: 0.0179
   ‚úó No improvement (1/10)
Epoch 014 | T

In [None]:
import torch
import numpy as np
from scipy.spatial.distance import cosine, euclidean

# Pick the GPU when one is available, otherwise run on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Rebuild the encoder with the same sizes used for training and load the
# saved weights, remapped onto the selected device.
# NOTE(review): train() saves to the relative path
# "sanskrit_metric_learning_model.pt"; this absolute path matches only when
# the working directory is /content -- confirm.
model = PhoneticEncoder(FEATURE_DIM,
        hidden_dim=128,
        emb_dim=64)
model.load_state_dict(torch.load("/content/sanskrit_metric_learning_model.pt", map_location=device))
model.to(device)
# Switch to evaluation mode for inference.
model.eval()


PhoneticEncoder(
  (lstm): LSTM(34, 128, batch_first=True)
  (proj): Linear(in_features=128, out_features=64, bias=True)
)

In [None]:
def get_embedding(word):
    """Embed a single word with the global ``model``.

    The word goes through the same normalisation as the training data
    (``normalize_sanskrit(normalize_word(...))``), is split into phonemes,
    mapped to feature vectors through the global ``phoneme2vec`` and encoded
    as a batch of one on the global ``device``.

    Args:
        word: Raw word string.

    Returns:
        numpy.ndarray: 1-D embedding vector.

    Raises:
        ValueError: If nothing is left of the word after normalisation, or
            if one of its phonemes has no entry in ``phoneme2vec``.
    """
    # Use the exact normalisation chain applied when the dataset was built,
    # so inference sees the same phoneme sequences the model was trained on.
    normalized = normalize_sanskrit(normalize_word(word))
    phonemes = words_to_phonemes(normalized)
    if not phonemes:
        # Without this guard np.stack would fail with an unrelated message.
        raise ValueError(f"No phonemes left after normalizing word: {word!r}")

    features = phonemes_to_features(phonemes, phoneme2vec)
    if features is None:
        raise ValueError(f"Unknown phoneme in word: {normalized}")

    x = torch.tensor(features).unsqueeze(0).float()  # (1, T, F)
    lengths = torch.tensor([x.shape[1]])

    with torch.no_grad():
        emb = model(x.to(device), lengths.to(device))

    return emb.squeeze(0).cpu().numpy()

In [None]:
def compare_words(word1, word2):
    """Compare two words by the distance between their embeddings.

    Args:
        word1, word2: Raw word strings, embedded with ``get_embedding``.

    Returns:
        dict: The two input words plus ``"cosine_distance"`` and
        ``"euclidean_distance"`` as Python floats.
    """
    first_vec, second_vec = get_embedding(word1), get_embedding(word2)

    return {
        "word1": word1,
        "word2": word2,
        "cosine_distance": float(cosine(first_vec, second_vec)),
        "euclidean_distance": float(euclidean(first_vec, second_vec)),
    }


In [None]:
# Word pairs whose embedding distances are reported below.
# NOTE(review): the string literals in this list render as mis-encoded text in
# this copy of the notebook -- confirm the file is stored as UTF-8.
word_pairs = [
    ("‡§Æ‡§æ‡§¨‡§ø", "‡§Æ‡§æ‡§≠‡•Ä‡§≠"),
    ("‡§á‡§π‡•à‡§ß‡§ø", "‡§Ø‡§π‡•á‡§ß‡•Ä"),
    ("‡§á‡§∑‡•Å‡§∞‡§ø", "‡§á‡§∂‡•Ç‡§∞‡•Ä"),
    ("‡§â‡§¶‡•á‡§£‡•Ä", "‡§â‡§ß‡•á‡§®‡§ø"),
    ("‡§á‡§Ø‡§Æ‡§®‡•ç", "‡§Ø‡§Æ‡§®‡•ç‡§§"),
    ("‡§®‡§µ‡§™‡•ç", "‡§™‡•ç‡§∞‡§æ‡§£")
]

# Compare every pair; a ValueError from the comparison is stored as an
# "error" entry instead of stopping the loop.
results = []
for word1, word2 in word_pairs:
    try:
        results.append(compare_words(word1, word2))
    except ValueError as e:
        results.append({"word1": word1, "word2": word2, "error": str(e)})

# Print one line per pair: either both distances or the error message.
for res in results:
    if "error" in res:
        print(f"{res['word1']} - {res['word2']} | Error: {res['error']}")
    else:
        print(f"{res['word1']} - {res['word2']} | Euclidean distance: {res['euclidean_distance']:.4f} | Cosine distance: {res['cosine_distance']:.4f}")

‡§Æ‡§æ‡§¨‡§ø - ‡§Æ‡§æ‡§≠‡•Ä‡§≠ | Euclidean distance: 1.6518 | Cosine distance: 0.1964
‡§á‡§π‡•à‡§ß‡§ø - ‡§Ø‡§π‡•á‡§ß‡•Ä | Euclidean distance: 1.1449 | Cosine distance: 0.0806
‡§á‡§∑‡•Å‡§∞‡§ø - ‡§á‡§∂‡•Ç‡§∞‡•Ä | Euclidean distance: 0.2620 | Cosine distance: 0.0044
‡§â‡§¶‡•á‡§£‡•Ä - ‡§â‡§ß‡•á‡§®‡§ø | Euclidean distance: 0.2912 | Cosine distance: 0.0052
‡§á‡§Ø‡§Æ‡§®‡•ç - ‡§Ø‡§Æ‡§®‡•ç‡§§ | Euclidean distance: 0.8898 | Cosine distance: 0.1030
‡§®‡§µ‡§™‡•ç - ‡§™‡•ç‡§∞‡§æ‡§£ | Euclidean distance: 1.4128 | Cosine distance: 0.2879


Embedding distances (epochs = 63, patience = 10):
* माबि - माभीभ | Euclidean distance: 1.6518 | Cosine distance: 0.1964
* इहैधि - यहेधी | Euclidean distance: 1.1449 | Cosine distance: 0.0806
* इषुरि - इशूरी | Euclidean distance: 0.2620 | Cosine distance: 0.0044
* उदेणी - उधेनि | Euclidean distance: 0.2912 | Cosine distance: 0.0052
* इयमन् - यमन्त | Euclidean distance: 0.8898 | Cosine distance: 0.1030
* नवप् - प्राण | Euclidean distance: 1.4128 | Cosine distance: 0.2879


In [None]:
def word_to_articulatory_sequence(word):
    """Convert a word into its list of per-phoneme feature vectors.

    The word is normalised with ``normalize_sanskrit(normalize_word(...))``
    -- the same chain used when the training dataset is built -- then split
    into phonemes and looked up in the global ``phoneme2vec``.

    Args:
        word: Raw word string.

    Returns:
        list: One feature vector per phoneme, in order (empty if nothing is
        left of the word after normalisation).

    Raises:
        ValueError: If a phoneme has no entry in ``phoneme2vec``.
    """
    # Apply both normalisation steps so reference distances are computed on
    # the same phoneme sequences the dataset records were built from.
    normalized = normalize_sanskrit(normalize_word(word))

    seq = []
    for p in words_to_phonemes(normalized):
        if p not in phoneme2vec:
            raise ValueError(f"Unknown phoneme: {p}")
        seq.append(phoneme2vec[p])

    return seq


In [None]:
def articulatory_word_distance(word1, word2):
    """Articulatory edit distance between two raw words.

    Both words are converted with ``word_to_articulatory_sequence`` and the
    resulting feature sequences are compared with ``articulatory_distance``.
    Any ``ValueError`` raised during conversion propagates to the caller.
    """
    first_seq, second_seq = (
        word_to_articulatory_sequence(word1),
        word_to_articulatory_sequence(word2),
    )
    return articulatory_distance(first_seq, second_seq)


In [None]:
def articulatory_distance_for_pairs(word_pairs):
    """Compute the articulatory distance for every pair of words.

    Args:
        word_pairs: Iterable of ``(word1, word2)`` tuples.

    Returns:
        list[dict]: One dict per pair with keys ``"word1"``, ``"word2"`` and
        ``"articulatory_distance"``. If computing a pair raises any
        exception, its distance is ``None`` and an ``"error"`` key carries
        the exception message.
    """

    def _entry_for(w1, w2):
        try:
            dist = articulatory_word_distance(w1, w2)
        except Exception as e:
            # Best effort: keep going and report the failure for this pair.
            return {
                "word1": w1,
                "word2": w2,
                "articulatory_distance": None,
                "error": str(e)
            }
        return {
            "word1": w1,
            "word2": w2,
            "articulatory_distance": dist
        }

    return [_entry_for(w1, w2) for w1, w2 in word_pairs]


In [None]:
def print_articulatory_results(results):
    """Print one line per result produced for a word pair.

    Args:
        results: Iterable of dicts with keys ``"word1"``, ``"word2"`` and
            ``"articulatory_distance"`` (``None`` marks a failed pair, in
            which case the optional ``"error"`` entry is shown).
    """
    for entry in results:
        distance = entry["articulatory_distance"]
        label = f"{entry['word1']} - {entry['word2']} | "

        if distance is None:
            outcome = f"FAILED ({entry.get('error', 'unknown error')})"
        else:
            outcome = f"Articulatory distance: {distance:.4f}"

        print(label + outcome)


In [None]:
# Same word pairs as in the embedding comparison above, now scored with the
# articulatory edit distance so the two measures can be compared.
# NOTE(review): the string literals in this list render as mis-encoded text in
# this copy of the notebook -- confirm the file is stored as UTF-8.
word_pairs = [
    ("‡§Æ‡§æ‡§¨‡§ø", "‡§Æ‡§æ‡§≠‡•Ä‡§≠"),
    ("‡§á‡§π‡•à‡§ß‡§ø", "‡§Ø‡§π‡•á‡§ß‡•Ä"),
    ("‡§á‡§∑‡•Å‡§∞‡§ø", "‡§á‡§∂‡•Ç‡§∞‡•Ä"),
    ("‡§â‡§¶‡•á‡§£‡•Ä", "‡§â‡§ß‡•á‡§®‡§ø"),
    ("‡§á‡§Ø‡§Æ‡§®‡•ç", "‡§Ø‡§Æ‡§®‡•ç‡§§"),
    ("‡§®‡§µ‡§™‡•ç", "‡§™‡•ç‡§∞‡§æ‡§£")
]

# Compute the distances (failures are captured per pair) and print them.
results = articulatory_distance_for_pairs(word_pairs)
print_articulatory_results(results)


‡§Æ‡§æ‡§¨‡§ø - ‡§Æ‡§æ‡§≠‡•Ä‡§≠ | Articulatory distance: 1.1176
‡§á‡§π‡•à‡§ß‡§ø - ‡§Ø‡§π‡•á‡§ß‡•Ä | Articulatory distance: 0.8824
‡§á‡§∑‡•Å‡§∞‡§ø - ‡§á‡§∂‡•Ç‡§∞‡•Ä | Articulatory distance: 0.1765
‡§â‡§¶‡•á‡§£‡•Ä - ‡§â‡§ß‡•á‡§®‡§ø | Articulatory distance: 0.1765
‡§á‡§Ø‡§Æ‡§®‡•ç - ‡§Ø‡§Æ‡§®‡•ç‡§§ | Articulatory distance: 0.8235
‡§®‡§µ‡§™‡•ç - ‡§™‡•ç‡§∞‡§æ‡§£ | Articulatory distance: 1.5294


* माबि - माभीभ | Articulatory distance: 1.1176
* इहैधि - यहेधी | Articulatory distance: 0.8824
* इषुरि - इशूरी | Articulatory distance: 0.1765
* उदेणी - उधेनि | Articulatory distance: 0.1765
* इयमन् - यमन्त | Articulatory distance: 0.8235
* नवप् - प्राण | Articulatory distance: 1.5294