<a href="https://colab.research.google.com/github/krishmun/speechtherapyrecommendationsystem/blob/main/recommendation_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import random

import nltk
from nltk.corpus import cmudict, wordnet
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Download the NLTK corpora this notebook reads: the CMU Pronouncing
# Dictionary (loaded via `cmudict`) and WordNet (source of candidate words).
nltk.download('cmudict')
nltk.download('wordnet')








[nltk_data] Downloading package cmudict to /root/nltk_data...
[nltk_data]   Unzipping corpora/cmudict.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [None]:
# Load the CMU Pronouncing Dictionary once, up front, so every lookup below
# reuses the same in-memory mapping. Lookups are done with lowercase keys.
pronouncing_dict = cmudict.dict()

def get_phonemes(word):
    """Look up the pronunciations of ``word`` in the CMU Pronouncing Dictionary.

    The lookup is case-insensitive: the word is lowercased before it is used
    as a key into ``pronouncing_dict``.

    Returns:
        The entry stored for the word, or an empty list when the word is not
        in the dictionary.
    """
    key = word.lower()
    if key in pronouncing_dict:
        return pronouncing_dict[key]
    # Unknown word: no pronunciation available.
    return []

In [None]:
def calculate_cosine_similarity(phonemes1, phonemes2):
    """Return the TF-IDF cosine similarity between two phoneme collections.

    Each argument may be either a flat list of ARPAbet symbols
    (``['AH0', 'B', 'S', 'T']``) or a list of pronunciations, each itself a
    list of symbols (``[['B', 'AH1', 'S', 'T']]``). Nested input is flattened
    one level, so a word with several pronunciations contributes the phonemes
    of all of them. Trailing stress digits (0/1/2) are stripped, so ``AH0``
    and ``AH1`` are treated as the same phoneme.

    Returns:
        A float similarity score; ``0.0`` when either side has no phonemes
        (for example a word with no known pronunciation).
    """
    def _as_tokens(phonemes):
        # Accept both flat and one-level-nested input and yield base phonemes.
        tokens = []
        for item in phonemes:
            symbols = [item] if isinstance(item, str) else item
            tokens.extend(str(symbol).rstrip('012') for symbol in symbols)
        return tokens

    tokens1 = _as_tokens(phonemes1)
    tokens2 = _as_tokens(phonemes2)

    # Nothing to compare; this also avoids fitting a vectorizer on an empty
    # vocabulary, which raises an error.
    if not tokens1 or not tokens2:
        return 0.0

    # The documents are already tokenised, so hand the token lists straight
    # to the vectorizer through an identity analyzer. Joining them into a
    # string and using an identity tokenizer would split that string into
    # single characters instead of phonemes.
    vectorizer = TfidfVectorizer(analyzer=lambda doc: doc)
    tfidf_matrix = vectorizer.fit_transform([tokens1, tokens2])
    return float(cosine_similarity(tfidf_matrix)[0, 1])

# Default per-phoneme weights, keyed by stress-free ARPAbet symbol
# (customize as needed). Built once instead of on every call.
_DEFAULT_PHONEME_WEIGHTS = {
    'AA': 1.0, 'AE': 1.0, 'AH': 1.0, 'AO': 1.0, 'AW': 1.0, 'AY': 1.0, 'B': 1.0, 'CH': 1.0, 'D': 1.0, 'DH': 1.0,
    'EH': 1.0, 'ER': 1.0, 'EY': 1.0, 'F': 1.0, 'G': 1.0, 'HH': 1.0, 'IH': 1.0, 'IY': 1.0, 'JH': 1.0, 'K': 1.0,
    'L': 1.0, 'M': 1.0, 'N': 1.0, 'NG': 1.0, 'OW': 1.0, 'OY': 1.0, 'P': 1.0, 'R': 1.0, 'S': 1.0, 'SH': 1.0,
    'T': 1.0, 'TH': 1.0, 'UH': 1.0, 'UW': 1.0, 'V': 1.0, 'W': 1.0, 'Y': 1.0, 'Z': 1.0, 'ZH': 1.0 }


def calculate_weighted_similarity(phonemes1, phonemes2, phoneme_weights=None):
    """Return the summed weight of the phonemes shared by both inputs.

    Each argument may be either a flat list of ARPAbet symbols
    (``['AH0', 'B', 'S', 'T']``) or a list of pronunciations, each itself a
    list of symbols (``[['B', 'AH1', 'S', 'T']]``). Nested input is flattened
    one level. Trailing stress digits (0/1/2) are stripped so that symbols
    match the stress-free keys of the weight table.

    Args:
        phonemes1: First phoneme collection (flat or nested).
        phonemes2: Second phoneme collection (flat or nested).
        phoneme_weights: Optional mapping from stress-free ARPAbet symbol to
            weight. Defaults to a table that weights every phoneme at 1.0.
            Symbols missing from the mapping contribute 0.0, and each
            contribution is capped at 1.0.

    Returns:
        A float: the sum of (capped) weights over the distinct phonemes that
        occur in both inputs. The score is not normalised, so it grows with
        the number of shared phonemes.
    """
    if phoneme_weights is None:
        phoneme_weights = _DEFAULT_PHONEME_WEIGHTS

    def _as_phoneme_set(phonemes):
        # Accept both flat and one-level-nested input; compare base phonemes.
        found = set()
        for item in phonemes:
            symbols = [item] if isinstance(item, str) else item
            found.update(str(symbol).rstrip('012') for symbol in symbols)
        return found

    shared = _as_phoneme_set(phonemes1) & _as_phoneme_set(phonemes2)
    return sum((min(phoneme_weights.get(phoneme, 0.0), 1.0) for phoneme in shared), 0.0)


In [None]:
def suggest_words(input_phonemes, word_list, num_suggestions=5,
                  cosine_weight=0.7, weighted_weight=0.3):
    """Rank ``word_list`` by phonetic similarity to ``input_phonemes``.

    Every word is looked up with ``get_phonemes`` and scored as
    ``cosine_weight * cosine_similarity + weighted_weight * weighted_similarity``.

    Args:
        input_phonemes: Target phonemes to match against.
        word_list: Iterable of candidate words.
        num_suggestions: Maximum number of words to return. Values of zero or
            less yield an empty list.
        cosine_weight: Multiplier applied to the cosine similarity.
        weighted_weight: Multiplier applied to the weighted similarity.

    Returns:
        Up to ``num_suggestions`` words, best match first. Words with equal
        scores keep their relative order from ``word_list``.
    """
    # A negative count would otherwise slice from the end of the ranking and
    # return almost every word.
    if num_suggestions <= 0:
        return []

    scored = []
    for word in word_list:
        word_phonemes = get_phonemes(word)

        # Calculate similarity metrics
        cosine_sim = calculate_cosine_similarity(input_phonemes, word_phonemes)
        weighted_sim = calculate_weighted_similarity(input_phonemes, word_phonemes)

        # Blend the two metrics into a single ranking score
        combined_sim = cosine_weight * cosine_sim + weighted_weight * weighted_sim
        scored.append((word, combined_sim))

    # Highest score first; the sort is stable, so ties keep input order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return [word for word, _ in scored[:num_suggestions]]


In [None]:
# Hardcoded input phonemes for testing
input_phonemes = ['AH0', 'B', 'S', 'T']

# Draw a reproducible sample of candidate words from WordNet. Slicing a set of
# strings directly gives a different 5000-word subset on every kernel restart
# (string hashing is randomised per process), so sort the vocabulary first and
# sample it with a fixed seed.
all_words = sorted(set(wordnet.words()))
word_list = random.Random(42).sample(all_words, min(5000, len(all_words)))

# Get the top 10 suggestions
top_suggestions = suggest_words(input_phonemes, word_list, num_suggestions=10)




In [None]:
# Display suggestions
if top_suggestions:
    # Report how many suggestions were actually returned instead of a
    # hard-coded "5" (the query above asks for a different number).
    print("Top {} Suggestions:".format(len(top_suggestions)))
    for suggestion in top_suggestions:
        print(suggestion)
else:
    print("No suggestions found.")

Top 5 Suggestions:
bust
bustard
dustbin
brisket
boisterous
buttoned
tubman
brunt
battle
subsystem


Top 5 Suggestions:
task
bass
trash
stray
assert

In [None]:
# Hardcoded input phonemes for testing
input_phonemes = ['P', 'L', 'AY1']

# Draw a reproducible sample of candidate words from WordNet. Slicing a set of
# strings directly gives a different 5000-word subset on every kernel restart
# (string hashing is randomised per process), so sort the vocabulary first and
# sample it with a fixed seed.
# NOTE: a curated speech-therapy word list could be substituted for
# `word_list` here once one is defined in this notebook.
all_words = sorted(set(wordnet.words()))
word_list = random.Random(42).sample(all_words, min(5000, len(all_words)))

# Get the top 5 suggestions
top_suggestions = suggest_words(input_phonemes, word_list, num_suggestions=5)

In [None]:
# Display suggestions
if top_suggestions:
    # Report how many suggestions were actually returned instead of a
    # hard-coded "5", so the header stays correct if the query size changes
    # or fewer words are available.
    print("Top {} Suggestions:".format(len(top_suggestions)))
    for suggestion in top_suggestions:
        print(suggestion)
else:
    print("No suggestions found.")

Top 5 Suggestions:
plop
type
wipe
pail
glop




output
Top 5 Suggestions:
nile
ripe
lyre
lamp
ploughed

Top 5 Suggestions:
cordon_off
simplex
tyrosine
polluted
lamp