In [1]:
import json
import re
from collections import defaultdict
from typing import Dict, List
import random
from metaphone import doublemetaphone
from IPython.core.display import display, HTML


In [2]:
import pronouncing
# Quick look at the library: ask pronouncing for the words it lists as rhymes of "tubes".
pronouncing.rhymes("tubes")

['boobs', 'cubes', "galoob's", "lube's"]

In [12]:
import json
import re
from collections import defaultdict
from typing import Dict, List
import random
import pronouncing
from IPython.core.display import display, HTML

class Song:
    """
    Load a song's lyrics from a JSON file, group the words that share a
    rhyming ending, and render the lyrics as colour-coded HTML in a notebook.

    Attributes are set in ``__init__``:
        lyrics_data: the parsed JSON document.
        lyrics: the raw lyrics text (``lyrics_data["lyrics"]``).
        tokenized_lyrics: the lyrics as a list of lines, each a list of words.
        blacklist: words that are never treated as rhymes.
        rhyme_groups: mapping of rhyme part -> distinct words sharing it.
    """

    def __init__(self, file_path: str):
        """
        Initialize the Song object with lyrics from a file.
        
        Args:
            file_path (str): The path to the JSON file containing the lyrics data.

        Raises:
            KeyError: If the JSON document has no "lyrics" key.
        """
        self.lyrics_data = self.load_lyrics(file_path)
        self.lyrics = self.lyrics_data["lyrics"]
        self.tokenized_lyrics = self.tokenize_lyrics(self.lyrics)
        # The blacklist must exist before detect_rhymes runs, because that method reads it.
        self.blacklist = {"a", "the", "can", "an"}
        self.rhyme_groups = self.detect_rhymes(self.tokenized_lyrics)

    def load_lyrics(self, file_path: str) -> Dict[str, str]:
        """
        Load the lyrics and metadata from a JSON file.
        
        Args:
            file_path (str): The path to the JSON file containing the lyrics data.
            
        Returns:
            dict: The parsed JSON document. ``__init__`` requires it to contain
            a 'lyrics' key.
        """
        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's default encoding.
        with open(file_path, "r", encoding="utf-8") as f:
            return json.load(f)

    def tokenize_lyrics(self, lyrics: str) -> List[List[str]]:
        """
        Tokenize the lyrics into lines and lower-cased words.

        Contractions such as "don't" are kept as a single token so they can be
        looked up as one word. Leading and trailing apostrophes are dropped.
        
        Args:
            lyrics (str): The lyrics text.
            
        Returns:
            list: A list of lists, where each inner list contains words of a line.
            Empty lines produce empty inner lists.
        """
        # A word is a run of word characters, optionally followed by
        # apostrophe + word characters (e.g. "don't", "y'all").
        word_pattern = re.compile(r"\w+(?:'\w+)*")
        tokenized = []
        for line in lyrics.split('\n'):
            # Typographic apostrophes are normalised so "don’t" tokenizes like "don't".
            normalized = line.lower().replace("\u2019", "'")
            tokenized.append(word_pattern.findall(normalized))
        return tokenized

    def get_phonetic_code(self, word: str) -> str:
        """
        Get the phonetic code of a word using the pronouncing library.
        
        Args:
            word (str): The word to be converted into a phonetic code.
            
        Returns:
            str: The first pronunciation returned by the library, or '' when
            the library returns none for the word.
        """
        pronunciations = pronouncing.phones_for_word(word)
        return pronunciations[0] if pronunciations else ''

    def get_rhyme_part(self, phonetic_code: str) -> str:
        """
        Extract the rhyming part (last syllable) from the phonetic code.

        The rhyming part is the last vowel phone and everything after it,
        e.g. "K AE1 T" -> "AE1 T".
        
        Args:
            phonetic_code (str): The phonetic code of the word, as
                space-separated phones.
            
        Returns:
            str: The rhyming part of the phonetic code, '' for an empty or
            whitespace-only code, or the whole code when it has no vowel phone.
        """
        phones = phonetic_code.split() if phonetic_code else []
        # In the CMUdict phone set only vowels carry a trailing stress digit
        # (0, 1 or 2), so the last such phone is the nucleus of the last syllable.
        for index in range(len(phones) - 1, -1, -1):
            if phones[index][-1].isdigit():
                return ' '.join(phones[index:])
        # No vowel phone at all: fall back to the full (possibly empty) code.
        return ' '.join(phones)

    def detect_rhymes(self, tokenized_lyrics: List[List[str]]) -> Dict[str, List[str]]:
        """
        Detect rhymes in the tokenized lyrics using phonetic codes.

        Blacklisted words and words without a known pronunciation are ignored.
        A group is only reported when at least two *different* words share
        the same rhyme part.
        
        Args:
            tokenized_lyrics (list): A list of lists containing tokenized lyrics.
            
        Returns:
            dict: A dictionary where keys are rhyme parts and values are lists of
            the distinct rhyming words, in order of first appearance.
        """
        # Inner dicts act as insertion-ordered sets of words.
        grouped: Dict[str, Dict[str, None]] = defaultdict(dict)
        for line in tokenized_lyrics:
            for word in line:
                if word in self.blacklist:
                    continue
                rhyme_part = self.get_rhyme_part(self.get_phonetic_code(word))
                if not rhyme_part:
                    # Unknown pronunciation: these words must not be lumped
                    # together as if they rhymed with each other.
                    continue
                grouped[rhyme_part].setdefault(word)

        # Filter out non-rhyming groups (fewer than two distinct words).
        return {part: list(words) for part, words in grouped.items() if len(words) > 1}

    def assign_colors(self, rhyme_groups: Dict[str, List[str]]) -> Dict[str, str]:
        """
        Assign random colors to each rhyme group.

        Colors are drawn from a shuffled palette without repetition, so groups
        only share a color once there are more groups than palette entries.
        
        Args:
            rhyme_groups (dict): A dictionary of rhyme groups.
            
        Returns:
            dict: A dictionary where keys are rhyme parts and values are colors.
        """
        color_palette = ["#FF6347", "#4682B4", "#32CD32", "#FFD700", "#6A5ACD", "#FF69B4", "#8B4513"]
        # Shuffle once, then cycle: still random, but distinct for as long as possible.
        shuffled = random.sample(color_palette, len(color_palette))
        return {
            rhyme_part: shuffled[position % len(shuffled)]
            for position, rhyme_part in enumerate(rhyme_groups)
        }

    def highlight_lyrics(self, tokenized_lyrics: List[List[str]], rhyme_colors: Dict[str, str]) -> str:
        """
        Highlight the lyrics with HTML/CSS.

        Words that are blacklisted, have no known pronunciation, or whose rhyme
        part has no entry in ``rhyme_colors`` are rendered in black.
        
        Args:
            tokenized_lyrics (list): A list of lists containing tokenized lyrics.
            rhyme_colors (dict): A dictionary of rhyme colors.
            
        Returns:
            str: The highlighted lyrics in HTML format, lines joined by "<br>".
        """
        highlighted_lyrics = []
        for line in tokenized_lyrics:
            highlighted_line = []
            for word in line:
                color = "black"
                # Apply the same exclusions as detect_rhymes so that
                # blacklisted words are never coloured as rhymes.
                if word not in self.blacklist:
                    rhyme_part = self.get_rhyme_part(self.get_phonetic_code(word))
                    if rhyme_part:
                        color = rhyme_colors.get(rhyme_part, "black")
                highlighted_line.append(f'<span style="color: {color};">{word}</span>')
            highlighted_lyrics.append(" ".join(highlighted_line))
        
        return "<br>".join(highlighted_lyrics)

    def display_lyrics(self, html_lyrics: str):
        """
        Display the highlighted lyrics in HTML format for Jupyter Notebook.
        
        Args:
            html_lyrics (str): The highlighted lyrics in HTML format.
        """
        display(HTML(f"<div style='font-family: Arial, sans-serif;'>{html_lyrics}</div>"))

    def analyze_and_display(self):
        """
        Analyze the lyrics for rhymes and display the highlighted lyrics.

        Colors are re-drawn on every call, so repeated calls may color the same
        group differently.
        """
        rhyme_colors = self.assign_colors(self.rhyme_groups)
        highlighted_lyrics_html = self.highlight_lyrics(self.tokenized_lyrics, rhyme_colors)
        self.display_lyrics(highlighted_lyrics_html)

    def display_rhyme_groups(self):
        """
        Display all rhyming words together, one rhyme group per line.
        """
        rhyme_groups_str = []
        for rhyme_part, words in self.rhyme_groups.items():
            rhyme_groups_str.append(f"Rhyme part '{rhyme_part}': {', '.join(words)}")
        
        display(HTML("<br>".join(rhyme_groups_str)))


In [13]:
# Choose the song to analyse; both values are interpolated into the lyrics file name below.
artist_name = "jcole"
song_title = "huntin wabbitz"
# Relative path, so it is resolved against the kernel's current working directory.
file_path = f"lyricsFiles/{artist_name}-{song_title}_lyrics.json"

# Constructing Song loads the JSON file, tokenizes the lyrics and detects the rhyme groups.
song = Song(file_path)
# Render the lyrics as HTML with each rhyme group's words coloured.
song.analyze_and_display()

In [14]:
# Show each detected rhyme group (its rhyme part followed by its words), one group per line.
song.display_rhyme_groups()