<a href="https://colab.research.google.com/github/n1xd/phonology/blob/main/minimum_feature_specification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from itertools import combinations

def find_minimum_features(alphabet_phonemes, target_phonemes, csv_file='ft.csv'):
    """
    Find the smallest feature sets that single out the target phonemes within an alphabet.

    The CSV is read with its first column as the phoneme index and every other
    column as a feature. A feature is a candidate only when all targets share
    one value for it, that value is neither -1 nor missing, and (if the
    alphabet has non-target phonemes) at least one non-target differs on it.
    Candidate combinations are tried in increasing size; the first size at
    which some combination excludes every non-target is returned in full.

    Args:
        alphabet_phonemes (list): Phonemes in the language's alphabet.
            Repeated entries are ignored.
        target_phonemes (list): Phonemes to describe. Repeated entries are ignored.
        csv_file (str): Path to the CSV file containing phoneme features.

    Returns:
        dict or None: ``{'solutions': [...], 'message': str}`` where each
            solution is a list of ``(feature_name, symbol)`` tuples and
            ``symbol`` is ``'+'`` when the shared value equals 1, otherwise
            ``'-'``. ``None`` when no combination of candidate features
            separates the targets from the rest of the alphabet.

    Raises:
        ValueError: If any requested phoneme is absent from the CSV index.
    """
    # Repeated labels would make df.loc return duplicate rows, turning scalar
    # lookups into Series; collapse them up front while keeping first-seen order.
    alphabet_phonemes = list(dict.fromkeys(alphabet_phonemes))
    target_phonemes = list(dict.fromkeys(target_phonemes))

    # Load the phoneme feature data, indexed by the first column (phoneme names)
    df = pd.read_csv(csv_file, encoding='utf-8')
    df.set_index(df.columns[0], inplace=True)

    # Check that all phonemes exist in the dataset
    missing_phonemes = set(alphabet_phonemes + target_phonemes) - set(df.index)
    if missing_phonemes:
        raise ValueError(f"Phonemes not found in dataset: {missing_phonemes}")

    # With no targets there is no shared value to describe
    if not target_phonemes:
        return None

    # Computed once: the non-target part of the alphabet never changes
    target_set = set(target_phonemes)
    non_target_phonemes = [p for p in alphabet_phonemes if p not in target_set]

    target_data = df.loc[target_phonemes]
    non_target_data = df.loc[non_target_phonemes] if non_target_phonemes else None

    # For each candidate feature: the value shared by all targets, and the
    # positions of the non-targets that carry that same value.
    shared_values = {}
    matching_non_targets = {}

    for feature in df.columns:
        target_values = target_data[feature].values

        # Skip if any target phoneme has -1 for this feature
        if -1 in target_values:
            continue

        # A missing value cannot describe the targets (and would otherwise
        # be rendered as '-')
        if pd.isna(target_values).any():
            continue

        # All target phonemes must agree on this feature
        if len(set(target_values)) != 1:
            continue

        target_value = target_values[0]

        if non_target_data is None:
            # If no non-targets, any consistent feature is valid
            matches = frozenset()
        else:
            matches = frozenset(
                position
                for position, value in enumerate(non_target_data[feature].values)
                if value == target_value
            )
            # A feature on which every non-target agrees with the targets
            # excludes nobody, so it can never help
            if len(matches) == len(non_target_phonemes):
                continue

        shared_values[feature] = target_value
        matching_non_targets[feature] = matches

    # dicts preserve insertion order, so this follows the CSV column order
    valid_features = list(shared_values)

    # Try combinations of increasing size until some are sufficient
    for size in range(1, len(valid_features) + 1):
        solutions = []

        for combo in combinations(valid_features, size):
            # A non-target is still confusable with the targets only if it
            # matches them on every feature of the combination
            confusable = frozenset.intersection(
                *(matching_non_targets[feature] for feature in combo)
            )
            if confusable:
                continue

            solutions.append([
                (feature, '+' if shared_values[feature] == 1 else '-')
                for feature in combo
            ])

        # Every solution found at the first successful size is minimal
        if solutions:
            if len(solutions) == 1:
                message = f"Unique minimal solution found with {size} feature(s)"
            else:
                message = f"Multiple minimal solutions found with {size} feature(s) each"
            return {'solutions': solutions, 'message': message}

    # If no combination works, return None (no solution)
    return None

In [None]:
# Example usage: find the smallest feature set that picks out /m, n/
# from an eight-consonant inventory.
alphabet = ['p', 'b', 't', 'd', 'k', 'g', 'm', 'n']
targets = ['m', 'n']  # the nasals of this inventory
result = find_minimum_features(alphabet, targets)
if result is not None:
    print(result['message'])
    for i, solution in enumerate(result['solutions'], 1):
        print(f"Solution {i}: {solution}")
else:
    print("No solution exists - target phonemes cannot be uniquely described")

# User Interface

In [None]:
!pip install ipywidgets pandas

In [None]:
import pandas as pd
import ipywidgets as widgets
from IPython.display import HTML, display, clear_output
from itertools import combinations

In [None]:
class PhonologyAnalyzerUI:
    """Three-step notebook wizard around ``find_minimum_features``.

    Step 1 picks the alphabet from every phoneme in the CSV, step 2 picks the
    target phonemes from that alphabet, and step 3 prints the minimal feature
    specifications. Everything is rendered into a single ``widgets.Output``.
    """

    def __init__(self, csv_file='ft.csv'):
        """Load the feature table from *csv_file* and display step 1."""
        self.csv_file = csv_file
        self.df = None
        self.all_phonemes = []
        self.selected_alphabet = []
        self.selected_targets = []
        self.step = 1

        # UI widget storage
        self.phoneme_buttons_map = {}
        self.step1_header = widgets.HTML()
        self.done_button = widgets.Button()
        # This button object is reused every time step 1 is shown, so its
        # handler is attached exactly once here. Attaching it on each render
        # would stack handlers and fire the navigation several times per click.
        self.done_button.on_click(lambda b: self.navigate_to_step2())

        # Load phoneme data
        self.load_data()

        # Create UI components
        self.create_widgets()

    def load_data(self):
        """Load phoneme data from CSV, indexing rows by the first column.

        On any failure the error is printed and the phoneme list is left
        empty, so the UI still renders (with no buttons) instead of raising.
        """
        try:
            self.df = pd.read_csv(self.csv_file, encoding='utf-8')
            self.df.set_index(self.df.columns[0], inplace=True)
            self.all_phonemes = list(self.df.index)
            print(f"Loaded {len(self.all_phonemes)} phonemes from {self.csv_file}")
        except Exception as e:
            print(f"Error loading CSV: {e}")
            self.all_phonemes = []

    def create_widgets(self):
        """Create the output area, render step 1 into it and display it."""
        self.output = widgets.Output()
        with self.output:
            self.show_step1()
        display(self.output)

    def get_phoneme_color(self, phoneme):
        """Return the button colour for *phoneme*: highlighted if in the alphabet."""
        return '#5F9EA0' if phoneme in self.selected_alphabet else 'black'

    def _step1_header_html(self):
        """Return the step 1 header markup for the current alphabet selection."""
        return (
            "<h4>🔤 PHONOLOGICAL FEATURE ANALYZER</h4>"
            "<pre style='margin:0;'>"
            f"{'='*50}\n"
            f"Step 1: Select Alphabet Phonemes ({len(self.selected_alphabet)} selected)\n"
            f"{'-'*50}\n"
            f"{'✅ Selected: ' + ', '.join(self.selected_alphabet) if self.selected_alphabet else '❌ No phonemes selected'}"
            "</pre>"
        )

    def show_step1(self):
        """Show Step 1: Select Alphabet"""
        clear_output(wait=True)

        # Build header widget
        self.step1_header.value = self._step1_header_html()

        # Button rows
        phoneme_buttons = []
        chunk_size = 20

        for i in range(0, len(self.all_phonemes), chunk_size):
            chunk = self.all_phonemes[i:i + chunk_size]
            row_buttons = []

            for phoneme in chunk:
                button = widgets.Button(
                    description=phoneme,
                    button_style='',
                    layout=widgets.Layout(width='50px', height='45px'),
                    style=dict(
                        button_color=self.get_phoneme_color(phoneme),
                        font_weight='bold',
                        text_color='white',
                        font_size='50px'
                    )
                )
                # p=phoneme binds the current value; a plain closure would
                # see only the last phoneme of the loop
                button.on_click(lambda b, p=phoneme: self.toggle_alphabet_phoneme(p))
                self.phoneme_buttons_map[phoneme] = button
                row_buttons.append(button)

            phoneme_buttons.append(widgets.HBox(
                row_buttons,
                layout=widgets.Layout(
                    margin='2px',
                    overflow='visible',
                    flex_wrap='nowrap'
                )
            ))

        # Scrollable container
        phoneme_area = widgets.VBox(
            phoneme_buttons,
            layout=widgets.Layout(
                height='600px',
                overflow_y='scroll',
                overflow_x='hidden',
                border='1px solid #ccc',
                padding='10px'
            )
        )

        # Done button (click handler is attached once, in __init__)
        self.done_button.description = "Done"
        self.done_button.button_style = 'primary'
        self.done_button.disabled = len(self.selected_alphabet) == 0
        self.done_button.layout = widgets.Layout(width='100px')

        controls = widgets.HBox([self.done_button], layout=widgets.Layout(justify_content='flex-end'))

        display(widgets.VBox([self.step1_header, phoneme_area, controls]))

    def toggle_alphabet_phoneme(self, phoneme):
        """Toggle phoneme selection for alphabet without full redraw"""
        if phoneme in self.selected_alphabet:
            self.selected_alphabet.remove(phoneme)
            # A phoneme that leaves the alphabet cannot remain a target
            if phoneme in self.selected_targets:
                self.selected_targets.remove(phoneme)
        else:
            self.selected_alphabet.append(phoneme)

        # Update button
        button = self.phoneme_buttons_map.get(phoneme)
        if button:
            button.style.button_color = self.get_phoneme_color(phoneme)

        # Update header and Done button
        self.step1_header.value = self._step1_header_html()
        self.done_button.disabled = len(self.selected_alphabet) == 0

    def show_step2(self):
        """Show Step 2: Select Target Phonemes"""
        clear_output(wait=True)

        print("🎯 PHONOLOGICAL FEATURE ANALYZER")
        print("=" * 50)
        print(f"Step 2: Select Target Phonemes ({len(self.selected_targets)} selected)")
        print("-" * 50)
        print(f"📚 Alphabet: {', '.join(self.selected_alphabet)}")
        if self.selected_targets:
            print(f"🎯 Targets: {', '.join(self.selected_targets)}")
        else:
            print("❌ No targets selected")
        print()

        # Build buttons
        target_buttons = []
        chunk_size = 15
        for i in range(0, len(self.selected_alphabet), chunk_size):
            chunk = self.selected_alphabet[i:i + chunk_size]
            row_buttons = []
            for phoneme in chunk:
                button = widgets.Button(
                    description=phoneme,
                    button_style='',
                    layout=widgets.Layout(width='45px', height='35px'),
                    style=dict(
                        button_color='#5F9EA0' if phoneme in self.selected_targets else 'black',
                        font_weight='bold',
                        text_color='white',
                        font_size='50px'
                    )
                )
                button.on_click(lambda b, p=phoneme: self.toggle_target_phoneme(p))
                row_buttons.append(button)
            target_buttons.append(widgets.HBox(row_buttons))

        phoneme_area = widgets.VBox(
            target_buttons,
            layout=widgets.Layout(
                height='600px',
                overflow_y='scroll',
                overflow_x='hidden',
                border='1px solid #ccc',
                padding='10px'
            )
        )

        # Controls
        edit_alphabet_button = widgets.Button(description="Edit Alphabet", layout=widgets.Layout(width='120px'))
        edit_alphabet_button.on_click(lambda b: self.navigate_to_step1())

        analyze_button = widgets.Button(
            description="Analyze",
            button_style='success',
            disabled=len(self.selected_targets) == 0,
            layout=widgets.Layout(width='100px')
        )
        analyze_button.on_click(lambda b: self.navigate_to_step3())

        controls = widgets.HBox([edit_alphabet_button, analyze_button], layout=widgets.Layout(justify_content='space-between'))

        display(widgets.VBox([phoneme_area, controls]))

    def toggle_target_phoneme(self, phoneme):
        """Toggle phoneme selection for targets and redraw step 2"""
        if phoneme in self.selected_targets:
            self.selected_targets.remove(phoneme)
        else:
            self.selected_targets.append(phoneme)
        with self.output:
            self.show_step2()

    def show_step3(self):
        """Show Step 3: Results"""
        clear_output(wait=True)
        print("📊 PHONOLOGICAL FEATURE ANALYZER")
        print("=" * 50)
        print("Step 3: Analysis Results")
        print("-" * 50)
        print(f"📚 Alphabet: {', '.join(self.selected_alphabet)}")
        print(f"🎯 Targets: {', '.join(self.selected_targets)}")
        print()
        print("🔍 Running analysis...")

        # UI boundary: report a failed analysis instead of letting it abort
        # this method before the navigation buttons below are displayed.
        analysis_error = None
        results = None
        try:
            results = find_minimum_features(self.selected_alphabet, self.selected_targets, self.csv_file)
        except Exception as e:
            analysis_error = e

        print("\n" + "=" * 50)
        print("RESULTS:")
        print("=" * 50)
        if analysis_error is not None:
            print(f"❌ ANALYSIS FAILED: {analysis_error}")
        elif results is None:
            print("❌ NO SOLUTION FOUND")
        else:
            print(f"✅ {results['message'].upper()}\n")
            for i, solution in enumerate(results['solutions'], 1):
                # Rendered as e.g. "[+nasal] [-voice]"
                specs = " ".join([f"[{val}{feat}]" for feat, val in solution])
                print(f"Solution {i}: {specs}\n")

        edit_button = widgets.Button(description="Edit Alphabet", layout=widgets.Layout(width='120px'))
        edit_button.on_click(lambda b: self.navigate_to_step1())

        another_button = widgets.Button(description="Analyze Another", button_style='info', layout=widgets.Layout(width='130px'))
        another_button.on_click(lambda b: self.navigate_to_step2_reset())

        done_button = widgets.Button(description="Done", button_style='danger', layout=widgets.Layout(width='100px'))
        done_button.on_click(lambda b: self.close_interface())

        controls = widgets.HBox([edit_button, another_button, done_button], layout=widgets.Layout(justify_content='space-between'))
        display(controls)

    def navigate_to_step1(self):
        """Switch to step 1 and render it into the output area."""
        self.step = 1
        with self.output:
            self.show_step1()

    def navigate_to_step2(self):
        """Switch to step 2, keeping the current target selection."""
        self.step = 2
        with self.output:
            self.show_step2()

    def navigate_to_step2_reset(self):
        """Switch to step 2 after clearing the target selection."""
        self.selected_targets = []
        self.step = 2
        with self.output:
            self.show_step2()

    def navigate_to_step3(self):
        """Switch to step 3, which runs the analysis and prints the results."""
        self.step = 3
        with self.output:
            self.show_step3()

    def close_interface(self):
        """Replace the output area contents with a closing message."""
        with self.output:
            clear_output(wait=True)
            print("👋 Interface closed. Thank you for using the Phonological Feature Analyzer!")

# Entry point function
def launch_phonology_analyzer(csv_file='ft.csv'):
    """Create the analyzer UI for *csv_file* and return the instance.

    Constructing ``PhonologyAnalyzerUI`` is what starts the tool; the
    instance is returned so the caller can keep a reference to it.
    """
    ui = PhonologyAnalyzerUI(csv_file)
    return ui

In [None]:
# Start the widget UI on ft.csv; keep the instance in `analyzer`.
analyzer = launch_phonology_analyzer(csv_file='ft.csv')