In [1]:
import ast, itertools
import json
import random
import pandas as pd
from collections import Counter

In [None]:
# Load core structures
# NOTE: machine-specific absolute location; edit DATA_DIR to run this notebook elsewhere.
DATA_DIR = '/Users/rafaelfelix/Projects/demos/benky-fy/tmp'

# core.json maps a name to table-like data; each entry becomes its own DataFrame.
with open(f'{DATA_DIR}/core.json', 'r', encoding='utf-8') as f:
    core_data = {key: pd.DataFrame(item) for key, item in json.load(f).items()}
structures = core_data['structures']

# The standalone vocab file is also stored under core_data['vocab'], so both
# names refer to the same DataFrame.
vocab = core_data['vocab'] = pd.read_json(f'{DATA_DIR}/vocab.json')
verbs = pd.read_json(f'{DATA_DIR}/verbs.json')

# Read through a context manager so the handle is closed deterministically,
# using the same explicit encoding as core.json above.
with open(f'{DATA_DIR}/vocab_interests.json', 'r', encoding='utf-8') as f:
    vocab_interests = json.load(f)


In [None]:
# Display the `structures` DataFrame taken from core_data['structures'].
structures

In [None]:
# Pick one sentence structure at random and draw one English word for each of its slots.
structure = structures.sample(1).iloc[0]
# vocab interests:
#  politeness, people_identity, daily_life_lifestyle, places_travel_events,
#  knowledge_communication, nature_description, actions_time, ai_professional
vocab_criterias = [
    ("priority_group", ["p0", "p1", "p2"]),
    ("tags", vocab_interests['places_travel_events'])
]

# Apply every criterion in turn: list-valued cells match when they share at
# least one element with the allowed values, scalar cells match by membership.
tvocab = vocab.copy()
for key, value in vocab_criterias:
    tvocab = tvocab[tvocab[key].apply(lambda x: bool(set(x) & set(value)) if isinstance(x, list) else x in value)]

words = {}
for key, value in structure.slots.items():
    if key == 'clause':
        print(NotImplementedError("Clause is not implemented yet"))
        # Skip the slot: without this the code below would sample from the
        # previous slot's pool (or hit a NameError if 'clause' comes first).
        continue

    # A slot constraint may be a bare string or a list; wrap strings so that
    # set(...) and .isin(...) see whole values instead of single characters.
    wanted = [value] if isinstance(value, str) else value

    if key == "Verb":
        if wanted != ["any"]:
            pool = verbs[verbs.tags.apply(
                lambda x: bool(set(x['semantic'] if isinstance(x['semantic'], list) else [x['semantic']]) & set(wanted))
            )]
        else:
            pool = verbs
    elif key == "Adj":
        # NOTE(review): adjectives come from the unfiltered `vocab`, not the
        # criteria-filtered `tvocab` — confirm this is intentional.
        pool = vocab[vocab.category.isin(["adjective"])]
    else:
        pool = tvocab[tvocab.category.isin(wanted)]

    if pool.empty:
        # Fail with a message that names the slot instead of pandas' generic
        # sampling error on an empty frame.
        raise ValueError(f"No candidate words for slot {key!r} with constraint {value!r}")
    words[key] = pool.sample().iloc[0].english

print("structure: ", structure.structure)
print("theme: ", random.choice(structure.theme))
for key, value in words.items():
    print(f'{key}: {value}')


In [None]:
# List the distinct 'usage' values found across the tag records of all verbs.
(
    verbs.tags
    .apply(lambda tag_record: tag_record['usage'])
    .explode()
    .unique()
)

In [None]:
# List the distinct slot names that appear in the `slots` mapping of any structure.
(
    structures.slots
    .apply(lambda slot_map: slot_map.keys())
    .explode()
    .unique()
)

In [None]:
# Start the context word list from the `english` column of the vocab table.
context = vocab['english'].tolist()

In [None]:
# Rebuild the list from both tables instead of extending it in place, so that
# re-running this cell does not append the verb entries a second time.
context = vocab.english.tolist() + verbs.english.tolist()

In [None]:
# Write one context word per line. The encoding is pinned to UTF-8 (as for the
# core.json read in this notebook) so the output does not depend on the locale.
with open('/Users/rafaelfelix/Projects/demos/benky-fy/tmp/context-words.txt', 'w', encoding='utf-8') as f:
    f.write('\n'.join(context))

In [None]:
# Display the first row of the `structures` table.
structures.iloc[0]

In [28]:
# Import the sentence generator
import sys
import random
import os
sys.path.append('../tmp')
from main import JapaneseSentenceGenerator, Sentence

# Create the generator instance that the following cells call into.
generator = JapaneseSentenceGenerator()

# Banner: a ready message followed by a 50-character rule.
print("🎌 Japanese Sentence Generator Ready!", "=" * 50, sep="\n")

# Function to display sentences nicely
def display_sentence(sentence: Sentence):
    """Print each top-level field of ``sentence`` on its own labelled line.

    The fields are read and printed in this order: theme, structure,
    japanese, english, components, particles, extensions, followed by a
    60-character rule. Returns ``None``.
    """
    labelled_fields = (
        ("🎯 Theme", "theme"),
        ("📝 Structure", "structure"),
        ("🇯🇵 Japanese", "japanese"),
        ("🇺🇸 English", "english"),
        ("🔧 Components", "components"),
        ("📌 Particles", "particles"),
        ("➕ Extensions", "extensions"),
    )
    # Read and print one attribute at a time, in the listed order.
    for label, attr_name in labelled_fields:
        print(f"{label}: {getattr(sentence, attr_name)}")
    print("-" * 60)

# Test with different themes
themes_to_test = ["identity", "motion", "action_with_object", "description", "possession"]

# Generate one sentence for a randomly chosen theme and list the English text of each component.
sentence = generator.generate_sentence(random.choice(themes_to_test))
print(sentence.structure)
for key, value in sentence.components.items():
    # Other helpers in this notebook guard against components that are not
    # dicts or that lack an 'english' entry; use the same fallbacks here
    # instead of failing with a TypeError/KeyError.
    english = value.get('english', 'N/A') if isinstance(value, dict) else value
    print(f"{key}: {english}")

🎌 Japanese Sentence Generator Ready!
A は Adj です
A: donut
Adj: [i-adj]


In [25]:
# Display the `english` field of the `sentence` generated in the earlier cell.
sentence.english

'This is a possession sentence: こうちゃ の てんぷら'

A: i (formal)
Verb: to forget


In [None]:
# Enhanced sentence generation with full metadata
def display_enhanced_sentence(sentence: Sentence):
    """Print a sentence followed by a field-by-field dump of every component.

    The header shows theme, structure, japanese, english, particles and
    extensions. Each entry of ``sentence.components`` is then listed:

    * dict components are printed one field per line; a dict-valued ``tags``
      field and a dict-valued ``conjugations`` field are expanded one level
      (for conjugations that are themselves dicts, the ``hiragana`` entry is
      shown when present, otherwise the whole dict);
    * any other component is printed as a single ``Value`` line.

    Returns ``None``.
    """
    header_fields = (
        ("🎯 Theme", "theme"),
        ("📝 Structure", "structure"),
        ("🇯🇵 Japanese", "japanese"),
        ("🇺🇸 English", "english"),
        ("📌 Particles", "particles"),
        ("➕ Extensions", "extensions"),
    )
    for label, attr_name in header_fields:
        print(f"{label}: {getattr(sentence, attr_name)}")
    print("\n🔧 Component Details:")
    print("-" * 40)

    for slot_name, component in sentence.components.items():
        print(f"\n📍 {slot_name}:")
        if not isinstance(component, dict):
            print(f"  📋 Value: {component}")
        else:
            # Display all available fields
            for field, content in component.items():
                content_is_dict = isinstance(content, dict)
                if field == "tags" and content_is_dict:
                    print(f"  🏷️  {field}:")
                    for tag_name, tag_content in content.items():
                        print(f"    - {tag_name}: {tag_content}")
                elif field == "conjugations" and content_is_dict:
                    print(f"  🔄 {field}:")
                    for form_name, form in content.items():
                        # Prefer the hiragana rendering of a nested form when it has one.
                        shown = form.get('hiragana', form) if isinstance(form, dict) else form
                        print(f"    - {form_name}: {shown}")
                else:
                    print(f"  📋 {field}: {content}")
        print("-" * 20)

# Generate one "motion" sentence and show it with the detailed formatter.
print("🚀 Enhanced Sentence Generation with Full Metadata:", "=" * 60, sep="\n")
enhanced_sentence = generator.generate_sentence("motion")
display_enhanced_sentence(enhanced_sentence)


In [None]:
# Compare different themes with full metadata
def compare_themes_with_metadata(themes_list):
    """Generate one sentence per theme and print a compact summary of each.

    For every theme, a sentence is requested from the notebook-level
    ``generator`` and its structure, Japanese text and components are
    printed. Dict components show their English text and the first available
    of ``kana`` / ``hiragana`` / ``kanji``; other components are printed as-is.

    Tags are handled by shape: a dict contributes its non-empty ``semantic``
    and ``usage`` entries, while any other non-empty value (for example a
    flat list of tags) is printed on a single line instead of raising
    ``AttributeError``.

    Args:
        themes_list: Iterable of theme names; each is passed to
            ``generator.generate_sentence`` and must support ``.upper()``.

    Returns:
        None. All output goes to stdout.
    """
    print("🔍 Theme Comparison with Full Metadata:")
    print("=" * 60)

    for i, theme in enumerate(themes_list, 1):
        print(f"\n🎯 THEME {i}: {theme.upper()}")
        print("=" * 40)

        sentence = generator.generate_sentence(theme)

        # Show basic info
        print(f"📝 Structure: {sentence.structure}")
        print(f"🇯🇵 Japanese: {sentence.japanese}")

        # Show component metadata summary
        print("\n🔧 Components Summary:")
        for slot_name, component in sentence.components.items():
            if not isinstance(component, dict):
                print(f"  📍 {slot_name}: {component}")
                continue

            english = component.get("english", "N/A")
            japanese = component.get("kana", component.get("hiragana", component.get("kanji", "N/A")))
            tags = component.get("tags", {})
            print(f"  📍 {slot_name}: {english} ({japanese})")

            if isinstance(tags, dict):
                semantic_tags = tags.get("semantic", [])
                usage_tags = tags.get("usage", [])
                if semantic_tags:
                    print(f"    🎯 Semantic: {semantic_tags}")
                if usage_tags:
                    print(f"    📚 Usage: {usage_tags}")
            elif tags:
                # Tags without semantic/usage keys (e.g. a plain list) have
                # no .get(); show them verbatim rather than crashing.
                print(f"    🏷️ Tags: {tags}")

        print("-" * 40)

# Run the side-by-side summary for a fixed selection of themes.
themes_to_compare = [
    "identity",
    "action_with_object",
    "description",
    "possession",
]
compare_themes_with_metadata(themes_to_compare)
