In [3]:
# Import required modules
import sys
from pathlib import Path
from typing import List, Dict, Any, Tuple, Union
import logging
import asyncio

# Make the parent of the current working directory importable (added once only)
project_root = str(Path().resolve().parent)
if not sys.path.count(project_root):
    sys.path.append(project_root)



In [1]:
# Example 1: Test individual keyword analyzer
async def test_keyword_analyzer():
    """Exercise KeywordAnalyzer on the English and Finnish technical samples.

    One ParameterHandler (loaded from "parameters_fi.xlsx") and one LLM
    instance are shared by both runs; each language gets its own text
    processor and its own KeywordAnalyzer, which is then handed to
    ``test_individual_analyzer`` together with that language's technical text.
    """
    print("Testing Keyword Analyzer")
    print("=" * 50)

    # Components shared by both language runs
    # NOTE(review): the English run also reads parameters_fi.xlsx - confirm this is intended.
    parameter_handler = ParameterHandler("parameters_fi.xlsx")
    llm = create_llm()

    # (language code, heading printed before the run)
    runs = (
        ("en", "\nTesting English Technical Content:"),
        ("fi", "\nTesting Finnish Technical Content:"),
    )
    for lang_code, heading in runs:
        print(heading)
        processor = create_text_processor(language=lang_code)
        analyzer = KeywordAnalyzer(
            llm=llm,
            config=parameter_handler.parameters.general.model_dump(),
            language_processor=processor,
        )
        await test_individual_analyzer(analyzer, test_texts[lang_code]["technical"], "Keyword")



In [2]:
# Example 2: Test all components
async def test_components_for_language(language: str):
    """Run the keyword, theme and category analyzers for one language.

    Args:
        language: Language code. It selects the parameter workbook
            (``parameters_{language}.xlsx``), the text processor, and the
            entry of ``test_texts`` to analyze. The notebook calls this
            with "en" and "fi".

    Each analyzer is run first on the "technical" sample and then on the
    "business" sample, in the order Keyword, Theme, Category.
    """
    print(f"Testing Components for {language.upper()}")
    print("=" * 50)

    # Initialize components
    parameter_handler = ParameterHandler(f"parameters_{language}.xlsx")
    llm = create_llm()
    language_processor = create_text_processor(language=language)

    # Create analyzers; each one receives its own model_dump() result
    keyword_analyzer = KeywordAnalyzer(
        llm=llm,
        config=parameter_handler.parameters.general.model_dump(),
        language_processor=language_processor,
    )

    theme_analyzer = ThemeAnalyzer(
        llm=llm,
        config=parameter_handler.parameters.general.model_dump(),
        language_processor=language_processor,
    )

    category_analyzer = CategoryAnalyzer(
        categories=parameter_handler.parameters.categories,
        llm=llm,
        config=parameter_handler.parameters.general.model_dump(),
        language_processor=language_processor,
    )

    # (label passed to test_individual_analyzer, analyzer instance)
    analyzers = (
        ("Keyword", keyword_analyzer),
        ("Theme", theme_analyzer),
        ("Category", category_analyzer),
    )

    # Same analyzers, two kinds of sample text
    for content_type in ("technical", "business"):
        print(f"\nTesting {language.upper()} {content_type.capitalize()} Content:")
        sample_text = test_texts[language][content_type]
        for label, analyzer in analyzers:
            await test_individual_analyzer(analyzer, sample_text, label)



In [5]:
# Example 3: Quick test of full pipeline
async def test_pipeline():
    """Run the complete SemanticAnalyzer pipeline for English, then Finnish.

    For each language a SemanticAnalyzer is built from that language's
    parameter file and the technical sample text is analyzed. The success
    flag and the number of keywords, themes and category matches in the
    result are printed.
    """
    print("Testing Full Pipeline")
    print("=" * 50)

    # (heading, parameter workbook, key into test_texts)
    pipelines = (
        ("\nEnglish Pipeline:", "parameters_en.xlsx", "en"),
        ("\nFinnish Pipeline:", "parameters_fi.xlsx", "fi"),
    )
    for heading, parameter_file, lang_key in pipelines:
        print(heading)
        analyzer = SemanticAnalyzer(parameter_file=parameter_file)
        result = await analyzer.analyze(test_texts[lang_key]["technical"])
        print(f"Success: {result.success}")
        print(f"Keywords found: {len(result.keywords.keywords)}")
        print(f"Themes found: {len(result.themes.themes)}")
        print(f"Categories found: {len(result.categories.matches)}")

# Run the tests
async def run_all_tests():
    """Await every test coroutine in sequence.

    Order: the stand-alone keyword analyzer test, the per-language component
    tests ("en" then "fi"), and finally the full pipeline test.
    """
    # Individual component
    await test_keyword_analyzer()

    # All components, one language at a time
    for language in ("en", "fi"):
        await test_components_for_language(language)

    # End-to-end pipeline
    await test_pipeline()



In [None]:
# Run in notebook
# await run_all_tests()

# Or run individual tests:
# await test_keyword_analyzer()
await test_components_for_language("fi")
# await test_pipeline()

In [4]:
from FileUtils import FileUtils


In [5]:
# Import necessary components
from src.nb_helpers.environment import setup_notebook_env, verify_environment
from src.semantic_analyzer import SemanticAnalyzer
from src.utils.FileUtils.file_utils import FileUtils
from src.core.language_processing import create_text_processor
from src.core.llm.factory import create_llm
from src.loaders.parameter_handler import ParameterHandler
from src.analyzers.keyword_analyzer import KeywordAnalyzer
from src.analyzers.theme_analyzer import ThemeAnalyzer
from src.analyzers.category_analyzer import CategoryAnalyzer



In [6]:
# Show the project root string computed in the setup cell as this cell's output
project_root

In [7]:
from src.utils.FileUtils import FileUtils


In [None]:
import FileUtils