In [None]:
import os
import sys
from pathlib import Path
import asyncio
import logging
from typing import Dict, Any, Optional
from pprint import pprint

import pandas as pd

# Make the project's `src` package importable: the parent of the current
# working directory is taken as the project root and appended to sys.path.
# This has to run before the `src.*` imports below.
# NOTE(review): assumes the kernel's working directory is a direct child of
# the repository root (e.g. a notebooks/ folder) — confirm.
project_root = str(Path().resolve().parent)
if project_root not in sys.path:  # guard keeps re-running this cell idempotent
    sys.path.append(project_root)
    print(f"Added {project_root} to Python path")

# Core components (project-local; resolvable only after the sys.path tweak above)
from src.semantic_analyzer.analyzer import SemanticAnalyzer
from src.utils.FileUtils.file_utils import FileUtils
from src.analyzers.keyword_analyzer import KeywordAnalyzer
from src.core.language_processing import create_text_processor

# Shared FileUtils instance used by the helper functions in later cells,
# plus a module-level logger for this notebook.
# NOTE(review): logging.basicConfig does nothing when the root logger already
# has handlers; the log format seen in the cell outputs suggests another
# component may configure logging first — confirm.
file_utils = FileUtils()
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)



Added /home/topi/data-science/repos/semantic-text-analyzer to Python path


In [2]:
def verify_environment():
    """Verify that the notebook environment is properly configured.

    Loads ``<project_root>/.env`` and then runs three groups of checks:

    * basic setup (project root on ``sys.path``, ``src`` imported,
      ``file_utils`` initialised, ``.env`` loaded),
    * required API-key environment variables,
    * expected project directories and the main ``config.yaml``.

    A report is printed for each group, followed by setup instructions for
    every failed item.

    Returns:
        bool: ``True`` only when every individual check passed.
    """
    # Local import: python-dotenv is only needed by this check.
    from dotenv import load_dotenv

    env_path = Path(project_root) / ".env"
    env_loaded = load_dotenv(env_path)

    # Required environment variables
    required_env_vars = [
        'OPENAI_API_KEY',
        'ANTHROPIC_API_KEY',
    ]

    # Basic checks
    basic_checks = {
        "Project root in path": project_root in sys.path,
        "Can import src": "src" in sys.modules,
        "FileUtils initialized": hasattr(file_utils, "project_root"),
        ".env file loaded": env_loaded,
    }

    # An empty value is as unusable as a missing one, so both count as
    # "not set". This keeps the report consistent with the setup
    # instructions printed further down.
    env_var_set = {var: bool(os.getenv(var)) for var in required_env_vars}
    env_var_checks = {f"{var} set": is_set for var, is_set in env_var_set.items()}

    # Check for required paths using FileUtils
    expected_paths = {
        "Raw data": file_utils.get_data_path("raw"),
        "Processed data": file_utils.get_data_path("processed"),
        "Configuration": file_utils.get_data_path("configurations"),
        "Main config.yaml": Path(project_root) / "config.yaml"
    }
    # Entries of expected_paths that are files rather than directories;
    # only used to word the setup instructions correctly.
    expected_files = {"Main config.yaml"}

    # Probe the filesystem once per path so that the report and the setup
    # instructions can never disagree.
    path_exists = {name: path.exists() for name, path in expected_paths.items()}
    path_checks = {f"{name} exists": found for name, found in path_exists.items()}

    # Combine all checks
    all_checks = {
        **basic_checks,
        **env_var_checks,
        **path_checks
    }

    # Print results
    print("Environment Check Results:")
    print("=" * 50)

    def print_section(title, checks):
        """Print one titled group of check results with ✓/✗ markers."""
        print(f"\n{title}:")
        print("-" * len(title))
        for check, result in checks.items():
            status = "✓" if result else "✗"
            print(f"{status} {check}")

    print_section("Basic Setup", basic_checks)
    print_section("Environment Variables", env_var_checks)
    print_section("Project Structure", path_checks)

    # Overall status
    all_passed = all(all_checks.values())
    print("\n" + "=" * 50)
    print("Environment Status:", "Ready ✓" if all_passed else "Setup needed ✗")

    if not all_passed:
        print("\nSetup Instructions:")
        if not env_loaded:
            print("- Create a .env file in the project root with required API keys")
        for var, is_set in env_var_set.items():
            if not is_set:
                print(f"- Add {var} to your .env file")
        for name, path in expected_paths.items():
            if not path_exists[name]:
                kind = "file" if name in expected_files else "directory"
                print(f"- Create {name} {kind} at {path}")

    return all_passed

# Run verification
# verify_environment()



In [3]:
class KeywordTester:
    """Helper class for testing keyword analysis components.

    On construction the built-in sample texts are normalised, stored in
    ``self.test_texts`` (keys: ``"technical"``, ``"business"``,
    ``"finnish"``) and written out through ``FileUtils.save_data_to_disk``.

    The three public ``test_*`` coroutines run the same flow with different
    ``KeywordAnalyzer`` configurations; the shared flow lives in
    ``_run_analysis``.
    """

    def __init__(self):
        self.file_utils = FileUtils()
        self.test_texts = self._load_test_texts()

    @staticmethod
    def _normalize_text(raw: str) -> str:
        """Remove source-code indentation from a triple-quoted sample text.

        Leading/trailing blank lines are dropped and every remaining line is
        stripped, so the text previews start with real content and the text
        saved to disk is identical to the text that gets analysed.
        """
        return "\n".join(line.strip() for line in raw.strip().splitlines())

    @staticmethod
    def _language_for(text_key: str) -> str:
        """Return the language code used for a sample text ("fi" or "en")."""
        return "fi" if text_key == "finnish" else "en"

    def _load_test_texts(self) -> Dict[str, str]:
        """Build the sample texts, save them via FileUtils, and return them.

        Returns:
            Mapping of text key to normalised text content.
        """
        raw_texts = {
            "technical": """
                Python is a high-level programming language known for its simplicity.
                It supports multiple programming paradigms including procedural and
                object-oriented programming.
            """,
            "business": """
                The company's Q3 results exceeded expectations with revenue growth of 15%.
                Customer acquisition costs decreased while retention rates improved.
                The board has approved a new strategic initiative focusing on expansion.
            """,
            "finnish": """
                Ohjelmistokehittäjä työskentelee asiakasprojektissa kehittäen uusia
                ominaisuuksia verkkokauppajärjestelmään. Tekninen toteutus vaatii
                erityistä huomiota tietoturvan osalta.
            """
        }
        texts = {name: self._normalize_text(raw) for name, raw in raw_texts.items()}

        # Save test texts using FileUtils (one row per text)
        df = pd.DataFrame([
            {"name": name, "content": content}
            for name, content in texts.items()
        ])

        self.file_utils.save_data_to_disk(
            data={"texts": df},
            output_type="raw",
            file_name="test_texts",
            output_filetype="xlsx",
            include_timestamp=False
        )

        return texts

    async def _run_analysis(
        self,
        text_key: str,
        label: str,
        config: Dict[str, Any],
        show_tokens: bool = False,
        max_shown: Optional[int] = 10,
    ):
        """Run one KeywordAnalyzer configuration on a sample text and print it.

        Args:
            text_key: Key into ``self.test_texts``; an unknown key raises
                ``KeyError``.
            label: Human-readable analysis name used in the printed headers.
            config: Configuration dict handed to ``KeywordAnalyzer``.
            show_tokens: When true, also print the processor's tokenisation.
            max_shown: Maximum number of keywords to print; ``None`` prints
                all of them.

        Returns:
            Whatever ``analyzer.analyze(text)`` returns.
        """
        text = self.test_texts[text_key]

        # Create processor and analyzer
        processor = create_text_processor(language=self._language_for(text_key))
        analyzer = KeywordAnalyzer(config=config, language_processor=processor)

        print(f"\nTesting {label} Analysis for {text_key}:")
        print("=" * 50)
        print(f"Text: {text[:100]}...")

        if show_tokens:
            tokens = processor.tokenize(text)
            print("\nTokens:")
            print(tokens)

        results = await analyzer.analyze(text)
        print(f"\n{label} Keywords:")
        # Tolerate results without a `keywords` attribute, or with it set to None.
        keywords = getattr(results, "keywords", None) or []
        for kw in keywords[:max_shown]:
            print(f"- {kw}")

        return results

    async def test_statistical_analysis(self, text_key: str):
        """Test statistical keyword extraction (also prints the tokens)."""
        return await self._run_analysis(
            text_key,
            label="Statistical",
            config={"weights": {"statistical": 1.0, "llm": 0.0}},  # Statistical only
            show_tokens=True,
        )

    async def test_llm_analysis(self, text_key: str):
        """Test LLM-based keyword extraction."""
        return await self._run_analysis(
            text_key,
            label="LLM",
            config={"weights": {"statistical": 0.0, "llm": 1.0}},  # LLM only
        )

    async def test_combined_analysis(self, text_key: str):
        """Test combined statistical and LLM analysis."""
        return await self._run_analysis(
            text_key,
            label="Combined",
            config={
                "weights": {"statistical": 0.4, "llm": 0.6},
                "max_keywords": 10,
                "min_confidence": 0.3
            },
            max_shown=None,  # the analyzer config above already caps the count
        )



In [4]:
# Initialize the shared tester used by the cells below.
# Note: constructing KeywordTester also saves the sample texts to disk
# through FileUtils (see KeywordTester._load_test_texts).
tester = KeywordTester()

# Create test functions that can be run in notebook cells
async def test_text(text_key: str):
    """Run the statistical, LLM and combined analyses for one sample text.

    Args:
        text_key: Key of the sample text held by the module-level ``tester``.

    Returns:
        Tuple ``(stat_results, llm_results, combined_results)``.
    """
    print(f"\nTesting {text_key} text:")
    print("=" * 70)

    # The three analyses run strictly one after another.
    stat_results = await tester.test_statistical_analysis(text_key)
    llm_results = await tester.test_llm_analysis(text_key)
    combined_results = await tester.test_combined_analysis(text_key)
    outcomes = (stat_results, llm_results, combined_results)

    # Side-by-side summary; a result without `keywords` prints as an empty list.
    print("\nResults Comparison:")
    print("-" * 50)
    for heading, outcome in zip(("Statistical:", "LLM:", "Combined:"), outcomes):
        print(heading, getattr(outcome, "keywords", []))

    return outcomes

2024-11-10 21:12:45 - src.utils.FileUtils.file_utils - INFO - Data saved to /home/topi/data-science/repos/semantic-text-analyzer/data/raw/test_texts.xlsx


In [5]:
# First verify stopwords are properly loaded
def verify_stopwords():
    """Print a diagnostic report on stopword files and language processors.

    First lists every ``*.txt`` file in the ``stop_words`` folder under the
    "configurations" data path (word count plus a sorted sample), then
    creates the English and Finnish text processors and reports their
    stopword sets and how a few common words are classified.
    """
    # Common words probed against each processor, keyed by language code.
    probe_words = {
        "en": ["the", "and", "is", "that", "with", "for"],
        "fi": ["ja", "on", "että", "joka", "tai", "vain"]
    }

    # --- Stopword files on disk ---
    stop_words_dir = file_utils.get_data_path("configurations") / "stop_words"
    print("\nStopwords Directory Contents:")
    print(f"Directory: {stop_words_dir}")
    if not stop_words_dir.exists():
        print("Stopwords directory not found!")
    else:
        for file in stop_words_dir.glob("*.txt"):
            print(f"\nFile: {file.name}")
            words = set()
            with open(file, 'r', encoding='utf-8') as f:
                for line in f:
                    entry = line.strip()
                    if entry:  # skip blank lines
                        words.add(entry)
            print(f"Number of words: {len(words)}")
            print("Sample words (first 10):")
            pprint(sorted(words)[:10])

    # --- Language processors ---
    print("\nTesting Language Processors:")
    for lang, samples in probe_words.items():
        print(f"\n{lang.upper()} Processor:")
        processor = create_text_processor(language=lang)

        # The stopword set is a private attribute, so only report it if present.
        if hasattr(processor, '_stop_words'):
            loaded = processor._stop_words
            print(f"Number of stopwords: {len(loaded)}")
            print("Sample stopwords (first 10):")
            pprint(sorted(loaded)[:10])

        print(f"\nTesting common {lang} words:")
        for word in samples:
            verdict = 'stop word' if processor.is_stop_word(word) else 'not stop word'
            print(f"{word}: {verdict}")



In [6]:
async def test_keyword_extraction():
    """Run keyword extraction on the "business" sample with debug output.

    Prints the original text, the intermediate language-processing steps
    (tokens, filtered tokens, base forms) and the analyzer's results.

    Returns:
        The object returned by ``KeywordAnalyzer.analyze``.
    """
    text = tester.test_texts["business"]

    # The processor is built first because it is used both by the analyzer
    # and for the step-by-step output below.
    language_processor = create_text_processor(language="en")

    # Excluded keywords are optional: any failure while loading them is
    # logged and an empty set is used instead.
    try:
        params_path = file_utils.get_data_path("configurations") / "example_params.xlsx"
        sheets = file_utils.load_excel_sheets(params_path)
        excluded_df = sheets.get("Excluded Keywords", pd.DataFrame())
        if excluded_df.empty:
            excluded_keywords = set()
        else:
            excluded_keywords = set(excluded_df['keyword'].dropna())
    except Exception as e:
        logger.warning(f"Could not load excluded keywords: {e}")
        excluded_keywords = set()

    analyzer_config = {
        "excluded_keywords": excluded_keywords,
        "min_keyword_length": 3,
        "max_keywords": 8,
        "focus": "business metrics and performance"
    }
    analyzer = KeywordAnalyzer(
        config=analyzer_config,
        language_processor=language_processor
    )

    print("=== Keyword Extraction Test ===")
    print("\nOriginal text:")
    print(text)

    print("\nProcessing Steps:")
    print("-" * 50)

    # Step 1: tokenization
    tokens = language_processor.tokenize(text)
    print("\nTokens:")
    print(tokens)

    # Step 2: keep only the tokens the processor considers worth keeping
    filtered = []
    for word in tokens:
        if language_processor.should_keep_word(word):
            filtered.append(word)
    print("\nFiltered tokens (after stopword removal):")
    print(filtered)

    # Step 3: reduce the kept tokens to their base forms
    base_forms = list(map(language_processor.get_base_form, filtered))
    print("\nBase forms:")
    print(base_forms)

    print("\nRunning Analysis:")
    print("-" * 50)
    results = await analyzer.analyze(text)

    print("\nExtracted Keywords:")
    if not hasattr(results, "keywords"):
        # Unexpected result shape: dump it for inspection.
        print("No keywords found in results")
        pprint(results)
    else:
        print("\nKeywords:", results.keywords)
        print("\nKeyword Scores:", getattr(results, "keyword_scores", {}))
        print("\nCompound Words:", getattr(results, "compound_words", []))
        print("\nDomain Keywords:", getattr(results, "domain_keywords", {}))

    return results

async def run_tests():
    """Run the environment check, stopword check and extraction test in order.

    Returns:
        The keyword extraction results, or ``None`` when the environment
        verification fails (the later steps are skipped in that case).
    """
    print("\nVerifying environment and components...")
    environment_ok = verify_environment()
    if not environment_ok:
        print("Environment verification failed!")
        return

    print("\nVerifying stopwords and language processing...")
    verify_stopwords()

    print("\nTesting keyword extraction...")
    return await test_keyword_extraction()



In [7]:
# Run the full sequence: environment check, stopword check, keyword extraction.
# Top-level `await` works here because notebook cells run inside an event loop.
results = await run_tests()

2024-11-10 21:12:45 - src.core.language_processing.factory - INFO - Using default configuration
2024-11-10 21:12:45 - src.core.language_processing.english - INFO - Initialized English processor with 831 stopwords
2024-11-10 21:12:45 - src.core.language_processing.english - INFO - Initialized English processor with 831 stopwords
2024-11-10 21:12:45 - src.core.language_processing.factory - INFO - Using default configuration
2024-11-10 21:12:45 - src.core.language_processing.finnish - INFO - Loaded 747 stopwords from /home/topi/data-science/repos/semantic-text-analyzer/data/configurations/stop_words/fi.txt
2024-11-10 21:12:45 - src.core.language_processing.finnish - INFO - Successfully initialized Voikko using system libraries
2024-11-10 21:12:45 - src.core.language_processing.factory - INFO - Using default configuration
2024-11-10 21:12:45 - src.core.language_processing.english - INFO - Initialized English processor with 831 stopwords
2024-11-10 21:12:45 - src.core.language_processing.en


Verifying environment and components...
Environment Check Results:

Basic Setup:
-----------
✓ Project root in path
✓ Can import src
✓ FileUtils initialized
✓ .env file loaded

Environment Variables:
---------------------
✓ OPENAI_API_KEY set
✓ ANTHROPIC_API_KEY set

Project Structure:
-----------------
✓ Raw data exists
✓ Processed data exists
✓ Configuration exists
✓ Main config.yaml exists

Environment Status: Ready ✓

Verifying stopwords and language processing...

Stopwords Directory Contents:
Directory: /home/topi/data-science/repos/semantic-text-analyzer/data/configurations/stop_words

File: fi.txt
Number of words: 747
Sample words (first 10):
['aiemmin',
 'aika',
 'aikaa',
 'aikaan',
 'aikaisemmin',
 'aikaisin',
 'aikajen',
 'aikana',
 'aikoina',
 'aikoo']

File: en.txt
Number of words: 733
Sample words (first 10):
['a',
 'abaft',
 'abafter',
 'abaftest',
 'about',
 'abouter',
 'aboutest',
 'above',
 'abover',
 'abovest']

File: stop-words-finnish.txt
Number of words: 747
Samp

2024-11-10 21:12:55 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



Extracted Keywords:

Keywords: ['expansion', 'Q3 results', 'revenue growth', 'customer acquisition', 'retention rates']

Keyword Scores: {'expansion': 0.5682605452846603, 'Q3 results': 0.54, 'revenue growth': 0.54, 'customer acquisition': 0.48, 'retention rates': 0.48}

Compound Words: ['customer acquisition', 'retention rates', 'strategic initiative']

Domain Keywords: {'business_metrics': ['revenue growth', 'customer acquisition', 'retention rates', 'strategic initiative']}


In [8]:
# Test the technical sample text (statistical, LLM and combined analyses in sequence)
await test_text("technical")



2024-11-10 21:12:56 - src.core.language_processing.factory - INFO - Using default configuration
2024-11-10 21:12:56 - src.core.language_processing.english - INFO - Initialized English processor with 831 stopwords
2024-11-10 21:12:56 - src.core.language_processing.english - INFO - Initialized English processor with 831 stopwords



Testing technical text:

Testing Statistical Analysis for technical:
Text: 
                Python is a high-level programming language known for its simplicity.
             ...

Tokens:
['Python', 'high-level', 'programming', 'language', 'known', 'simplicity', 'supports', 'multiple', 'programming', 'paradigms', 'including', 'procedural', 'object-oriented', 'programming']


2024-11-10 21:12:58 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-11-10 21:12:58 - src.core.language_processing.factory - INFO - Using default configuration
2024-11-10 21:12:58 - src.core.language_processing.english - INFO - Initialized English processor with 831 stopwords
2024-11-10 21:12:58 - src.core.language_processing.english - INFO - Initialized English processor with 831 stopwords



Statistical Keywords:
- program
- high-level
- simplicity
- language

Testing LLM Analysis for technical:
Text: 
                Python is a high-level programming language known for its simplicity.
             ...


2024-11-10 21:13:00 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-11-10 21:13:00 - src.core.language_processing.factory - INFO - Using default configuration
2024-11-10 21:13:00 - src.core.language_processing.english - INFO - Initialized English processor with 831 stopwords
2024-11-10 21:13:00 - src.core.language_processing.english - INFO - Initialized English processor with 831 stopwords



LLM Keywords:
- Python
- programming
- language
- high-level

Testing Combined Analysis for technical:
Text: 
                Python is a high-level programming language known for its simplicity.
             ...


2024-11-10 21:13:03 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



Combined Keywords:
- language
- high-level
- simplicity
- Python

Results Comparison:
--------------------------------------------------
Statistical: ['program', 'high-level', 'simplicity', 'language']
LLM: ['Python', 'programming', 'language', 'high-level']
Combined: ['language', 'high-level', 'simplicity', 'Python']


(KeywordOutput(language='en', error=None, success=True, keywords=['program', 'high-level', 'simplicity', 'language'], keyword_scores={'program': 1.0, 'high-level': 0.8236741949136424, 'simplicity': 0.8236741949136424, 'language': 0.7547440479624554}, compound_words=['high-level', 'object-oriented', 'procedural'], domain_keywords={'programming': ['Python', 'programming', 'language', 'high-level', 'simplicity', 'object-oriented', 'procedural']}),
 KeywordOutput(language='en', error=None, success=True, keywords=['Python', 'programming', 'language', 'high-level'], keyword_scores={'Python': 0.9, 'programming': 0.85, 'language': 0.8, 'high-level': 0.75}, compound_words=['high-level', 'object-oriented'], domain_keywords={'programming': ['Python', 'programming', 'language', 'high-level', 'simplicity', 'object-oriented', 'procedural']}),
 KeywordOutput(language='en', error=None, success=True, keywords=['language', 'high-level', 'simplicity', 'Python'], keyword_scores={'language': 0.781897619184

In [None]:
# Test the business sample text (statistical, LLM and combined analyses in sequence)
await test_text("business")
    


2024-11-10 20:06:59 - src.core.language_processing.factory - INFO - Using default configuration
2024-11-10 20:06:59 - src.core.language_processing.english - INFO - Initialized English processor with 831 stopwords
2024-11-10 20:06:59 - src.core.language_processing.english - INFO - Initialized English processor with 831 stopwords



Testing business text:

Testing Statistical Analysis for business:
Text: 
                The company's Q3 results exceeded expectations with revenue growth of 15%.
        ...

Tokens:
['company', 'Q3', 'results', 'exceeded', 'expectations', 'revenue', 'growth', '15', 'Customer', 'acquisition', 'costs', 'decreased', 'retention', 'rates', 'improved', 'board', 'approved', 'strategic', 'initiative', 'focusing', 'expansion']


2024-11-10 20:07:01 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-11-10 20:07:01 - src.core.language_processing.factory - INFO - Using default configuration
2024-11-10 20:07:01 - src.core.language_processing.english - INFO - Initialized English processor with 831 stopwords
2024-11-10 20:07:01 - src.core.language_processing.english - INFO - Initialized English processor with 831 stopwords



Statistical Keywords:
- expectation
- company
- revenue
- result
- exceed

Testing LLM Analysis for business:
Text: 
                The company's Q3 results exceeded expectations with revenue growth of 15%.
        ...


2024-11-10 20:07:03 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-11-10 20:07:03 - src.core.language_processing.factory - INFO - Using default configuration
2024-11-10 20:07:03 - src.core.language_processing.english - INFO - Initialized English processor with 831 stopwords
2024-11-10 20:07:03 - src.core.language_processing.english - INFO - Initialized English processor with 831 stopwords



LLM Keywords:
- Q3 results
- revenue growth
- customer acquisition
- strategic initiative
- retention rates

Testing Combined Analysis for business:
Text: 
                The company's Q3 results exceeded expectations with revenue growth of 15%.
        ...


2024-11-10 20:07:06 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



Combined Keywords:
- revenue
- growth
- retention
- customer
- acquisition

Results Comparison:
--------------------------------------------------
Statistical: ['expectation', 'company', 'revenue', 'result', 'exceed']
LLM: ['Q3 results', 'revenue growth', 'customer acquisition', 'strategic initiative', 'retention rates']
Combined: ['revenue', 'growth', 'retention', 'customer', 'acquisition']


(KeywordOutput(language='en', error=None, success=True, keywords=['expectation', 'company', 'revenue', 'result', 'exceed'], keyword_scores={'expectation': 1.0, 'company': 0.8368288369533894, 'revenue': 0.8368288369533894, 'result': 0.783091851446946, 'exceed': 0.783091851446946}, compound_words=['customer acquisition', 'strategic initiative'], domain_keywords={'business': ['revenue', 'growth', 'acquisition', 'retention', 'expansion']}),
 KeywordOutput(language='en', error=None, success=True, keywords=['Q3 results', 'revenue growth', 'customer acquisition', 'strategic initiative', 'retention rates'], keyword_scores={'Q3 results': 0.9, 'revenue growth': 0.85, 'customer acquisition': 0.8, 'strategic initiative': 0.8, 'retention rates': 0.75}, compound_words=['customer acquisition', 'strategic initiative'], domain_keywords={'business': ['revenue', 'growth', 'acquisition', 'retention', 'expansion']}),
 KeywordOutput(language='en', error=None, success=True, keywords=['revenue', 'growth', 're

In [11]:
# Test the Finnish sample text (statistical, LLM and combined analyses in sequence)
await test_text("finnish")

2024-11-10 16:26:02 - src.core.language_processing.factory - INFO - Using default configuration



Testing finnish text:


2024-11-10 16:26:02 - src.core.language_processing.finnish - INFO - Loaded 747 stopwords from /home/topi/data-science/repos/semantic-text-analyzer/data/configurations/stop_words/fi.txt
2024-11-10 16:26:02 - src.core.language_processing.finnish - INFO - Successfully initialized Voikko using system libraries



Testing Statistical Analysis for finnish:
Text: 
                Ohjelmistokehittäjä työskentelee asiakasprojektissa kehittäen uusia 
              ...

Tokens:
['Ohjelmistokehittäjä', 'työskentelee', 'asiakasprojektissa', 'kehittäen', 'uusia', 'ominaisuuksia', 'verkkokauppajärjestelmään', 'Tekninen', 'toteutus', 'vaatii', 'erityistä', 'huomiota', 'tietoturvan', 'osalta']


2024-11-10 16:26:06 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-11-10 16:26:06 - src.core.language_processing.factory - INFO - Using default configuration
2024-11-10 16:26:06 - src.core.language_processing.finnish - INFO - Loaded 747 stopwords from /home/topi/data-science/repos/semantic-text-analyzer/data/configurations/stop_words/fi.txt
2024-11-10 16:26:06 - src.core.language_processing.finnish - INFO - Successfully initialized Voikko using system libraries



Statistical Keywords:
- ohjelmistokehittäjä
- työskennellä
- asiakasprojekti
- kehittää
- uusi
- ominaisuus
- verkkokauppajärjestelmä
- tekninen

Testing LLM Analysis for finnish:
Text: 
                Ohjelmistokehittäjä työskentelee asiakasprojektissa kehittäen uusia 
              ...


2024-11-10 16:26:09 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-11-10 16:26:09 - src.core.language_processing.factory - INFO - Using default configuration
2024-11-10 16:26:09 - src.core.language_processing.finnish - INFO - Loaded 747 stopwords from /home/topi/data-science/repos/semantic-text-analyzer/data/configurations/stop_words/fi.txt
2024-11-10 16:26:09 - src.core.language_processing.finnish - INFO - Successfully initialized Voikko using system libraries



LLM Keywords:
- ohjelmistokehittäjä
- työskennellä
- asiakasprojekti
- kehittää
- uusi
- ominaisuus
- verkkokauppajärjestelmä
- tekninen

Testing Combined Analysis for finnish:
Text: 
                Ohjelmistokehittäjä työskentelee asiakasprojektissa kehittäen uusia 
              ...


2024-11-10 16:26:12 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



Combined Keywords:
- ohjelmistokehittäjä
- työskennellä
- asiakasprojekti
- kehittää
- uusi
- ominaisuus
- verkkokauppajärjestelmä
- tekninen
- toteutus
- vaatia

Results Comparison:
--------------------------------------------------
Statistical: ['ohjelmistokehittäjä', 'työskennellä', 'asiakasprojekti', 'kehittää', 'uusi', 'ominaisuus', 'verkkokauppajärjestelmä', 'tekninen']
LLM: ['ohjelmistokehittäjä', 'työskennellä', 'asiakasprojekti', 'kehittää', 'uusi', 'ominaisuus', 'verkkokauppajärjestelmä', 'tekninen']
Combined: ['ohjelmistokehittäjä', 'työskennellä', 'asiakasprojekti', 'kehittää', 'uusi', 'ominaisuus', 'verkkokauppajärjestelmä', 'tekninen', 'toteutus', 'vaatia']


(KeywordOutput(language='fi', error=None, success=True, keywords=['ohjelmistokehittäjä', 'työskennellä', 'asiakasprojekti', 'kehittää', 'uusi', 'ominaisuus', 'verkkokauppajärjestelmä', 'tekninen'], keyword_scores={'ohjelmistokehittäjä': 1.0, 'työskennellä': 1.0, 'asiakasprojekti': 1.0, 'kehittää': 1.0, 'uusi': 1.0, 'ominaisuus': 1.0, 'verkkokauppajärjestelmä': 1.0, 'tekninen': 1.0, 'toteutus': 1.0, 'vaatia': 1.0, 'eritty': 1.0, 'huomio': 1.0, 'tietoturpa': 1.0, 'osa': 1.0, 'tietoturva': 0.0}, compound_words=['verkkokauppajärjestelmä'], domain_keywords={'software_development': ['ohjelmistokehittäjä', 'kehittää', 'toteutus', 'tietoturva']}),
 KeywordOutput(language='fi', error=None, success=True, keywords=['ohjelmistokehittäjä', 'työskennellä', 'asiakasprojekti', 'kehittää', 'uusi', 'ominaisuus', 'verkkokauppajärjestelmä', 'tekninen'], keyword_scores={'ohjelmistokehittäjä': 0.9, 'työskennellä': 0.0, 'asiakasprojekti': 0.85, 'kehittää': 0.75, 'uusi': 0.0, 'ominaisuus': 0.7, 'verkkokauppaj