In [1]:
# Import required modules
import sys
from pathlib import Path
import pandas as pd
from typing import List, Dict, Any
import logging
import asyncio
import logging
from dotenv import load_dotenv

# Treat the parent of the current working directory as the project root and
# expose it on sys.path exactly once, so the `src.*` imports below resolve.
# NOTE(review): assumes the kernel is started from a sub-folder of the
# repository (e.g. notebooks/) — confirm for other launch locations.
project_root = str(Path().resolve().parent)
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)

# Import necessary components
from src.nb_helpers.environment import setup_notebook_env, verify_environment
from src.semantic_analyzer import SemanticAnalyzer
from src.core.config import AnalyzerConfig
from src.core.language_processing import create_text_processor
from src.core.llm.factory import create_llm
from src.utils.output_formatter import (
    ExcelFormatter, DetailedFormatter, OutputDetail, ExcelOutputConfig, BaseColumnFormat
)
from FileUtils import FileUtils, OutputFileType

# Notebook-wide logger; the demo functions below use it for debug and error messages.
logger = logging.getLogger(__name__)


In [2]:
# Set up the notebook environment with DEBUG-level logging, then run the
# environment check. verify_environment() is the last expression of the cell,
# so its return value is displayed as the cell output.
# NOTE(review): with log_level="DEBUG" the later cells emit long DEBUG lines
# (e.g. full config dumps); consider "INFO" before sharing the notebook.
setup_notebook_env(log_level="DEBUG")
verify_environment()



2024-12-11 12:04:14,909 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer
2024-12-11 12:04:14,917 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


2024-12-11 12:04:14,961 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer
2024-12-11 12:04:14,970 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage
Environment Check Results:

Basic Setup:
-----------
✓ Project root in path
✓ FileUtils initialized
✓ .env file loaded

Environment Variables:
---------------------
✓ OPENAI_API_KEY set
✓ ANTHROPIC_API_KEY set

Project Structure:
-----------------
✓ Raw data exists
✓ Processed data exists
✓ Configuration exists
✓ Main config.yaml exists

Environment Status: Ready ✓


True

In [3]:
# Initialize FileUtils (shared by the analyzer and the formatters below)
file_utils = FileUtils()

# Provider and model used for every LLM call in this notebook. They are named
# once here so the analyzer config and the create_llm() call cannot drift apart.
LLM_PROVIDER = "openai"
LLM_MODEL = "gpt-4o-mini"

# Create analyzer config and point its defaults at the chosen provider/model
config = AnalyzerConfig()
config.config["models"]["default_provider"] = LLM_PROVIDER
config.config["models"]["default_model"] = LLM_MODEL

# Create LLM instance for the same provider the config was just set to
llm = create_llm(provider=LLM_PROVIDER, config=config)

# Initialize language processors (English and Finnish)
en_processor = create_text_processor(language="en")
fi_processor = create_text_processor(language="fi")



2024-12-11 12:04:15,056 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer
2024-12-11 12:04:15,063 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage
2024-12-11 12:04:15,098 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer
2024-12-11 12:04:15,105 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage
2024-12-11 12:04:23,139 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:04:23,148 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage


2024-12-11 12:04:23,216 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:04:23,225 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage
INFO: Loaded 747 stopwords from c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer\data\configurations\stop_words\fi.txt
DEBUG: Initialized with config: {}
DEBUG: Trying library paths: ['C:\\scripts\\Voikko\\libvoikko-1.dll', 'C:\\Program Files\\Voikko\\libvoikko-1.dll', 'C:\\Voikko\\libvoikko-1.dll']
DEBUG: Trying dictionary paths: ['C:\\scripts\\Voikko', 'C:\\Program Files\\Voikko', 'C:\\Voikko']
DEBUG: Added C:\scripts\Voikko to DLL search path
INFO: Successfully initialized Voikko with path: C:\scripts\Voikko


In [4]:
# Create the single analyzer instance shared by every demo in this notebook.
# The demo functions switch its language with analyzer.set_language(...) before
# analysing, so this object carries state from one cell to the next.
analyzer = SemanticAnalyzer(
    llm=llm,
    file_utils=file_utils,
    parameter_file="parameters_en.xlsx"  # Default to English parameters
)



2024-12-11 12:04:23,385 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:04:23,393 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage


2024-12-11 12:04:24,184 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:04:24,193 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage
INFO: Semantic analyzer initialization complete


In [5]:
# Test data for demonstrations.
# Layout: test_texts[language_code][text_type] -> sample text.
# English has "technical", "business" and "mixed"; Finnish has "technical" and
# "business" only.
# NOTE: the continuation lines of each triple-quoted literal are indented inside
# the string, so every sample contains that leading whitespace (and, on some
# lines, a trailing space) exactly as written here.
test_texts = {
    "en": {
        "technical": """Machine learning models are trained using large datasets to recognize patterns. 
                    The neural network architecture includes multiple layers for feature extraction. 
                    Data preprocessing and feature engineering are crucial steps.""",
        
        "business": """Q3 financial results show 15% revenue growth and improved profit margins. 
                    Customer acquisition costs decreased while retention rates increased. 
                    Market expansion strategy focuses on emerging technology sectors.""",
        
        "mixed": """Our AI platform leverages machine learning to optimize customer engagement.
                 The system analyzes user behavior patterns to improve conversion rates.
                 Q2 results showed 25% improvement in customer retention metrics."""
    },
    "fi": {
        "technical": """Koneoppimismalleja koulutetaan suurilla datajoukolla tunnistamaan kaavoja. 
                    Neuroverkon arkkitehtuuri sisältää useita kerroksia piirteiden erottamiseen. 
                    Datan esikäsittely ja piirteiden suunnittelu ovat keskeisiä vaiheita.""",
        
        "business": """Q3 taloudelliset tulokset osoittavat 15% liikevaihdon kasvun ja parantuneet katteet. 
                    Asiakashankinnan kustannukset laskivat ja asiakaspysyvyys parani. 
                    Markkinalaajennusstrategia keskittyy nouseviin teknologiasektoreihin."""
    }
}



In [6]:
# Initialize formatters with different detail levels.
# Both share the notebook's file_utils instance. The demos below call
# summary_formatter.format_output(...) for compact per-type strings and
# detailed_formatter.format_detailed_output(...) for per-type
# summary / details / metadata entries.
summary_formatter = ExcelFormatter(
    file_utils=file_utils,
    config=ExcelOutputConfig(detail_level=OutputDetail.SUMMARY)
)

detailed_formatter = DetailedFormatter(
    file_utils=file_utils,
    config=ExcelOutputConfig(detail_level=OutputDetail.DETAILED)
)



In [7]:
# 1. Single Text Analysis with Different Detail Levels
async def demonstrate_single_analysis(text: str, language: str = "en"):
    """Analyze one text and print the outcome in summary and detailed form.

    The shared ``analyzer`` is switched to ``language`` and asked for keywords,
    themes and categories. The raw result is printed first, followed by the
    output of ``summary_formatter.format_output`` and then the output of
    ``detailed_formatter.format_detailed_output``.

    Args:
        text: The text to analyze.
        language: Language code handed to the analyzer (default ``"en"``).

    Returns:
        None. Everything is written to stdout.

    Any exception raised while analyzing or formatting is logged through the
    notebook logger and printed as ``Error: ...``; it is not re-raised.
    """
    divider = "-" * 50
    wanted = ("keywords", "themes", "categories")

    def show_bullets(joined):
        # Formatter strings separate their items with "; ".
        for piece in joined.split("; "):
            print(f"  • {piece}")

    print(f"Analyzing {language.upper()} text:")
    print(divider)
    print(text)
    print("\n")

    try:
        # Switch the shared analyzer to the requested language first.
        analyzer.set_language(language)

        result = await analyzer.analyze(
            text,
            analysis_types=list(wanted),
            language=language,  # Pass language explicitly
        )

        def as_payload():
            # Fresh dict per formatter call, built from the result attributes.
            return {
                "keywords": result.keywords,
                "themes": result.themes,
                "categories": result.categories,
            }

        print("\nAnalysis Results:")
        print(divider)
        print(result)

        # --- summary view -------------------------------------------------
        print("\nSummary Output:")
        print(divider)
        summary = summary_formatter.format_output(
            results=as_payload(),
            analysis_types=list(wanted),
        )

        logger.debug(f"Summary formatter results: {summary}")

        for section, rendered in summary.items():
            print(f"\n{section.title()}:")
            if not rendered:
                print("  No results")
                continue
            show_bullets(rendered)

        # --- detailed view ------------------------------------------------
        print("\nDetailed Output:")
        print(divider)
        detailed = detailed_formatter.format_detailed_output(
            results=as_payload(),
            analysis_types=list(wanted),
        )

        for section, rendered in detailed.items():
            print(f"\n{section.title()}:")
            print("Summary:")
            if rendered["summary"]:
                show_bullets(rendered["summary"])

            print("\nDetails:")
            details = rendered["details"]
            if isinstance(details, str):
                # "; "-joined string form
                show_bullets(details)
            elif isinstance(details, dict):
                # Mapping form: one bullet per key, list values as sub-items
                for label, content in details.items():
                    print(f"  • {label}:")
                    if isinstance(content, list):
                        for entry in content:
                            print(f"    - {entry}")
                    else:
                        print(f"    {content}")

            print("\nMetadata:")
            for label, content in rendered["metadata"].items():
                print(f"  • {label}: {content}")
    except Exception as exc:
        logger.error(f"Analysis failed: {exc}")
        print(f"Error: {str(exc)}")

In [8]:
async def demonstrate_excel_processing_details():
    """Analyze every row of an Excel sheet and save the formatted results.

    Loads ``test_content_short.xlsx`` through ``file_utils`` (input type
    ``"raw"``), analyzes the ``keskustelu`` column row by row with the shared
    ``analyzer``, formats each analysis type with ``summary_formatter`` and
    writes a copy of the input frame with ``keywords`` / ``themes`` /
    ``categories`` columns to processed storage as a timestamped XLSX file.

    A row's language comes from the optional ``language`` column; blank or
    missing cells fall back to ``"en"``. A failure on one row is logged and
    recorded as ``"Error: ..."`` in that row's result columns so the remaining
    rows are still processed. Failures outside the row loop (loading, saving)
    are printed and swallowed, as this is a best-effort demo.

    Returns:
        None. Progress and a preview of the results are printed to stdout.
    """
    input_filename = "test_content_short.xlsx"  # Resolved by FileUtils (input_type="raw")
    output_filename = "analysis_results"  # Without extension
    text_column = "keskustelu"
    language_column = "language"  # Optional column holding the per-row language
    default_language = "en"
    analysis_types = ["keywords", "themes", "categories"]

    print(f"Processing Excel file: {input_filename}")
    print("-" * 50)

    try:
        # Load input file using FileUtils
        input_df = file_utils.load_single_file(
            file_path=input_filename,
            input_type="raw"
        )

        if text_column not in input_df.columns:
            print(f"Error: Column '{text_column}' not found in input file")
            return

        has_language_column = language_column in input_df.columns
        total_rows = len(input_df)

        # Process each row with appropriate language handling
        results = []
        # Count rows by position: the index label is not guaranteed to be a
        # zero-based integer, so it cannot serve as a progress counter.
        for row_number, (_, row) in enumerate(input_df.iterrows(), start=1):
            # Resolve the language before the try block so the error handler
            # can never see an unbound name; blank/NaN cells use the default.
            language = default_language
            if has_language_column:
                cell = row[language_column]
                if isinstance(cell, str) and cell.strip():
                    language = cell.strip()

            try:
                # Update analyzer's language
                analyzer.set_language(language)

                # Analyze text
                result = await analyzer.analyze(
                    row[text_column],
                    analysis_types=list(analysis_types),
                    language=language
                )

                # Format each analysis type on its own, as a summary string
                formatted_result = {
                    key: summary_formatter.format_output(
                        {key: getattr(result, key)},
                        [key]
                    ).get(key, "")
                    for key in analysis_types
                }

                results.append(formatted_result)
                print(f"Processed row {row_number}/{total_rows} ({language})")

            except Exception as e:
                # Keep going: record the failure in this row's output cells.
                logger.error(f"Error processing row {row_number}: {e}")
                results.append({key: f"Error: {str(e)}" for key in analysis_types})

        # Create results DataFrame (input columns plus one column per type)
        results_df = input_df.copy()
        for key in analysis_types:
            results_df[key] = [r.get(key, "") for r in results]

        # Save using FileUtils
        saved_files, _ = file_utils.save_data_to_storage(
            data={"analysis_results": results_df},
            output_type="processed",
            file_name=output_filename,
            output_filetype=OutputFileType.XLSX,
            include_timestamp=True
        )

        print("\nProcessing complete!")
        output_path = next(iter(saved_files.values()))
        print(f"Results saved to: {output_path}")

        print("\nFirst few rows of results:")
        # Scope the display tweaks to this preview instead of changing the
        # notebook-wide pandas options.
        with pd.option_context('display.max_columns', None,
                               'display.max_colwidth', 50):
            print(results_df.head())

    except Exception as e:
        print(f"Error processing Excel file: {e}")

In [9]:
async def demonstrate_excel_processing():
    """Analyze an Excel sheet row by row and save a simplified result table.

    Loads ``test_content_fi.xlsx`` through ``file_utils`` (input type
    ``"raw"``), analyzes the ``content`` column with the shared ``analyzer``
    and formats each analysis type with a formatter configured for minimal
    output (names only; at most 5 keywords, 3 themes and 2 categories). A copy
    of the input frame with ``keywords`` / ``themes`` / ``categories`` columns
    is written to processed storage as a timestamped XLSX file.

    A row's language comes from the optional ``language`` column; blank or
    missing cells fall back to ``"en"``. A failure on one row is logged and
    recorded as ``"Error: ..."`` in that row's result columns. Failures outside
    the row loop (loading, saving) are printed and swallowed, as this is a
    best-effort demo.

    Returns:
        None. A preview of the results is printed to stdout.
    """
    file_name = "test_content_fi.xlsx"
    output_filename = "analysis_results"
    text_column = "content"
    language_column = "language"
    default_language = "en"
    analysis_types = ["keywords", "themes", "categories"]

    # Create simplified formatter configuration
    simple_config = ExcelOutputConfig(
        detail_level=OutputDetail.MINIMAL,  # Use minimal detail level
        keywords_format=BaseColumnFormat(
            column_name="keywords",
            format_template="{keyword}",  # Just show keyword without score/domain
            included_fields=["keyword"],
            max_items=5  # Limit number of keywords
        ),
        themes_format=BaseColumnFormat(
            column_name="themes",
            format_template="{name}",  # Just show theme name
            included_fields=["name"],
            max_items=3  # Limit number of themes
        ),
        categories_format=BaseColumnFormat(
            column_name="categories",
            format_template="{name}",  # Just show category name
            included_fields=["name"],
            max_items=2  # Limit number of categories
        )
    )

    # Create formatter with simplified config
    simple_formatter = ExcelFormatter(
        file_utils=file_utils,
        config=simple_config
    )

    try:
        # Load and process input file
        input_df = file_utils.load_single_file(
            file_path=file_name,
            input_type="raw"
        )

        if text_column not in input_df.columns:
            print(f"Error: Column '{text_column}' not found in input file")
            return

        has_language_column = language_column in input_df.columns

        results = []
        # Count rows by position: the index label is not guaranteed to be a
        # zero-based integer, so it cannot serve as a row number.
        for row_number, (_, row) in enumerate(input_df.iterrows(), start=1):
            # Resolve the language before the try block so the error handler
            # can never see an unbound name; blank/NaN cells use the default.
            language = default_language
            if has_language_column:
                cell = row[language_column]
                if isinstance(cell, str) and cell.strip():
                    language = cell.strip()

            try:
                analyzer.set_language(language)

                result = await analyzer.analyze(
                    row[text_column],
                    analysis_types=list(analysis_types),
                    language=language
                )

                # Format each analysis type on its own with the simplified formatter
                formatted_result = {
                    key: simple_formatter.format_output(
                        {key: getattr(result, key)},
                        [key]
                    ).get(key, "")
                    for key in analysis_types
                }
                results.append(formatted_result)

            except Exception as e:
                # Keep going: record the failure in this row's output cells.
                logger.error(f"Error processing row {row_number}: {e}")
                results.append({key: f"Error: {str(e)}" for key in analysis_types})

        # Create results (input columns plus one column per analysis type)
        results_df = input_df.copy()
        for key in analysis_types:
            results_df[key] = [r.get(key, "") for r in results]

        # Save results
        saved_files, _ = file_utils.save_data_to_storage(
            data={"analysis_results": results_df},
            output_type="processed",
            file_name=output_filename,
            output_filetype=OutputFileType.XLSX,
            include_timestamp=True
        )

        print("\nProcessing complete!")
        print(f"Results saved to: {next(iter(saved_files.values()))}")
        print("\nFirst few rows of results:")
        # Scope the display tweaks to this preview instead of changing the
        # notebook-wide pandas options.
        with pd.option_context('display.max_columns', None,
                               'display.max_colwidth', 30,  # Shorter column width
                               'display.expand_frame_repr', False):
            print(results_df.head())

    except Exception as e:
        print(f"Error processing Excel file: {e}")

In [10]:
async def demonstrate_custom_format():
    """Demonstrate custom formatting options with language support.

    Builds an ``ExcelFormatter`` with custom column names and templates for
    keywords and themes, then analyzes every sample in ``test_texts`` (all
    languages, all text types) and prints the formatted output per analysis
    type.

    The analyzer is switched with ``analyzer.set_language`` and the language is
    also passed to ``analyze``, matching the other demos in this notebook. A
    failure while switching language skips that language; a failure on one text
    is printed and the loop continues with the next text.

    Returns:
        None. Everything is written to stdout.
    """
    analysis_types = ["keywords", "themes"]

    # Create custom config with language-aware templates.
    # Every placeholder in a template must be one of its included_fields.
    custom_config = ExcelOutputConfig(
        detail_level=OutputDetail.SUMMARY,
        keywords_format=BaseColumnFormat(
            column_name="key_terms",
            format_template="{keyword} ({domain}) [{language}]",
            included_fields=["keyword", "domain", "language"],
            confidence_threshold=0.5,
            max_items=3
        ),
        themes_format=BaseColumnFormat(
            column_name="main_themes",
            format_template="{name} ({confidence}) [{language}]",
            included_fields=["name", "confidence", "language"],
            confidence_threshold=0.6,
            max_items=2
        )
    )

    custom_formatter = ExcelFormatter(
        file_utils=file_utils,
        config=custom_config
    )

    # Test both languages
    for lang, texts in test_texts.items():
        print(f"\nAnalyzing {lang.upper()} text with custom format:")
        print("-" * 50)

        # Switch the shared analyzer to this language the same way the other
        # demos do, rather than assigning its language_processor directly.
        try:
            analyzer.set_language(lang)
        except Exception as e:
            print(f"Error switching analyzer to {lang}: {e}")
            continue

        # Analyze each type of text
        for text_type, text in texts.items():
            print(f"\nText type: {text_type}")

            try:
                result = await analyzer.analyze(
                    text,
                    analysis_types=list(analysis_types),
                    language=lang  # Pass language explicitly
                )

                # Hand the formatter a dict keyed by analysis type, the same
                # shape the summary formatter receives elsewhere.
                custom_output = custom_formatter.format_output(
                    results={
                        "keywords": result.keywords,
                        "themes": result.themes
                    },
                    analysis_types=list(analysis_types)
                )

                for analysis_type, output in custom_output.items():
                    print(f"\n{analysis_type.title()}:")
                    print(output)

            except Exception as e:
                print(f"Error analyzing {lang} {text_type} text: {e}")



In [12]:
async def run_demos():
    """Run the demonstrations one after another, reporting any failure.

    Order: English single-text analysis, Finnish single-text analysis, Excel
    processing, custom formatting. Each demo is announced with a banner before
    it starts. If a demo raises, the error is printed and then re-raised, so the
    remaining demos do not run.
    """
    # (banner, zero-argument starter) pairs; starters are lambdas so that every
    # lookup and call happens only when its demo is actually reached.
    demo_plan = (
        (
            "=== English Text Analysis Demo ===\n",
            lambda: demonstrate_single_analysis(test_texts["en"]["mixed"], language="en"),
        ),
        (
            "\n=== Finnish Text Analysis Demo ===\n",
            lambda: demonstrate_single_analysis(test_texts["fi"]["technical"], language="fi"),
        ),
        (
            "\n=== Excel Processing Demo ===\n",
            lambda: demonstrate_excel_processing(),
        ),
        (
            "\n=== Custom Format Demo ===\n",
            lambda: demonstrate_custom_format(),
        ),
    )

    try:
        for banner, start_demo in demo_plan:
            print(banner)
            await start_demo()
    except Exception as failure:
        print(f"Error running demonstrations: {failure}")
        raise



In [None]:
"""Run all demonstrations with proper error handling."""
# 1. English text analysis
print("=== English Text Analysis Demo ===\n")
await demonstrate_single_analysis(test_texts["en"]["mixed"], language="en")



In [None]:
# 2. Finnish text analysis
# Runs the single-text demo on the Finnish "technical" sample with language="fi".
print("\n=== Finnish Text Analysis Demo ===\n")
await demonstrate_single_analysis(test_texts["fi"]["technical"], language="fi")



In [13]:
# 3. Excel processing
# Runs the simplified Excel demo (demonstrate_excel_processing); the detailed
# variant, demonstrate_excel_processing_details, is not invoked by this cell.
print("\n=== Excel Processing Demo ===\n")
await demonstrate_excel_processing()






=== Excel Processing Demo ===

2024-12-11 12:04:30,353 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:04:30,366 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage


2024-12-11 12:04:30,466 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:04:30,476 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage
INFO: Loaded 747 stopwords from c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer\data\configurations\stop_words\fi.txt
DEBUG: Initialized with config: {'default_language': 'en', 'content_column': 'content', 'analysis': {'keywords': {'max_keywords': 5, 'min_keyword_length': 3, 'include_compounds': True}, 'themes': {'max_themes': 3, 'min_confidence': 0.5, 'include_hierarchy': True}, 'categories': {'max_categories': 3, 'min_confidence': 0.3, 'require_evidence': True}}, 'models': {'default_provider': 'openai', 'default_model': 'gpt-4o-mini', 'parameters': {'temperature': 0.0, 'max_tokens': 1000, 'top_p': 1.0, 'frequency_penalty': 0.0, 'presence_penalty': 0.0}, 'providers': {'azure': {'api_version': '2024-02-15-preview', 'api_type': 'azure'}, 'openai': {'api_type': 'open_ai'}, 'anthropic': {'api_type': 'anthropic'}}}, 'features': {'use_caching': True, 'use_async': T

2024-12-11 12:04:30,559 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:04:30,570 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage


2024-12-11 12:04:30,643 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:04:30,651 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage
INFO: Loaded 747 stopwords from c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer\data\configurations\stop_words\fi.txt
DEBUG: Initialized with config: {'default_language': 'en', 'content_column': 'content', 'analysis': {'keywords': {'max_keywords': 5, 'min_keyword_length': 3, 'include_compounds': True}, 'themes': {'max_themes': 3, 'min_confidence': 0.5, 'include_hierarchy': True}, 'categories': {'max_categories': 3, 'min_confidence': 0.3, 'require_evidence': True}}, 'models': {'default_provider': 'openai', 'default_model': 'gpt-4o-mini', 'parameters': {'temperature': 0.0, 'max_tokens': 1000, 'top_p': 1.0, 'frequency_penalty': 0.0, 'presence_penalty': 0.0}, 'providers': {'azure': {'api_version': '2024-02-15-preview', 'api_type': 'azure'}, 'openai': {'api_type': 'open_ai'}, 'anthropic': {'api_type': 'anthropic'}}}, 'features': {'use_caching': True, 'use_async': T


Processing response: {'categories': [{'category': 'Machine Learning', 'confidence': 0.95, 'explanation': 'The text discusses the training of machine learning models using large datasets to identify patterns, which is a core concept in the field of machine learning.', 'evidence': [{'text': 'Koneoppimismalleja koulutetaan suurilla datajoukolla tunnistamaan kaavoja.', 'relevance': 0.9, 'matched_keywords': ['koneoppimismalli', 'datajoukko', 'kaava'], 'context': 'The sentence explicitly mentions training machine learning models with large datasets to recognize patterns.'}, {'text': 'Neuroverkon arkkitehtuuri sisältää useita kerroksia piirteiden erottamiseen.', 'relevance': 0.9, 'matched_keywords': ['neuroverkon', 'arkkitehtuuri', 'kerros', 'piirre'], 'context': 'This part of the text refers to the architecture of neural networks, which is a specific type of machine learning model.'}], 'themes': ['artificial intelligence', 'data analysis']}], 'relationships': {'Machine Learning': ['Artifici

INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



Raw LLM response: {'themes': [{'name': 'Koneoppimismallit', 'description': 'Koneoppimismallit ovat algoritmeja, jotka oppivat suurista datajoukoista tunnistamaan kaavoja ja tekemään ennusteita.', 'confidence': 0.95, 'keywords': ['koneoppimismalli', 'datajoukko', 'oppia'], 'domain': 'technical', 'parent_theme': None}, {'name': 'Neuroverkon arkkitehtuuri', 'description': 'Neuroverkon arkkitehtuuri koostuu useista kerroksista, jotka mahdollistavat piirteiden erottamisen ja syvällisen oppimisen.', 'confidence': 0.9, 'keywords': ['neuroverkko', 'arkkitehtuuri', 'kerros'], 'domain': 'technical', 'parent_theme': 'Koneoppimismallit'}, {'name': 'Data ja sen merkitys', 'description': 'Suurten datajoukkojen käyttö on keskeistä koneoppimismallien tehokkuuden ja tarkkuuden kannalta.', 'confidence': 0.85, 'keywords': ['data', 'datajoukko', 'tunnistaminen'], 'domain': 'business', 'parent_theme': None}], 'evidence': {'Koneoppimismallit': [{'text': 'Koneoppimismalleja koulutetaan suurilla datajoukolla

INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:04:39,924 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage


2024-12-11 12:04:40,011 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:04:40,023 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage
INFO: Loaded 747 stopwords from c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer\data\configurations\stop_words\fi.txt
DEBUG: Initialized with config: {'default_language': 'en', 'content_column': 'content', 'analysis': {'keywords': {'max_keywords': 5, 'min_keyword_length': 3, 'include_compounds': True}, 'themes': {'max_themes': 3, 'min_confidence': 0.5, 'include_hierarchy': True}, 'categories': {'max_categories': 3, 'min_confidence': 0.3, 'require_evidence': True}}, 'models': {'default_provider': 'openai', 'default_model': 'gpt-4o-mini', 'parameters': {'temperature': 0.0, 'max_tokens': 1000, 'top_p': 1.0, 'frequency_penalty': 0.0, 'presence_penalty': 0.0}, 'providers': {'azure': {'api_version': '2024-02-15-preview', 'api_type': 'azure'}, 'openai': {'api_type': 'open_ai'}, 'anthropic': {'api_type': 'anthropic'}}}, 'features': {'use_caching': True, 'use_async': T

2024-12-11 12:04:40,092 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:04:40,102 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage


2024-12-11 12:04:40,254 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:04:40,268 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage
INFO: Loaded 747 stopwords from c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer\data\configurations\stop_words\fi.txt
DEBUG: Initialized with config: {'default_language': 'en', 'content_column': 'content', 'analysis': {'keywords': {'max_keywords': 5, 'min_keyword_length': 3, 'include_compounds': True}, 'themes': {'max_themes': 3, 'min_confidence': 0.5, 'include_hierarchy': True}, 'categories': {'max_categories': 3, 'min_confidence': 0.3, 'require_evidence': True}}, 'models': {'default_provider': 'openai', 'default_model': 'gpt-4o-mini', 'parameters': {'temperature': 0.0, 'max_tokens': 1000, 'top_p': 1.0, 'frequency_penalty': 0.0, 'presence_penalty': 0.0}, 'providers': {'azure': {'api_version': '2024-02-15-preview', 'api_type': 'azure'}, 'openai': {'api_type': 'open_ai'}, 'anthropic': {'api_type': 'anthropic'}}}, 'features': {'use_caching': True, 'use_async': T


Processing response: {'categories': [{'category': 'Cloud Services', 'confidence': 0.85, 'explanation': 'The text discusses cloud services and their scalable infrastructure, which directly relates to the category of Cloud Services.', 'evidence': [{'text': 'Pilvipalvelut tarjoavat skaalautuvan infrastruktuurin käyttöönottoon.', 'relevance': 0.9, 'matched_keywords': ['pilvipalvelu', 'infrastruktuuri'], 'context': 'The sentence emphasizes the scalability of cloud services.'}], 'themes': ['scalability', 'infrastructure']}, {'category': 'Microservices Architecture', 'confidence': 0.8, 'explanation': 'The mention of microservices architecture indicates a focus on modular system design, which is a key aspect of this category.', 'evidence': [{'text': 'Mikropalveluarkkitehtuuri mahdollistaa modulaarisen järjestelmäsuunnittelun.', 'relevance': 0.9, 'matched_keywords': ['mikropalvelu', 'modulaarinen'], 'context': 'This sentence highlights the modular design capabilities of microservices architect

INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



Raw LLM response: {'themes': [{'name': 'Pilvipalvelut', 'description': 'Pilvipalvelut tarjoavat skaalautuvan infrastruktuurin, joka mahdollistaa joustavan ja tehokkaan resurssien käytön liiketoiminnassa.', 'confidence': 0.95, 'keywords': ['pilvipalvelu', 'skaalautuva', 'infrastruktuuri'], 'domain': 'technical/business'}, {'name': 'Mikropalveluarkkitehtuuri', 'description': 'Mikropalveluarkkitehtuuri mahdollistaa modulaarisen järjestelmäsuunnittelun, mikä parantaa järjestelmien hallittavuutta ja kehityksen nopeutta.', 'confidence': 0.9, 'keywords': ['mikropalvelu', 'modulaarinen', 'järjestelmäsuunnittelu'], 'domain': 'technical/business'}], 'evidence': {'Pilvipalvelut': [{'text': 'Pilvipalvelut tarjoavat skaalautuvan infrastruktuurin käyttöönottoon.', 'relevance': 0.9, 'keywords': ['pilvipalvelu', 'skaalautuva', 'infrastruktuuri']}], 'Mikropalveluarkkitehtuuri': [{'text': 'Mikropalveluarkkitehtuuri mahdollistaa modulaarisen järjestelmäsuunnittelun.', 'relevance': 0.9, 'keywords': ['mik

INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


2024-12-11 12:04:48,262 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:04:48,272 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage


2024-12-11 12:04:48,361 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:04:48,370 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage
INFO: Loaded 747 stopwords from c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer\data\configurations\stop_words\fi.txt
DEBUG: Initialized with config: {'default_language': 'en', 'content_column': 'content', 'analysis': {'keywords': {'max_keywords': 5, 'min_keyword_length': 3, 'include_compounds': True}, 'themes': {'max_themes': 3, 'min_confidence': 0.5, 'include_hierarchy': True}, 'categories': {'max_categories': 3, 'min_confidence': 0.3, 'require_evidence': True}}, 'models': {'default_provider': 'openai', 'default_model': 'gpt-4o-mini', 'parameters': {'temperature': 0.0, 'max_tokens': 1000, 'top_p': 1.0, 'frequency_penalty': 0.0, 'presence_penalty': 0.0}, 'providers': {'azure': {'api_version': '2024-02-15-preview', 'api_type': 'azure'}, 'openai': {'api_type': 'open_ai'}, 'anthropic': {'api_type': 'anthropic'}}}, 'features': {'use_caching': True, 'use_async': T

2024-12-11 12:04:48,440 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:04:48,458 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage


2024-12-11 12:04:48,595 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:04:48,608 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage
INFO: Loaded 747 stopwords from c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer\data\configurations\stop_words\fi.txt
DEBUG: Initialized with config: {'default_language': 'en', 'content_column': 'content', 'analysis': {'keywords': {'max_keywords': 5, 'min_keyword_length': 3, 'include_compounds': True}, 'themes': {'max_themes': 3, 'min_confidence': 0.5, 'include_hierarchy': True}, 'categories': {'max_categories': 3, 'min_confidence': 0.3, 'require_evidence': True}}, 'models': {'default_provider': 'openai', 'default_model': 'gpt-4o-mini', 'parameters': {'temperature': 0.0, 'max_tokens': 1000, 'top_p': 1.0, 'frequency_penalty': 0.0, 'presence_penalty': 0.0}, 'providers': {'azure': {'api_version': '2024-02-15-preview', 'api_type': 'azure'}, 'openai': {'api_type': 'open_ai'}, 'anthropic': {'api_type': 'anthropic'}}}, 'features': {'use_caching': True, 'use_async': T


Raw LLM response: {'themes': [{'name': 'Versionhallintajärjestelmät', 'description': 'Versionhallintajärjestelmät ovat työkaluja, jotka seuraavat ja hallitsevat lähdekoodin muutoksia, mahdollistaen tehokkaan kehitystyön.', 'confidence': 0.9, 'keywords': ['versionhallinta', 'lähdekoodi', 'muutokset'], 'domain': 'technical', 'parent_theme': None}, {'name': 'Jatkuva integraatio', 'description': 'Jatkuva integraatio on prosessi, joka varmistaa koodin laadun ja mahdollistaa automaattitestauksen, mikä parantaa ohjelmistokehityksen tehokkuutta.', 'confidence': 0.95, 'keywords': ['jatkuva', 'integraatio', 'laatu', 'automaattitestaus'], 'domain': 'technical', 'parent_theme': None}], 'evidence': {'Versionhallintajärjestelmät': [{'text': 'Versionhallintajärjestelmät seuraavat lähdekoodin muutoksia.', 'relevance': 0.9, 'keywords': ['versionhallinta', 'lähdekoodi']}], 'Jatkuva integraatio': [{'text': 'Jatkuva integraatio varmistaa koodin laadun ja automaattitestauksen.', 'relevance': 0.95, 'keywor

INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



Processing response: {'categories': [{'category': 'Software Development', 'confidence': 0.85, 'explanation': 'The text discusses version control systems and continuous integration, which are key concepts in software development.', 'evidence': [{'text': 'Versionhallintajärjestelmät seuraavat lähdekoodin muutoksia.', 'relevance': 0.9, 'matched_keywords': ['lähdekoodi', 'muutos', 'versionhallintajärjestelmät'], 'context': 'The sentence introduces version control systems that track changes in source code.'}, {'text': 'Jatkuva integraatio varmistaa koodin laadun ja automaattitestauksen.', 'relevance': 0.9, 'matched_keywords': ['jatkuva', 'integraatio', 'laatu', 'koodi'], 'context': 'This part emphasizes the importance of continuous integration in ensuring code quality and automated testing.'}], 'themes': ['version control', 'continuous integration', 'code quality']}], 'relationships': {'Software Development': ['DevOps', 'Quality Assurance']}}

Processing category: {'category': 'Software De

INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:04:56,389 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage


2024-12-11 12:04:56,545 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:04:56,554 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage
INFO: Loaded 747 stopwords from c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer\data\configurations\stop_words\fi.txt
DEBUG: Initialized with config: {'default_language': 'en', 'content_column': 'content', 'analysis': {'keywords': {'max_keywords': 5, 'min_keyword_length': 3, 'include_compounds': True}, 'themes': {'max_themes': 3, 'min_confidence': 0.5, 'include_hierarchy': True}, 'categories': {'max_categories': 3, 'min_confidence': 0.3, 'require_evidence': True}}, 'models': {'default_provider': 'openai', 'default_model': 'gpt-4o-mini', 'parameters': {'temperature': 0.0, 'max_tokens': 1000, 'top_p': 1.0, 'frequency_penalty': 0.0, 'presence_penalty': 0.0}, 'providers': {'azure': {'api_version': '2024-02-15-preview', 'api_type': 'azure'}, 'openai': {'api_type': 'open_ai'}, 'anthropic': {'api_type': 'anthropic'}}}, 'features': {'use_caching': True, 'use_async': T

2024-12-11 12:04:56,622 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:04:56,632 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage


2024-12-11 12:04:56,723 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:04:56,733 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage
INFO: Loaded 747 stopwords from c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer\data\configurations\stop_words\fi.txt
DEBUG: Initialized with config: {'default_language': 'en', 'content_column': 'content', 'analysis': {'keywords': {'max_keywords': 5, 'min_keyword_length': 3, 'include_compounds': True}, 'themes': {'max_themes': 3, 'min_confidence': 0.5, 'include_hierarchy': True}, 'categories': {'max_categories': 3, 'min_confidence': 0.3, 'require_evidence': True}}, 'models': {'default_provider': 'openai', 'default_model': 'gpt-4o-mini', 'parameters': {'temperature': 0.0, 'max_tokens': 1000, 'top_p': 1.0, 'frequency_penalty': 0.0, 'presence_penalty': 0.0}, 'providers': {'azure': {'api_version': '2024-02-15-preview', 'api_type': 'azure'}, 'openai': {'api_type': 'open_ai'}, 'anthropic': {'api_type': 'anthropic'}}}, 'features': {'use_caching': True, 'use_async': T


Processing response: {'categories': [{'category': 'Business Performance', 'confidence': 0.95, 'explanation': 'The text discusses financial results, including revenue growth and improved margins, which are key indicators of business performance.', 'evidence': [{'text': 'Q3 taloudelliset tulokset osoittavat 15% liikevaihdon kasvun ja parantuneet katteet.', 'relevance': 0.9, 'matched_keywords': ['liikevaihto', 'kasvu', 'kate'], 'context': 'The text provides specific financial metrics indicating business performance.'}, {'text': 'Asiakashankinnan kustannukset laskivat ja asiakaspysyvyys parani.', 'relevance': 0.85, 'matched_keywords': ['asiakashankinta', 'kustannus', 'asiakaspysyvyys'], 'context': 'This part of the text highlights cost reduction in customer acquisition and improved customer retention, both of which are critical for assessing business health.'}], 'themes': ['financial growth', 'customer acquisition', 'retention strategies']}], 'relationships': {'Business Performance': ['Fi

INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



Raw LLM response: {'themes': [{'name': 'Taloudellinen kasvu', 'description': 'Viittaa yrityksen liikevaihdon kasvuun ja parantuneisiin taloudellisiin tuloksiin.', 'confidence': 0.95, 'keywords': ['liikevaihto', 'kasvu', 'taloudellinen'], 'domain': 'business', 'parent_theme': None}, {'name': 'Asiakashankinta ja pysyvyys', 'description': 'Käsittelee asiakashankinnan kustannusten laskua ja asiakaspysyvyyden parantumista.', 'confidence': 0.9, 'keywords': ['asiakashankinta', 'asiakaspysyvyys', 'kustannus'], 'domain': 'business', 'parent_theme': None}, {'name': 'Katteet', 'description': 'Viittaa parantuneisiin katteisiin, jotka ovat merkki yrityksen taloudellisesta terveydestä.', 'confidence': 0.85, 'keywords': ['katteet', 'parantunut'], 'domain': 'business', 'parent_theme': 'Taloudellinen kasvu'}], 'evidence': {'Taloudellinen kasvu': [{'text': 'Q3 taloudelliset tulokset osoittavat 15% liikevaihdon kasvun', 'relevance': 0.9, 'keywords': ['liikevaihto', 'kasvu']}], 'Asiakashankinta ja pysyvy

INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:05:05,358 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage


2024-12-11 12:05:05,445 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:05:05,456 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage
INFO: Loaded 747 stopwords from c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer\data\configurations\stop_words\fi.txt
DEBUG: Initialized with config: {'default_language': 'en', 'content_column': 'content', 'analysis': {'keywords': {'max_keywords': 5, 'min_keyword_length': 3, 'include_compounds': True}, 'themes': {'max_themes': 3, 'min_confidence': 0.5, 'include_hierarchy': True}, 'categories': {'max_categories': 3, 'min_confidence': 0.3, 'require_evidence': True}}, 'models': {'default_provider': 'openai', 'default_model': 'gpt-4o-mini', 'parameters': {'temperature': 0.0, 'max_tokens': 1000, 'top_p': 1.0, 'frequency_penalty': 0.0, 'presence_penalty': 0.0}, 'providers': {'azure': {'api_version': '2024-02-15-preview', 'api_type': 'azure'}, 'openai': {'api_type': 'open_ai'}, 'anthropic': {'api_type': 'anthropic'}}}, 'features': {'use_caching': True, 'use_async': T

2024-12-11 12:05:05,519 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:05:05,528 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage


2024-12-11 12:05:05,624 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:05:05,636 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage
INFO: Loaded 747 stopwords from c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer\data\configurations\stop_words\fi.txt
DEBUG: Initialized with config: {'default_language': 'en', 'content_column': 'content', 'analysis': {'keywords': {'max_keywords': 5, 'min_keyword_length': 3, 'include_compounds': True}, 'themes': {'max_themes': 3, 'min_confidence': 0.5, 'include_hierarchy': True}, 'categories': {'max_categories': 3, 'min_confidence': 0.3, 'require_evidence': True}}, 'models': {'default_provider': 'openai', 'default_model': 'gpt-4o-mini', 'parameters': {'temperature': 0.0, 'max_tokens': 1000, 'top_p': 1.0, 'frequency_penalty': 0.0, 'presence_penalty': 0.0}, 'providers': {'azure': {'api_version': '2024-02-15-preview', 'api_type': 'azure'}, 'openai': {'api_type': 'open_ai'}, 'anthropic': {'api_type': 'anthropic'}}}, 'features': {'use_caching': True, 'use_async': T


Processing response: {'categories': [{'category': 'Business Development', 'confidence': 0.85, 'explanation': 'The text discusses strategic partnerships and their role in promoting innovation and market penetration, which are key aspects of business development.', 'evidence': [{'text': 'Strategiset kumppanuudet edistävät innovaatiota ja markkinapenetraatiota.', 'relevance': 0.9, 'matched_keywords': ['kumppanuus', 'innovaatio', 'strateginen'], 'context': 'The text emphasizes the importance of strategic partnerships in driving innovation and market entry.'}], 'themes': ['innovation', 'market strategy']}, {'category': 'Research and Development', 'confidence': 0.75, 'explanation': 'The mention of R&D investments leading to new product launches indicates a focus on research and development activities.', 'evidence': [{'text': 'T&K-investoinnit johtivat kolmeen uuteen tuotelanseeraukseen.', 'relevance': 0.85, 'matched_keywords': ['T&K', 'tuotelanseeraukseen'], 'context': 'The text highlights 

INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



Raw LLM response: {'themes': [{'name': 'Strategic Partnerships', 'description': 'The role of strategic partnerships in fostering innovation and market penetration.', 'confidence': 0.95, 'keywords': ['strateginen', 'kumppanuudet', 'markkinapenetraatio'], 'domain': 'business', 'parent_theme': None}, {'name': 'Innovation', 'description': 'The impact of research and development investments on innovation and product launches.', 'confidence': 0.9, 'keywords': ['innovaatio', 'T&K-investoinnit', 'tuotelanseeraus'], 'domain': 'technical', 'parent_theme': None}, {'name': 'Product Launches', 'description': 'The outcomes of R&D investments leading to new product launches.', 'confidence': 0.85, 'keywords': ['tuotelanseeraus', 'investoinnit', 'innovaatiot'], 'domain': 'business', 'parent_theme': 'Innovation'}], 'evidence': {'Strategic Partnerships': [{'text': 'Strategiset kumppanuudet edistävät innovaatiota ja markkinapenetraatiota.', 'relevance': 0.9, 'keywords': ['strateginen', 'kumppanuudet', 'm

INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:05:13,637 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage


2024-12-11 12:05:13,710 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:05:13,719 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage
INFO: Loaded 747 stopwords from c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer\data\configurations\stop_words\fi.txt
DEBUG: Initialized with config: {'default_language': 'en', 'content_column': 'content', 'analysis': {'keywords': {'max_keywords': 5, 'min_keyword_length': 3, 'include_compounds': True}, 'themes': {'max_themes': 3, 'min_confidence': 0.5, 'include_hierarchy': True}, 'categories': {'max_categories': 3, 'min_confidence': 0.3, 'require_evidence': True}}, 'models': {'default_provider': 'openai', 'default_model': 'gpt-4o-mini', 'parameters': {'temperature': 0.0, 'max_tokens': 1000, 'top_p': 1.0, 'frequency_penalty': 0.0, 'presence_penalty': 0.0}, 'providers': {'azure': {'api_version': '2024-02-15-preview', 'api_type': 'azure'}, 'openai': {'api_type': 'open_ai'}, 'anthropic': {'api_type': 'anthropic'}}}, 'features': {'use_caching': True, 'use_async': T

2024-12-11 12:05:13,836 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:05:13,845 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage


2024-12-11 12:05:13,933 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


INFO: Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-11 12:05:13,942 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


INFO: FileUtils initialized with local storage
INFO: Loaded 747 stopwords from c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer\data\configurations\stop_words\fi.txt
DEBUG: Initialized with config: {'default_language': 'en', 'content_column': 'content', 'analysis': {'keywords': {'max_keywords': 5, 'min_keyword_length': 3, 'include_compounds': True}, 'themes': {'max_themes': 3, 'min_confidence': 0.5, 'include_hierarchy': True}, 'categories': {'max_categories': 3, 'min_confidence': 0.3, 'require_evidence': True}}, 'models': {'default_provider': 'openai', 'default_model': 'gpt-4o-mini', 'parameters': {'temperature': 0.0, 'max_tokens': 1000, 'top_p': 1.0, 'frequency_penalty': 0.0, 'presence_penalty': 0.0}, 'providers': {'azure': {'api_version': '2024-02-15-preview', 'api_type': 'azure'}, 'openai': {'api_type': 'open_ai'}, 'anthropic': {'api_type': 'anthropic'}}}, 'features': {'use_caching': True, 'use_async': T


Processing response: {'categories': [{'category': 'Business Efficiency', 'confidence': 0.85, 'explanation': 'The text discusses improvements in operational efficiency through process automation, which is a key aspect of business efficiency.', 'evidence': [{'text': 'Toiminnan tehokkuus parani prosessiautomaation avulla.', 'relevance': 0.9, 'matched_keywords': ['tehokkuus', 'toiminta'], 'context': 'The sentence highlights the enhancement of operational efficiency.'}], 'themes': ['process automation', 'customer satisfaction']}, {'category': 'Customer Satisfaction', 'confidence': 0.75, 'explanation': 'The mention of customer satisfaction metrics indicating positive development suggests a focus on customer experience.', 'evidence': [{'text': 'Asiakastyytyväisyysmittarit osoittavat positiivista kehitystä.', 'relevance': 0.85, 'matched_keywords': ['asiakastyytyväisyys'], 'context': 'This part of the text directly addresses customer satisfaction metrics.'}], 'themes': ['customer feedback', 's

INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-11 12:05:21,793 - LocalStorage - INFO - Saved Excel file with sheets: ['analysis_results']
INFO: Saved Excel file with sheets: ['analysis_results']



Raw LLM response: {'themes': [{'name': 'prosessiautomaatio', 'description': 'Prosessiautomaatio parantaa toiminnan tehokkuutta ja optimoi liiketoimintaprosesseja.', 'confidence': 0.95, 'keywords': ['prosessiautomaatio', 'tehokkuus'], 'domain': 'technical/business'}, {'name': 'asiakastyytyväisyys', 'description': 'Asiakastyytyväisyysmittarit osoittavat positiivista kehitystä, mikä viittaa parantuneeseen asiakaskokemukseen.', 'confidence': 0.9, 'keywords': ['asiakastyytyväisyys', 'mittarit'], 'domain': 'business'}], 'evidence': {'prosessiautomaatio': [{'text': 'Toiminnan tehokkuus parani prosessiautomaation avulla.', 'relevance': 0.9, 'keywords': ['prosessiautomaatio', 'tehokkuus']}], 'asiakastyytyväisyys': [{'text': 'Asiakastyytyväisyysmittarit osoittavat positiivista kehitystä.', 'relevance': 0.9, 'keywords': ['asiakastyytyväisyys', 'mittarit']}]}, 'relationships': {'prosessiautomaatio': ['asiakastyytyväisyys']}}

Processed LLM response: {'themes': [{'name': 'prosessiautomaatio', 'des

INFO: Data saved successfully: {'analysis_results_20241211_120521': 'c:\\Users\\tja\\OneDrive - Rastor-instituutti ry\\Tiedostot\\Rastor-instituutti\\kehittäminen\\analytiikka\\repos\\semantic-text-analyzer\\data\\processed\\analysis_results_20241211_120521.xlsx'}



Processing complete!
Results saved to: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer\data\processed\analysis_results_20241211_120521.xlsx

First few rows of results:
            id       type language                        content                       keywords                         themes                     categories
0  technical_1  technical       fi  Koneoppimismalleja koulute...  koneoppimismalli; datajouk...  Koneoppimismallit (0.95); ...        Machine Learning (0.95)
1  technical_2  technical       fi  Pilvipalvelut tarjoavat sk...  pilvipalvelut; skaalautuva...  Pilvipalvelut (0.95); Mikr...  Cloud Services (0.85); Mic...
2  technical_3  technical       fi  Versionhallintajärjestelmä...  versionhallintajärjestelmä...  Versionhallintajärjestelmä...    Software Development (0.85)
3   business_1   business       fi  Q3 taloudelliset tulokset ...  liikevaihdon kasvu; asiaka...  Taloudellinen ka

In [None]:
# 4. Custom format demo
# Prints a section banner, then awaits the custom-format demonstration coroutine.
# NOTE: the bare top-level `await` is only valid under IPython/Jupyter (autoawait);
# it is a syntax error in a plain Python script.
# NOTE(review): `demonstrate_custom_format` is not defined in this cell — the cell
# that defines it must have been run first on a fresh kernel.
print("\n=== Custom Format Demo ===\n")
await demonstrate_custom_format()



In [13]:
# 5. Batch file analysis
# print("\n=== Batch File Analysis Demo ===\n")
# await demonstrate_batch_file_analysis()

In [None]:
# # 1. Single Text Analysis with Different Detail Levels
# async def demonstrate_single_analysis(text: str):
#     """Demonstrate analysis of single text with different detail levels."""
#     print("Analyzing text:")
#     print("-" * 50)
#     print(text)
#     print("\n")

#     # Perform analysis
#     result = await analyzer.analyze(
#         text,
#         analysis_types=["keywords", "themes", "categories"]
#     )

#     # Show summary format
#     print("Summary Output:")
#     print("-" * 50)
#     summary = summary_formatter.format_output(
#         results=result,
#         analysis_types=["keywords", "themes", "categories"]
#     )
#     for analysis_type, output in summary.items():
#         print(f"\n{analysis_type.title()}:")
#         print(output)

#     # Show detailed format
#     print("\nDetailed Output:")
#     print("-" * 50)
#     detailed = detailed_formatter.format_detailed_output(
#         results=result,
#         analysis_types=["keywords", "themes", "categories"]
#     )
#     for analysis_type, output in detailed.items():
#         print(f"\n{analysis_type.title()}:")
#         print("Summary:", output["summary"])
#         print("Details:", output["details"])
#         print("Metadata:", output["metadata"])



In [None]:
# For notebook execution
# Entry-point cell: awaits `run_demos()` when the module name is "__main__".
# Inside an IPython/Jupyter kernel `__name__` is "__main__", so the guard passes
# whenever this cell is executed; the top-level `await` relies on the kernel's
# autoawait support and would not parse as a plain script.
# NOTE(review): `run_demos` is defined outside this cell — confirm its defining
# cell runs earlier in a Restart & Run All.
if __name__ == "__main__":
    await run_demos()