# Cross-Environment Semantic Analysis Demo

See also the separate [documentation sheet](../docs/ANALYSIS_DEMO_DOC.md).


## 1. Environment Setup


#### Import dependencies

In [1]:
import asyncio
import logging
from pathlib import Path
import sys
from typing import Optional


# Make the project importable when running locally: the project root is
# taken to be the parent of the current working directory.
_parent_dir = Path().resolve().parent
project_root = str(_parent_dir)
# Register the path only once so re-running the cell adds no duplicate entry.
if project_root not in sys.path:
    sys.path.append(project_root)


In [2]:
# import main interface to analyzers
from src.semantic_analyzer import SemanticAnalyzer

# import formatting
from src.utils.formatting_config import OutputDetail, ExcelOutputConfig


In [3]:
# Import environment setup
from src.core.managers import EnvironmentManager, EnvironmentConfig

### Set up environment

In [None]:
# Runtime environment selector: "local", or "azure" when running in Azure ML
# and you want persistent blob storage.
ENV_TYPE = "local"

# Describe the environment, then hand the description to the manager.
# For now, use config.yaml or .env to change the logging level.
env_config = EnvironmentConfig(
    env_type=ENV_TYPE, project_root=Path().resolve().parent, log_level="INFO"
)
environment = EnvironmentManager(env_config)

# Fetch the initialized components and keep the file utilities handy.
components = environment.get_components()
file_utils = components["file_utils"]

# Restrict the HTTP client loggers to warnings and above.
for _client_logger in ("httpx", "httpcore"):
    logging.getLogger(_client_logger).setLevel(logging.WARNING)

### User defined setup
- Parameter file (how to analyze) and content file (what to analyze)


In [5]:
# Language of the demo input files: "en" or "fi".
# Switching languages is a one-line change here instead of commenting and
# uncommenting pairs of file names.
CONTENT_LANGUAGE = "fi"

# Parameter file (how to analyze) and content file (what to analyze).
parameter_file = f"parameters_{CONTENT_LANGUAGE}.xlsx"
content_file = f"test_content_{CONTENT_LANGUAGE}.xlsx"

# Change to True if you want to use Azure OpenAI API, if not already defined in config.yaml
azure = False

## 2. Initialize Analyzer

- Initialize the analyzer (the formatting config is passed later, to `analyze_excel`)
- Parameter file paths are handled automatically by FileUtils


In [None]:
# Sample sentences to analyze, keyed by language code.
texts = dict(
    en="Machine learning models analyze data efficiently.",
    fi="Koneoppimismallit analysoivat dataa tehokkaasti.",
)

# Build the analyzer from the chosen parameter file and the file utilities.
analyzer = SemanticAnalyzer(parameter_file=parameter_file, file_utils=file_utils)

# Helper function for text analysis
async def analyze_text(text: str, language: str) -> None:
    """Analyze one text and print its keywords, themes and categories.

    Runs the module-level ``analyzer`` with the "keywords", "themes" and
    "categories" analysis types and prints the outcome. If the analyzer
    reports a failure, only the error is printed.

    Args:
        text: The text to analyze.
        language: Language value forwarded to the analyzer; also shown in
            the printed heading.
    """
    result = await analyzer.analyze(
        text=text,
        language=language,
        analysis_types=["keywords", "themes", "categories"],
    )

    if not result.success:
        print(f"Error: {result.error}")
        return

    print(f"\nAnalysis results for {language}:")

    # Each section is guarded the same way as categories, so a missing
    # section leaves its heading empty instead of raising AttributeError.
    # NOTE(review): assumes result sections may be None - confirm.
    print("Keywords:")
    if result.keywords and result.keywords.keywords:
        for kw in result.keywords.keywords:
            print(f"• {kw.keyword} (score: {kw.score:.2f})")

    print("\nThemes:")
    if result.themes and result.themes.themes:
        for theme in result.themes.themes:
            print(f"• {theme.name} ({theme.confidence:.2f})")

    if result.categories and result.categories.matches:
        print("\nCategories:")
        for cat in result.categories.matches:
            print(f"• {cat.name} ({cat.confidence:.2f})")

#### Single text analysis


In [None]:
print("\n=== Single Text Analysis ===")
for lang, text in texts.items():
    await analyze_text(text, lang)

#### Excel processing


In [None]:
# Configure output formatting
output_config = ExcelOutputConfig(
    output_detail=OutputDetail.MINIMAL,
    include_metadata=True,
    include_confidence_scores=True
)

# Analyze Excel file
result_df = await analyzer.analyze_excel(
    content_file=content_file,
    analysis_types=["keywords", "themes", "categories"],
    save_results=True,
    output_file="results.xlsx",
    output_config=output_config
)

print("\nExcel analysis completed successfully")
print(f"Results saved to: results.xlsx")
print("\nAnalysis Results:")
print(result_df)

In [9]:
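# NOTE: Alternative variant of the Excel-processing cell above (it expects a
# result object with .success / .output_file); kept commented out for reference.
# # Configure output formatting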
# output_config = ExcelOutputConfig(
#     output_detail=OutputDetail.MINIMAL,
#     include_metadata=True,
#     include_confidence_scores=True
# )

# # Analyze Excel file
# result = await analyzer.analyze_excel(
#     # excel_file=content_file,
#     content_file=content_file,

#     analysis_types=["keywords", "themes", "categories"],
#     save_results=True,
#     output_file="results.xlsx",
#     output_config=output_config
# )

# if result.success:
#     print("\nExcel analysis completed successfully")
#     print(f"Results saved to: {result.output_file}")
# else:
#     print(f"Error: {result.error}")