In [1]:
import asyncio
from pathlib import Path
import logging
import sys
from typing import List, Optional

# Make the parent of the current working directory importable so the
# project-local `src.*` imports below can be resolved.
project_root = str(Path(".").resolve().parent)
if project_root not in sys.path:
    sys.path += [project_root]

from tqdm import tqdm
import pandas as pd
from FileUtils import FileUtils, OutputFileType

from src.semantic_analyzer import SemanticAnalyzer

# Manual logging configuration (currently disabled, so no module-level `logger` is defined here)
# logging.basicConfig(
#     level=logging.INFO,
#     format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
# )
# logger = logging.getLogger(__name__)
from src.nb_helpers.environment import setup_notebook_env, verify_environment




In [None]:
# Prepare the notebook environment through the project helper, requesting the
# "WARNING" log level, then run the environment check.
# verify_environment() is the last expression of the cell, so its return value
# is rendered as the cell result (True in the saved output).
# NOTE(review): what exactly the two helpers configure/check lives in
# src.nb_helpers.environment and is not visible from this notebook.
setup_notebook_env(log_level="WARNING")
verify_environment()



2024-12-12 18:54:36,486 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-12 18:54:36,486 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-12 18:54:36,497 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


2024-12-12 18:54:36,497 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


2024-12-12 18:54:36,551 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-12 18:54:36,551 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-12 18:54:36,558 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


2024-12-12 18:54:36,558 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


Environment Check Results:

Basic Setup:
-----------
✓ Project root in path
✓ FileUtils initialized
✓ .env file loaded

Environment Variables:
---------------------
✓ OPENAI_API_KEY set
✓ ANTHROPIC_API_KEY set

Project Structure:
-----------------
✓ Raw data exists
✓ Processed data exists
✓ Configuration exists
✓ Main config.yaml exists

Environment Status: Ready ✓


True

In [3]:
def display_analysis_summary(results: pd.DataFrame) -> None:
    """Print a formatted summary of an analysis results frame.

    The summary consists of:

    * basic statistics: the row count, the ``language`` value of the first
      row (only when that column exists and the frame has rows) and the mean
      of ``processing_time`` (only when that column exists);
    * for each result family, identified by the column prefixes
      ``keywords_``, ``themes_`` and ``categories_``, the non-null values of
      the first three rows, labelled with the column name minus its prefix.

    Args:
        results: DataFrame holding the analysis results.

    Returns:
        None. Everything is written to stdout.
    """
    print("\nAnalysis Results Summary")
    print("=" * 50)

    # Basic stats
    print(f"Total rows processed: {len(results)}")
    # ``.iloc[0]`` raises IndexError on a zero-row frame, so only report the
    # language when there is at least one row to read it from.
    if "language" in results.columns and not results.empty:
        print(f"Language: {results['language'].iloc[0]}")
    if "processing_time" in results.columns:
        print(
            f"Average processing time: {results['processing_time'].mean():.2f}s"
        )

    # Results by type: section title -> columns carrying that prefix
    section_prefixes = {
        "Keywords": "keywords_",
        "Themes": "themes_",
        "Categories": "categories_",
    }
    result_sections = {
        section_name: [
            col for col in results.columns if col.startswith(prefix)
        ]
        for section_name, prefix in section_prefixes.items()
    }

    for section_name, columns in result_sections.items():
        if not columns:
            continue

        print(f"\n{section_name} Results")
        print("-" * 50)

        # Display first 3 rows for each result type
        sample_results = results[columns].head(3)

        for idx, row in sample_results.iterrows():
            # Numeric index labels are shown 1-based. Labels that do not
            # support ``+ 1`` (str, Timestamp, MultiIndex tuples, ...) are
            # shown as-is instead of aborting the summary with a TypeError.
            try:
                row_label = idx + 1
            except TypeError:
                row_label = idx
            print(f"\nRow {row_label}:")

            for col in columns:
                value = row[col]
                if pd.notna(value):
                    # Strip the family prefix ("keywords_id" -> "id").
                    print(f"  {col.split('_', 1)[1]}: {value}")


async def analyze_excel_content(
    content_file: str = "test_content_en.xlsx",
    parameter_file: str = "parameters_en.xlsx",
    content_column: str = "content",
    analysis_types: Optional[List[str]] = None,
    batch_size: int = 10,
) -> None:
    """Run Excel-based analysis with progress reporting.

    Resolves the content file under the FileUtils "raw" data path and the
    parameter file under the "parameters" data path, checks that both exist,
    builds a ``SemanticAnalyzer`` with ``from_excel``, awaits
    ``analyze_excel`` (with ``save_results=True`` and output name
    "analysis_results") and finally prints a summary of the returned frame
    with ``display_analysis_summary``.

    Args:
        content_file: File name of the Excel workbook holding the texts.
        parameter_file: File name of the Excel workbook holding the
            analysis parameters.
        content_column: Forwarded to ``SemanticAnalyzer.from_excel``.
        analysis_types: Forwarded to ``analyze_excel``; ``None`` is reported
            as "all" in the printed header (presumably it runs every
            available analysis; confirm against the analyzer).
        batch_size: Forwarded to ``analyze_excel``.

    Returns:
        None. Any exception raised along the way, including a missing input
        file, is logged with its traceback and not re-raised.
    """
    # No module-level ``logger`` is defined in this notebook, so resolve one
    # here; otherwise the error handler below would itself fail with a
    # NameError and mask the original exception.
    logger = logging.getLogger(__name__)

    try:
        # Initialize FileUtils
        file_utils = FileUtils()

        # Verify paths with progress
        with tqdm(total=2, desc="Checking files") as pbar:
            content_path = file_utils.get_data_path("raw") / content_file
            pbar.update(1)

            param_path = file_utils.get_data_path("parameters") / parameter_file
            pbar.update(1)

            if not content_path.exists():
                raise FileNotFoundError(
                    f"Content file not found: {content_path}"
                )
            if not param_path.exists():
                raise FileNotFoundError(
                    f"Parameter file not found: {param_path}"
                )

        print("\nStarting analysis:")
        print(f"Content file: {content_path}")
        print(f"Parameter file: {param_path}")
        print(f"Analysis types: {analysis_types or 'all'}")

        # Create and run analyzer
        analyzer = SemanticAnalyzer.from_excel(
            content_file=content_file,
            parameter_file=parameter_file,
            content_column=content_column,
            file_utils=file_utils,
        )

        # Run analysis with progress
        results = await analyzer.analyze_excel(
            analysis_types=analysis_types,
            batch_size=batch_size,
            save_results=True,
            output_file="analysis_results",
            show_progress=True,
        )

        # Display formatted results
        display_analysis_summary(results)

    except Exception as e:
        logger.error(f"Analysis failed: {e}", exc_info=True)




In [4]:
if __name__ == "__main__":
    print("Running Excel-based analysis...")

    # await run(
    await analyze_excel_content(
        content_file="test_content_fi.xlsx",
        parameter_file="parameters_fi.xlsx",
        analysis_types=["keywords", "themes"],
    )
    # )


Running Excel-based analysis...
2024-12-12 18:54:36,696 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-12 18:54:36,696 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-12 18:54:36,703 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


2024-12-12 18:54:36,703 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage
Checking files: 100%|██████████| 2/2 [00:00<00:00, 2000.14it/s]
2024-12-12 18:54:36,712 - src.semantic_analyzer.analyzer - INFO - Initializing Excel Semantic Analyzer...



Starting analysis:
Content file: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer\data\raw\test_content_en.xlsx
Parameter file: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer\data\parameters\parameters_en.xlsx
Analysis types: ['keywords', 'themes']


2024-12-12 18:54:37,568 - src.excel_analysis.base - INFO - Successfully loaded content file with 9 rows


2024-12-12 18:54:37,615 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-12 18:54:37,615 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-12 18:54:37,624 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


2024-12-12 18:54:37,624 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


2024-12-12 18:54:37,750 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-12 18:54:37,750 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-12 18:54:37,759 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


2024-12-12 18:54:37,759 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage
2024-12-12 18:54:39,399 - src.semantic_analyzer.analyzer - INFO - Initializing analyzers for language: en


2024-12-12 18:54:39,437 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-12 18:54:39,437 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-12 18:54:39,445 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


2024-12-12 18:54:39,445 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


2024-12-12 18:54:47,708 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-12 18:54:47,708 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-12 18:54:47,718 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


2024-12-12 18:54:47,718 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


2024-12-12 18:54:47,799 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-12 18:54:47,799 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-12 18:54:47,807 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


2024-12-12 18:54:47,807 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


2024-12-12 18:54:47,920 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-12 18:54:47,920 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-12 18:54:47,928 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


2024-12-12 18:54:47,928 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


2024-12-12 18:54:47,982 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-12 18:54:47,982 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-12 18:54:47,991 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


2024-12-12 18:54:47,991 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


2024-12-12 18:54:48,184 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-12 18:54:48,184 - FileUtils.core.file_utils - INFO - Project root: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer


2024-12-12 18:54:48,214 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage


2024-12-12 18:54:48,214 - FileUtils.core.file_utils - INFO - FileUtils initialized with local storage
2024-12-12 18:54:48,224 - src.semantic_analyzer.analyzer - INFO - Successfully initialized all analyzers
2024-12-12 18:54:48,226 - src.semantic_analyzer.analyzer - INFO - Running analysis types: ['keywords', 'themes']
Analysis Progress:   0%|          | 0/2 [00:00<?, ?it/s]2024-12-12 18:54:48,231 - src.semantic_analyzer.analyzer - INFO - Running keywords analysis...
Analysis Progress:   0%|          | 0/2 [00:00<?, ?it/s]2024-12-12 18:54:48,237 - src.analyzers.excel_support - INFO - Starting keyword analysis on 9 rows



Processing Keywords...


2024-12-12 18:54:55,931 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-12 18:54:56,352 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-12 18:54:56,463 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-12 18:54:56,941 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-12 18:54:57,023 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-12 18:54:57,860 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-12 18:54:57,973 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-12 18:54:58,144 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-12 18:54:59,021 - httpx 

✓ Completed keywords analysis

Processing Themes...


2024-12-12 18:55:03,494 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



Raw LLM response: {'themes': [{'name': 'Interactive Learning Environment', 'description': 'The online learning platform provides an engaging and interactive environment through modules and virtual classrooms, enhancing student collaboration and learning.', 'confidence': 0.95, 'keywords': ['interactive', 'virtual', 'collaboration'], 'domain': 'technical', 'parent_theme': None}, {'name': 'Self-Paced Learning', 'description': 'The platform allows students to progress at their own pace, enabling personalized learning experiences and accommodating different learning styles.', 'confidence': 0.9, 'keywords': ['self-paced', 'progress', 'learn'], 'domain': 'business', 'parent_theme': None}, {'name': 'Digital Assessment and Feedback', 'description': 'Digital tools provide immediate feedback on assessments, allowing students to understand their learning outcomes and areas for improvement.', 'confidence': 0.85, 'keywords': ['digital', 'assessment', 'feedback'], 'domain': 'technical', 'parent_them

2024-12-12 18:55:05,924 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



Raw LLM response: {'themes': [{'name': 'Financial Performance', 'description': 'The analysis of financial results indicating revenue growth and profit margins.', 'confidence': 0.95, 'keywords': ['revenue', 'profit', 'growth', 'margins'], 'domain': 'business', 'parent_theme': None}, {'name': 'Customer Dynamics', 'description': 'Trends in customer acquisition costs and retention rates reflecting customer relationship management.', 'confidence': 0.9, 'keywords': ['customer', 'acquisition', 'retention', 'cost'], 'domain': 'business', 'parent_theme': None}, {'name': 'Market Strategy', 'description': 'The approach to market expansion focusing on emerging technology sectors.', 'confidence': 0.85, 'keywords': ['market', 'expansion', 'strategy', 'technology'], 'domain': 'business', 'parent_theme': None}], 'evidence': {'Financial Performance': [{'text': 'Q3 financial results show 15% revenue growth and improved profit margins.', 'relevance': 0.9, 'keywords': ['financial', 'revenue', 'growth', '

2024-12-12 18:55:06,448 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



Raw LLM response: {'themes': [{'name': 'Version Control Systems', 'description': 'Version control systems are essential tools for tracking changes in source code repositories, allowing teams to manage code revisions effectively.', 'confidence': 0.95, 'keywords': ['version', 'control', 'repository', 'track', 'code'], 'domain': 'technical', 'parent_theme': None}, {'name': 'Continuous Integration', 'description': 'Continuous integration is a development practice that ensures code quality through automated testing and integration of code changes.', 'confidence': 0.95, 'keywords': ['continuous', 'integration', 'quality', 'automate', 'test'], 'domain': 'technical', 'parent_theme': None}, {'name': 'Documentation', 'description': 'Documentation is crucial for providing detailed information on API usage and system architecture, facilitating better understanding and maintenance of software systems.', 'confidence': 0.9, 'keywords': ['documentation', 'API', 'usage', 'architecture', 'detail'], 'do

2024-12-12 18:55:06,879 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-12 18:55:06,893 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-12 18:55:06,957 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



Raw LLM response: {'themes': [{'name': 'Strategic Partnerships', 'description': 'The role of strategic partnerships in fostering innovation and enhancing market presence.', 'confidence': 0.95, 'keywords': ['partnership', 'strategic', 'drive', 'innovation'], 'domain': 'business', 'parent_theme': None}, {'name': 'Product Development and Launch', 'description': 'The impact of investment in research and development on new product launches.', 'confidence': 0.9, 'keywords': ['investment', 'R&D', 'product', 'launch'], 'domain': 'business', 'parent_theme': None}, {'name': 'Sales Performance', 'description': 'The achievement of sales targets and performance in key market segments.', 'confidence': 0.85, 'keywords': ['sales', 'performance', 'exceed', 'target', 'segment'], 'domain': 'business', 'parent_theme': None}], 'evidence': {'Strategic Partnerships': [{'text': 'Strategic partnerships drive innovation and market penetration.', 'relevance': 0.9, 'keywords': ['partnership', 'drive', 'innovatio

2024-12-12 18:55:07,538 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



Raw LLM response: {'themes': [{'name': 'Cloud Computing Services', 'description': 'The provision of scalable infrastructure that supports various deployments in a cloud environment.', 'confidence': 0.95, 'keywords': ['cloud', 'computing', 'scalable', 'infrastructure', 'deployment'], 'domain': 'technical/business'}, {'name': 'Microservices Architecture', 'description': 'A design approach that promotes modularity and maintainability in system development, allowing for independent deployment of services.', 'confidence': 0.9, 'keywords': ['microservices', 'architecture', 'modular', 'maintainable', 'system', 'design'], 'domain': 'technical/business'}, {'name': 'API Management', 'description': 'The handling of authentication and data validation through API endpoints, ensuring secure and reliable communication between services.', 'confidence': 0.85, 'keywords': ['API', 'endpoints', 'authentication', 'data', 'validation', 'requirements'], 'domain': 'technical/business'}], 'evidence': {'Cloud 

2024-12-12 18:55:08,472 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-12 18:55:08,476 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-12-12 18:55:08,491 - src.analyzers.excel_support - INFO - Theme analysis complete
Analysis Progress: 100%|██████████| 2/2 [00:20<00:00, 10.13s/it]
2024-12-12 18:55:08,499 - src.semantic_analyzer.analyzer - INFO - Combining results...
2024-12-12 18:55:08,512 - src.semantic_analyzer.analyzer - INFO - Saving results...
2024-12-12 18:55:08,586 - LocalStorage - INFO - Saved Excel file with sheets: ['Analysis Results']
2024-12-12 18:55:08,586 - LocalStorage - INFO - Saved Excel file with sheets: ['Analysis Results']



Raw LLM response: {'themes': [{'name': 'Integration of Theory and Practice', 'description': 'The theme emphasizes the combination of theoretical concepts with practical exercises in classroom workshops, highlighting the importance of applying knowledge in real-world scenarios.', 'confidence': 0.95, 'keywords': ['theoretical', 'exercise', 'combine', 'hands-on', 'workshop'], 'domain': 'technical/business', 'parent_theme': None}, {'name': 'Peer Learning and Collaboration', 'description': 'This theme focuses on the role of small group activities in promoting peer learning and knowledge sharing among participants, fostering a collaborative learning environment.', 'confidence': 0.9, 'keywords': ['peer', 'group', 'activity', 'share', 'knowledge'], 'domain': 'technical/business', 'parent_theme': None}, {'name': 'Reinforcement of Learning Objectives', 'description': 'The theme highlights the importance of practice sessions in reinforcing key learning objectives, ensuring that participants effe

2024-12-12 18:55:08,589 - FileUtils.core.file_utils - INFO - Data saved successfully: {'analysis_results_20241212_185508': 'c:\\Users\\tja\\OneDrive - Rastor-instituutti ry\\Tiedostot\\Rastor-instituutti\\kehittäminen\\analytiikka\\repos\\semantic-text-analyzer\\data\\processed\\analysis_results_20241212_185508.xlsx'}
2024-12-12 18:55:08,592 - src.semantic_analyzer.analyzer - INFO - Saved results to: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer\data\processed\analysis_results_20241212_185508.xlsx
2024-12-12 18:55:08,595 - src.semantic_analyzer.analyzer - INFO - Results saved to: c:\Users\tja\OneDrive - Rastor-instituutti ry\Tiedostot\Rastor-instituutti\kehittäminen\analytiikka\repos\semantic-text-analyzer\data\processed\analysis_results_20241212_185508.xlsx
2024-12-12 18:55:08,596 - src.semantic_analyzer.analyzer - INFO - Analysis completed in 20.37 seconds



Analysis Results Summary
Total rows processed: 9
Language: en
Average processing time: 20.29s

Keywords Results
--------------------------------------------------

Row 1:
  id: technical_1
  type: technical
  language: en
  keyword_scores: 0.95, 0.95, 0.90, 0.90, 0.90, 0.85, 0.85, 0.80
  keyword_domains: technical, technical, technical, technical, technical, technical, technical, technical

Row 2:
  id: technical_2
  type: technical
  language: en
  keyword_scores: 0.95, 0.95, 0.95, 0.90
  keyword_domains: technical, technical, technical, technical

Row 3:
  id: technical_3
  type: technical
  language: en
  keyword_scores: 0.95, 0.95, 0.90, 0.90, 0.90, 0.90
  keyword_domains: technical, technical, technical, technical, technical, technical

Themes Results
--------------------------------------------------

Row 1:
  id: technical_1
  type: technical
  language: en
  theme_descriptions: The systematic approach to training machine learning models using large datasets, emphasizing the im