# ToyaSapo 分析 ver6: Azure Text Analytics 感情分析

## 概要
- Firestoreからメールデータを取得
- Azure Text Analytics APIを使用して感情分析を実行
- 結果（ポジティブ/ネガティブ/ニュートラル/混合）を集計
- Firestoreの `analysis_results/latest` に保存

In [None]:
# Install the Azure Text Analytics and Firebase Admin SDKs imported by the cells below.
!pip install azure-ai-textanalytics firebase-admin

In [None]:
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential
from google.colab import drive
import os
import json
from statistics import mean

In [None]:
# Mount Google Drive at /content/drive; the key files read by the
# configuration cell live under /content/drive/MyDrive.
drive.mount('/content/drive')

In [None]:
# --- Configuration ---
# All secrets are read from the shared team folder on the mounted Drive.
BASE_PATH = '/content/drive/MyDrive/001_PBL_チームD_AI4th'
AZURE_KEY_PATH = os.path.join(BASE_PATH, 'azure_textanalytics_key.txt')
FIREBASE_KEY_PATH = os.path.join(BASE_PATH, 'toyasapo-firebase-adminsdk-fbsvc-93683c6f93.json')
AZURE_ENDPOINT = "https://toyasapo-language.cognitiveservices.azure.com/"

# Load the Azure API key.
# Fail fast here: without the key `azure_key` would stay undefined and the
# analysis cell would later die with an unrelated-looking NameError.
try:
    with open(AZURE_KEY_PATH, 'r', encoding='utf-8') as f:
        azure_key = f.read().strip()
except (OSError, UnicodeDecodeError) as e:
    print(f"Error loading Azure key: {e}")
    raise
if not azure_key:
    # An empty file would otherwise only surface as an auth error on the first API call.
    raise ValueError(f"Azure key file is empty: {AZURE_KEY_PATH}")
print("Azure API Key loaded successfully.")

# Initialize Firebase once per runtime.
# get_app() raises ValueError when the default app does not exist yet, which
# lets the cell be re-run safely without touching the private `_apps` registry.
try:
    firebase_admin.get_app()
except ValueError:
    cred = credentials.Certificate(FIREBASE_KEY_PATH)
    firebase_admin.initialize_app(cred)
    print("Firebase initialized.")
else:
    print("Firebase already initialized.")

# Firestore client shared by the analysis and save cells.
db = firestore.client()

In [None]:
# --- 分析実行 ---

def analyze_emails_sentiment():
    """Run Azure sentiment analysis over the Firestore ``emails`` collection.

    Reads the ``inquiry`` field of every document in ``emails``, sends the
    texts to Azure Text Analytics in batches (language ``"ja"``), and tallies
    the document-level sentiment labels together with the mean confidence
    scores. Failed batches and failed documents are reported and skipped, so
    one bad request does not abort the whole run.

    Relies on the notebook globals ``db``, ``azure_key`` and ``AZURE_ENDPOINT``.

    Returns:
        dict: ``positive_count``, ``negative_count``, ``neutral_count`` and
        ``mixed_count`` (ints), plus ``average_scores``, a dict holding the
        mean ``positive``/``negative``/``neutral`` confidence (0 when no
        document was scored).
    """
    # 1. Fetch the inquiry texts
    print("Fetching emails from Firestore...")
    emails = []
    for doc in db.collection('emails').stream():
        inquiry = (doc.to_dict() or {}).get('inquiry')
        # Only strings can be truncated and analyzed; skip missing or
        # unexpectedly typed values instead of crashing later on .strip().
        if not isinstance(inquiry, str):
            continue
        text = inquiry[:5000]  # Azure limit per doc is roughly 5120 chars
        # Drop blank texts here so every batch contains only analyzable input.
        if text.strip():
            emails.append({'id': doc.id, 'text': text})

    print(f"Total emails to analyze: {len(emails)}")

    # 2. Create the Azure client
    credential = AzureKeyCredential(azure_key)
    text_analytics_client = TextAnalyticsClient(endpoint=AZURE_ENDPOINT, credential=credential)

    # 3. Analyze in batches (at most 10 documents per request)
    counts = {
        'positive': 0,
        'negative': 0,
        'neutral': 0,
        'mixed': 0
    }
    # Per-document confidence scores, averaged in step 4.
    scores_sum = {
        'positive': [],
        'negative': [],
        'neutral': []
    }
    skipped = 0

    batch_size = 10
    for i in range(0, len(emails), batch_size):
        batch = emails[i:i + batch_size]

        try:
            response = text_analytics_client.analyze_sentiment(
                [e['text'] for e in batch], language="ja"
            )
        except Exception as e:
            # Deliberately broad: a failed request should only cost this batch.
            print(f"Error analyzing batch {i}: {e}")
            skipped += len(batch)
        else:
            # Results come back in input order, so zip pairs each result
            # with the email it belongs to.
            for email, result in zip(batch, response):
                if result.is_error:
                    print(
                        f"Error analyzing email {email['id']}: "
                        f"{result.error.code} - {result.error.message}"
                    )
                    skipped += 1
                    continue

                sentiment = result.sentiment  # positive, negative, neutral, mixed
                if sentiment in counts:
                    counts[sentiment] += 1
                else:
                    print(f"Unexpected sentiment label for email {email['id']}: {sentiment}")

                scores_sum['positive'].append(result.confidence_scores.positive)
                scores_sum['negative'].append(result.confidence_scores.negative)
                scores_sum['neutral'].append(result.confidence_scores.neutral)

        print(f"Processed {min(i + batch_size, len(emails))} / {len(emails)}")

    if skipped:
        print(f"Skipped {skipped} email(s) due to errors.")

    # 4. Average confidence scores (0 when nothing was scored)
    avg_scores = {
        label: mean(values) if values else 0
        for label, values in scores_sum.items()
    }

    # 5. Build the summary
    result_data = {
        'positive_count': counts['positive'],
        'negative_count': counts['negative'],
        'neutral_count': counts['neutral'],
        'mixed_count': counts['mixed'],
        'average_scores': avg_scores
    }

    print("Analysis Complete:", result_data)
    return result_data

# Run the analysis now; the returned summary dict is kept in a global so the
# save cell below can persist it.
sentiment_summary = analyze_emails_sentiment()

In [None]:
# --- Firestoreへの保存 ---
def save_results(sentiment_data):
    """Write the sentiment summary to ``analysis_results/latest`` in Firestore.

    Args:
        sentiment_data: Summary to store under the ``sentiment_summary``
            field. A server-side timestamp is stored alongside it as
            ``updated_at``.
    """
    payload = {
        'sentiment_summary': sentiment_data,
        'updated_at': firestore.SERVER_TIMESTAMP
    }

    # merge=True so that other analysis data already stored in this document
    # (e.g. morphological analysis results) is not erased.
    latest_doc = db.collection('analysis_results').document('latest')
    latest_doc.set(payload, merge=True)

    print("Results saved to Firestore: analysis_results/latest")

# Persist the summary produced by the analysis cell.
# NOTE(review): analyze_emails_sentiment() always returns a non-empty dict, so
# this guard is always true, even when every batch failed and all counts are
# 0. Confirm whether an all-zero summary should overwrite the stored result.
if sentiment_summary:
    save_results(sentiment_summary)