# 🎯 SEMA VOC Analysis

Sentiment analysis of Korean Voice of Customer (VOC) feedback, designed to run in Google Colab.

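## Steps:
1. Run the setup cell (installs Java, the Python packages, and the `sema_inf` repository)
2. Run the processing cell and upload your Excel files (with VOC1/VOC2 columns)
3. Download the results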

## Setup

In [ ]:
print("🔧 Setting up environment...")

# Install system dependencies
!apt-get update -qq && apt-get install -y openjdk-8-jdk -qq

# Set Java environment
import os
os.environ['JAVA_HOME'] = '/usr/lib/jvm/java-8-openjdk-amd64'

# Install packages
!pip install -q "huggingface_hub>=0.16.0" "torch>=2.0.0" "transformers>=4.30.0,<5.0.0" "torchmetrics>=0.11.0" "lightning>=2.0.0" konlpy

# Setup repository
!git clone -q https://github.com/shc443/sema_inf.git
%cd sema_inf
!pip install -q -e .

print("✅ Setup complete!")

## Process Files

In [ ]:
# Processing cell: create the SEMA Colab helper, ask the user to upload
# Excel files, process them, and download the results on success.
from colab_cli import SemaColabCLI

print("🚀 Initializing SEMA...")
sema = SemaColabCLI()  # kept as a notebook global; the last cell reuses it

print("📤 Upload Excel files (with VOC1/VOC2 columns):")
uploaded_files = sema.upload_files()

if not uploaded_files:
    # Nothing was uploaded, so there is nothing to process.
    print("❌ No files uploaded.")
else:
    print(f"🔄 Processing {len(uploaded_files)} files...")
    success_count = sema.process_all_files()

    # Only download when at least one file was processed successfully.
    if success_count > 0:
        print(f"🎉 Processed {success_count} files!")
        sema.download_results()
        print("✅ Complete! Check downloads.")
    else:
        print("❌ Processing failed.")

In [ ]:
import os

import torch


def list_xlsx_files(directory):
    """Return the names of the ``.xlsx`` files directly inside *directory*.

    A missing directory yields an empty list instead of raising, so this
    status cell still reports counts when ``data/input`` or ``data/output``
    has not been created.
    """
    try:
        entries = os.listdir(directory)
    except (FileNotFoundError, NotADirectoryError):
        return []
    return [name for name in entries if name.endswith('.xlsx')]


# Status check: count the Excel files in the input and output folders
# (paths are relative to the current working directory) and report
# whether torch can see a CUDA GPU.
input_files = list_xlsx_files('data/input')
output_files = list_xlsx_files('data/output')

print(f"📁 Input: {len(input_files)} files")
print(f"📁 Output: {len(output_files)} files")
print(f"🖥️ GPU: {torch.cuda.is_available()}")

## Download Again

In [ ]:
# Trigger the results download again. Relies on the `sema` object created
# in the "Process Files" cell, so that cell must have been run first.
sema.download_results()