In [None]:
!pip install sentence_transformers
!pip install faiss-cpu

In [None]:
import os, torch
os.environ['TOKENIZERS_PARALLELISM'] = 'false'  # avoid CPU thread thrash on GPU runs

from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Choose the compute device: default to CPU and switch to CUDA when PyTorch reports a usable GPU.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'
print("Device:", device, "| GPU name:", "None" if device != 'cuda' else torch.cuda.get_device_name(0))

# Instantiate the embedding model a single time for the whole notebook.
# Half-precision weights are requested only on GPU; on CPU the library defaults are kept.
embedder = SentenceTransformer(
    'Alibaba-NLP/gte-base-en-v1.5',
    device=device,
    trust_remote_code=True,
    model_kwargs={} if device != 'cuda' else {"torch_dtype": torch.float16},
)
# Cap the input length the model will encode per text.
embedder.max_seq_length = 2048

def _encode_with_oom_backoff(embedder, texts, batch_size):
    """Encode *texts*, halving the batch size after each CUDA out-of-memory error.

    Returns the normalized embeddings as a single tensor. A ``RuntimeError``
    that is not a CUDA OOM, or an OOM that still happens at batch size 1, is
    re-raised unchanged.
    """
    while True:
        try:
            return embedder.encode(
                texts,
                batch_size=batch_size,
                normalize_embeddings=True,
                show_progress_bar=True,
                convert_to_tensor=True,  # one tensor back; the caller copies it to CPU once
            )
        except RuntimeError as e:
            if "CUDA out of memory" not in str(e) or batch_size <= 1:
                raise
        # Retry only after leaving the ``except`` block: while the handler is
        # active, the traceback keeps the failed attempt's tensors referenced,
        # so the cache could not actually be released.
        torch.cuda.empty_cache()
        batch_size = max(1, batch_size // 2)
        print(f"  ⚠ OOM detected. Retrying with batch_size={batch_size}...")


def _write_flat_l2_index(embeddings, index_path):
    """Build an exact (flat) L2 FAISS index over *embeddings* and write it to *index_path*."""
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    faiss.write_index(index, index_path)


def create_and_save_embeddings(text_file: str, output_prefix: str, embedder=embedder,
                               batch_size: int = 16) -> None:
    """Embed every analysis in *text_file* and save the vectors plus a FAISS index.

    The file is split on ``</analysis>``; each non-blank chunk that contains
    ``<analysis>`` becomes one text (the chunk is stripped but otherwise kept
    as-is, including the opening tag). Outputs are written to
    ``{output_prefix}_embeddings.npy`` and ``{output_prefix}_index.faiss``.

    Args:
        text_file: Path to the UTF-8 text file holding the analyses.
        output_prefix: Path prefix for the two output files.
        embedder: Model object exposing a sentence-transformers style
            ``encode``; defaults to the module-level ``embedder``.
        batch_size: Initial encode batch size. It is halved on each CUDA
            out-of-memory error until encoding succeeds or it reaches 1.

    Returns:
        None. Prints progress and returns early when both outputs already
        exist or when the file contains no analyses.

    Raises:
        RuntimeError: Propagated from encoding when it is not a recoverable
            CUDA out-of-memory error.
    """
    embeddings_path = f"{output_prefix}_embeddings.npy"
    index_path = f"{output_prefix}_index.faiss"

    if os.path.exists(embeddings_path):
        if os.path.exists(index_path):
            print(f"  ✓ Embeddings already exist for {text_file}, skipping...")
            return
        # A previous run may have stopped after saving the embeddings but
        # before writing the index; rebuild the index without re-encoding.
        print(f"  ⚠ Index missing for {text_file}; rebuilding it from saved embeddings...")
        saved = np.ascontiguousarray(np.load(embeddings_path), dtype=np.float32)
        _write_flat_l2_index(saved, index_path)
        print("  ✓ Saved embeddings and index")
        return

    print(f"\nProcessing {text_file}...")

    with open(text_file, 'r', encoding='utf-8') as f:
        content = f.read()

    texts = [a.strip() for a in content.split('</analysis>') if a.strip() and '<analysis>' in a]
    if not texts:
        print(f"  ⚠ No analyses found in {text_file}")
        return

    print(f"  Creating embeddings for {len(texts)} analyses...")

    emb_t = _encode_with_oom_backoff(embedder, texts, batch_size)

    # Move to CPU once at the end; FAISS expects float32 (the model may run in FP16).
    embeddings = emb_t.detach().cpu().numpy().astype('float32')

    # Vectors were L2-normalized at encode time, so L2 distance ranks
    # neighbours the same way cosine similarity would.
    np.save(embeddings_path, embeddings)
    _write_flat_l2_index(embeddings, index_path)
    print("  ✓ Saved embeddings and index")

# Build embeddings and an index for each corpus, in order: equity, fixed income, allocation.
for _text_file, _output_prefix in (
    ('equity_analyses.txt', 'morningstar_embeddings_equity'),
    ('fixed_income_analyses.txt', 'morningstar_embeddings_fixed_income'),
    ('allocation_analyses.txt', 'morningstar_embeddings_allocation'),
):
    create_and_save_embeddings(_text_file, _output_prefix)
