# SOCIOTYPER - Colab Notebook

Extract organizational relationship triplets from text using AI.

## Setup
1. Add your ngrok auth token to Colab secrets under the name `NGROK`, then run all cells in order
2. Copy the ngrok URL when it is displayed
3. Open `ui/index.html` locally and paste the URL into the Configure tab

In [None]:
# Install dependencies
!pip install -q flask flask-cors pyngrok transformers accelerate torch rapidfuzz spacy beautifulsoup4 requests sentence-transformers pydantic

In [None]:
# Download the spaCy model `en_core_web_sm` through spaCy's command-line interface.
# NOTE(review): presumably this model is loaded by the sociotyper package — confirm
# against the package code before changing the model name.
!python -m spacy download en_core_web_sm

In [None]:
# Clone the repository (if not already done)
import os
if not os.path.exists('EIT-News-Triples'):
    !git clone https://github.com/stanley7/EIT-News-Triples.git
os.chdir('EIT-News-Triples')

In [None]:
# Add the current working directory to the import path so that the
# `sociotyper` package can be imported (the clone cell changes into the
# repository directory).
import os
import sys

# An absolute path keeps imports working even if the working directory changes
# later; the membership check stops sys.path from growing on every re-run.
repo_root = os.path.abspath('.')
if repo_root not in sys.path:
    sys.path.insert(0, repo_root)

# Verify that the package imports and report how many actors it provides
from sociotyper.api import create_app, SociotyperAPI
from sociotyper.actors import get_all_actors
print(f"Loaded {len(get_all_actors())} actors")

In [None]:
# Build the API object with its models loaded up front.
# Only the "mistral" model is requested; loading is slow, so tell the user
# what to expect before the blocking call and confirm once it returns.
requested_models = ["mistral"]

print("Loading models... (this takes 2-3 minutes)")
api = SociotyperAPI(
    load_models=True,
    models_to_load=requested_models,
)
print("Models loaded!")

In [None]:
# Start the Flask server in a background thread and expose it through ngrok
from pyngrok import ngrok
from threading import Thread
from google.colab import userdata
import time  # not used in this cell; the keep-alive cell below relies on it

# Local port shared by the Flask server and the ngrok tunnel
API_PORT = 5050

# Get ngrok token from Colab secrets
# Add your ngrok token to Colab secrets as 'NGROK'
# This stays best-effort (a warning, not a crash), but only Exception is
# caught so that a kernel interrupt still propagates, and the underlying
# error is shown instead of being hidden.
try:
    ngrok_token = userdata.get('NGROK')
    ngrok.set_auth_token(ngrok_token)
except Exception as exc:
    print("Warning: NGROK secret not found. Add your ngrok token to Colab secrets.")
    print("Get a free token at: https://dashboard.ngrok.com/get-started/your-authtoken")
    print(f"Details: {type(exc).__name__}: {exc}")

# Create the Flask app around the already-initialised API object
app = create_app(api)

# Run Flask in the background *before* the tunnel is opened and announced, so
# the public URL is not advertised while nothing is serving the port yet.
# use_reloader=False: the reloader cannot work outside the main thread.
# daemon=True: the server thread must not keep the interpreter alive on exit.
server_thread = Thread(
    target=app.run,
    kwargs={"port": API_PORT, "debug": False, "use_reloader": False},
    daemon=True,
)
server_thread.start()

# Start ngrok tunnel to the local server port
public_url = ngrok.connect(API_PORT)
base_url = public_url.public_url

print("\n" + "="*70)
print("SOCIOTYPER API READY")
print("="*70)
print(f"\nAPI URL: {base_url}")
print("\nEndpoints:")
for endpoint in ("models", "extract_triplets", "scrape_url"):
    print(f"  - {base_url}/{endpoint}")
print("\nCopy this URL to the SOCIOTYPER UI (Configure tab)")
print("="*70)

In [None]:
# Keep the notebook running
# The API will stay active as long as this cell is running
import time  # imported here so the cell does not depend on an earlier cell's import

print("Server is running. Press Stop to shut down.")
try:
    while True:
        time.sleep(60)
        print(".", end="", flush=True)  # one heartbeat dot per minute
except KeyboardInterrupt:
    # Interrupting the cell only ends this loop. Shut the ngrok process down
    # explicitly so the public URL really stops serving; the local Flask thread
    # keeps running until the runtime is restarted.
    from pyngrok import ngrok
    ngrok.kill()
    print("\nServer stopped.")