In [None]:
# Install required packages
!pip install -q transformers torch fastapi uvicorn pydantic python-multipart
!pip install -q huggingface_hub

In [None]:
# Import dependencies
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import uvicorn
from huggingface_hub import login
import os
from typing import Optional
import time

# Model configuration
# MODEL_NAME is the checkpoint identifier handed to from_pretrained().
# MAX_LENGTH caps both the tokenized prompt and the number of new tokens.
# TEMPERATURE is forwarded to model.generate().
MODEL_NAME: str = "cxllin/Llama2-7b-Finance"
MAX_LENGTH: int = 512
TEMPERATURE: float = 0.5

# FastAPI application object; route decorators in later cells attach to it
app = FastAPI(title="Financial News Analyzer")

In [None]:
# Define data models
class Article(BaseModel):
    """Request body for the /analyze endpoint.

    Attributes:
        id: Caller-supplied identifier; echoed back as ``Analysis.article_id``.
        title: Headline, interpolated into the model prompt.
        content: Article body, interpolated into the model prompt; the
            endpoint rejects an empty value with HTTP 400.
        source: Publisher/origin label, interpolated into the model prompt.
        timestamp: Caller-supplied timestamp string; echoed back unchanged
            as ``Analysis.timestamp`` (no format is enforced here).
    """
    id: str
    title: str
    content: str
    source: str
    timestamp: str

class Analysis(BaseModel):
    """Response body returned by the /analyze endpoint.

    Attributes:
        article_id: Identifier copied from the analyzed ``Article.id``.
        timestamp: Timestamp string copied from the analyzed article.
        analysis: Text produced by the language model.
        model: Name of the checkpoint that produced the analysis.
        version: Version tag of this service's response format.
        inference_time: Seconds spent handling the request, or ``None``
            when it was not measured.
    """
    article_id: str
    timestamp: str
    analysis: str
    model: str
    version: str
    # Explicit default: pydantic v1 treats a bare Optional[float] as optional,
    # while pydantic v2 treats it as required-but-nullable. Spelling out
    # "= None" makes the field optional under both major versions.
    inference_time: Optional[float] = None

In [None]:
# Load tokenizer and model weights for MODEL_NAME (slow; runs once per kernel)
print("Loading model and tokenizer...")

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# float32 weights; low_cpu_mem_usage is forwarded to from_pretrained as-is
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, low_cpu_mem_usage=True, torch_dtype=torch.float32
)

print("Model loaded successfully")

In [None]:
def create_prompt(article: Article) -> str:
    """Build the instruction prompt for a single article.

    The prompt opens with a one-line instruction, lists the article's title,
    source and content, then enumerates the five sections the model should
    produce and closes with a brevity reminder.

    Args:
        article: Object exposing ``title``, ``source`` and ``content``.

    Returns:
        The prompt as one newline-joined string with no trailing newline.
    """
    prompt_lines = [
        "Analyze this financial article briefly:",
        "",
        f"Title: {article.title}",
        f"Source: {article.source}",
        f"Content: {article.content}",
        "",
        "Provide a concise analysis:",
        "1. Summary: Key points in 2-3 sentences",
        "2. Market Impact: Main effects on markets",
        "3. Trading Ideas: 1-2 specific trading opportunities",
        "4. Assets: Key instruments mentioned",
        "5. Risk: Low/Medium/High with brief reason",
        "",
        "Keep responses short and focused.",
    ]
    return "\n".join(prompt_lines)

@app.post("/analyze", response_model=Analysis)
async def analyze_article(article: Article):
    """Analyze a financial article with the loaded language model.

    Args:
        article: Article payload; ``content`` must contain non-whitespace text.

    Returns:
        An ``Analysis`` echoing the article id and timestamp, carrying the
        generated text (without the echoed prompt), the model name, a version
        tag and the seconds spent handling the request.

    Raises:
        HTTPException: 400 when the article content is empty or blank;
            500 when prompt building, tokenization, generation or decoding
            fails (the original error message is placed in ``detail``).
    """
    start_time = time.perf_counter()

    # Validate outside the try block: previously this 400 was raised inside
    # it, caught by the generic handler below and returned to clients as a 500.
    if not article.content.strip():
        raise HTTPException(status_code=400, detail="Article content is empty")

    try:
        prompt = create_prompt(article)

        # Prompts longer than MAX_LENGTH tokens are truncated by the tokenizer.
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=MAX_LENGTH)
        prompt_token_count = inputs["input_ids"].shape[1]

        with torch.no_grad():
            outputs = model.generate(
                inputs["input_ids"],
                # Forward the mask so generate() does not have to infer it
                # (pad and eos share an id here, which makes inference ambiguous).
                attention_mask=inputs.get("attention_mask"),
                max_new_tokens=MAX_LENGTH,
                # temperature only takes effect in sampling mode; without
                # do_sample=True it is ignored and decoding is greedy.
                do_sample=True,
                temperature=TEMPERATURE,
                pad_token_id=tokenizer.eos_token_id,
            )

        # For decoder-only models the returned sequence starts with the prompt
        # tokens; keep only the newly generated continuation.
        generated_ids = outputs[0][prompt_token_count:]
        analysis = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

        inference_time = time.perf_counter() - start_time

        return Analysis(
            article_id=article.id,
            timestamp=article.timestamp,
            analysis=analysis,
            model=MODEL_NAME,
            version="1.0",
            inference_time=inference_time,
        )

    except HTTPException:
        # Never rewrap an intentional HTTP error as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

In [None]:
# Run the server
if __name__ == "__main__":
    import asyncio

    # uvicorn.run() starts its own event loop via asyncio.run(), which raises
    # "asyncio.run() cannot be called from a running event loop" inside a
    # notebook kernel. Detect that situation and reuse the kernel's loop.
    try:
        running_loop = asyncio.get_running_loop()
    except RuntimeError:
        running_loop = None

    if running_loop is None:
        # Plain script execution: uvicorn owns the event loop and blocks here.
        uvicorn.run(app, host="0.0.0.0", port=8000)
    else:
        # Notebook execution: schedule the server on the already-running loop.
        # The cell returns immediately while the server keeps serving in the
        # background; set `server.should_exit = True` to stop it.
        server = uvicorn.Server(uvicorn.Config(app, host="0.0.0.0", port=8000))
        # Keep a reference so the task is not garbage-collected while serving.
        server_task = running_loop.create_task(server.serve())