In [5]:
# Import ray serve and FastAPI libraries
import ray
from ray import serve
from fastapi import FastAPI
import requests 

# library for pre-trained models
from transformers import pipeline

In [2]:
# Define a simple FastAPI app
app = FastAPI()

# Define a Ray Serve deployment
# This decorator registers the class as a Ray Serve deployment
@serve.deployment(num_replicas=2) # num_replicas specifies the number of replicas for load balancing
@serve.ingress(app) # This decorator allows the FastAPI app to be served by Ray Serve
class MySentimentModel:
    def __init__(self):
        # Load a pre-trained sentiment analysis model
        self.model = pipeline("sentiment-analysis",
                              model="distilbert-base-uncased-finetuned-sst-2-english")

    # Define any necessary application logic or transformation logic
    def application_logic(self, text):
        """        Apply any necessary application logic to the input text.
        """
        # simple application logic: truncate text if it exceeds a certain length
        if len(text) > 50:
            return text[:50].lower()  # Truncate and convert to lowercase
        else:
            return text.lower()
        
    @app.get("/predict") # Define an endpoint for predictions
    def predict(self, text: str):
        """        Predict sentiment for the given text.
        """
        # Define any necessary application logic or transformation logic
        text = self.application_logic(text) # Apply any necessary application logic to the input text

        # Use the model to make a prediction
        result = self.model(text)
        return {"text": text, "sentiment": result}

In [3]:
serve.run(MySentimentModel.bind())

2025-08-02 15:48:38,829	INFO worker.py:1747 -- Connecting to existing Ray cluster at address: 100.77.61.72:6379...
2025-08-02 15:48:38,840	INFO worker.py:1918 -- Connected to Ray cluster. View the dashboard at [1m[32mhttps://session-xsclvf1y3h8ri22vxrxzy7b516.i.anyscaleuserdata.com [39m[22m
2025-08-02 15:48:38,842	INFO packaging.py:380 -- Pushing file package 'gcs://_ray_pkg_fefd6d66be450f94ac647d52611cb36898e3dd4f.zip' (0.13MiB) to Ray cluster...
2025-08-02 15:48:38,843	INFO packaging.py:393 -- Successfully pushed file package 'gcs://_ray_pkg_fefd6d66be450f94ac647d52611cb36898e3dd4f.zip'.
[36m(ProxyActor pid=6804)[0m INFO 2025-08-02 15:48:45,966 proxy 100.77.61.72 -- Proxy starting on node 06627b0ac8fc4ef17224267e74272b5b4c65596a323b8e70e242b4d3 (HTTP port: 8000).
[36m(ProxyActor pid=6804)[0m INFO 2025-08-02 15:48:46,020 proxy 100.77.61.72 -- Got updated endpoints: {}.
INFO 2025-08-02 15:48:46,050 serve 6518 -- Started Serve in namespace "serve".


[36m(autoscaler +28s)[0m Tip: use `ray status` to view detailed cluster status. To disable these messages, set RAY_SCHEDULER_EVENTS=0.


[36m(ServeController pid=6740)[0m INFO 2025-08-02 15:48:50,269 controller 6740 -- Deploying new version of Deployment(name='MySentimentModel', app='default') (initial target replicas: 2).
[36m(ProxyActor pid=6804)[0m INFO 2025-08-02 15:48:50,281 proxy 100.77.61.72 -- Got updated endpoints: {Deployment(name='MySentimentModel', app='default'): EndpointInfo(route='/', app_is_cross_language=False)}.
[36m(ProxyActor pid=6804)[0m INFO 2025-08-02 15:48:50,313 proxy 100.77.61.72 -- Started <ray.serve._private.router.SharedRouterLongPollClient object at 0x7b2c1a9cd1c0>.
[36m(ServeController pid=6740)[0m INFO 2025-08-02 15:48:50,381 controller 6740 -- Adding 2 replicas to Deployment(name='MySentimentModel', app='default').


In [3]:
def get_response(text_payload :str):
    response = requests.get("http://localhost:8000/predict", params={"text": text_payload})
    return response.json()

In [6]:
get_response("There are libraries built on top of Ray")

{'text': 'there are libraries built on top of ray',
 'sentiment': [{'label': 'POSITIVE', 'score': 0.8838600516319275}]}

In [8]:
get_response("Edinburgh has a buzzing ML community")

{'text': 'edinburgh has a buzzing ml community',
 'sentiment': [{'label': 'POSITIVE', 'score': 0.9889107942581177}]}

In [9]:
get_response("Too much heat drains me")

{'text': 'too much heat drains me',
 'sentiment': [{'label': 'NEGATIVE', 'score': 0.999446451663971}]}

## Build Anyscale service now
- Create ray serve deployment script
- Create an Image (if needed) using anyscale container images 
- Populate the service.yaml file
- deploy the service `anyscale service deploy -f service.yaml`


In [3]:
import requests

def query_sentiment(
    text: str,
    base_url: str = "https://sentiment-service-hq3g3.cld-bgtd6qup2pckeg3i.s.anyscaleuserdata.com",
    token: str = "LSbnFASf6jf3zP3gkyuPmc78TbmjPA34S7hsjF52UFE",
    route: str = "/predict",
    timeout: float = 10.0,
) -> dict:
    """
    Query the deployed Ray Serve sentiment model.

    Args:
        text (str): Input text to analyze.
        base_url (str): The base URL of the service endpoint.
        token (str): The Bearer token for authentication.
        route (str): The route of the prediction endpoint.
        timeout (float): Request timeout in seconds.

    Returns:
        dict: The JSON response from the service.
    """
    url = f"{base_url}{route}"
    headers = {"Authorization": f"Bearer {token}"}
    params = {"text": text}
    resp = requests.get(url, headers=headers, params=params)
    return resp.json()

In [5]:

result = query_sentiment("There are libraries built on top of Ray")
print(result)

{'text': 'there are libraries built on top of ray', 'sentiment': [{'label': 'POSITIVE', 'score': 0.8838600516319275}]}


In [6]:

result = query_sentiment("Edinburgh has a buzzing ML community")
print(result)


{'text': 'edinburgh has a buzzing ml community', 'sentiment': [{'label': 'POSITIVE', 'score': 0.9889109134674072}]}


In [7]:

result = query_sentiment("Too much heat drains me")
print(result)

{'text': 'too much heat drains me', 'sentiment': [{'label': 'NEGATIVE', 'score': 0.999446451663971}]}
