In [1]:
# Versa API workstation setup
# https://git.ucsf.edu/academic-research-systems/versa-api-workstation/

# Mulesoft/Azure API test
# https://git.ucsf.edu/academic-research-systems/azure-openai-demo/blob/master/test_mulesoft_azure_api.ipynb

In [2]:
import os

In [3]:
# Connection settings for the API, published as environment variables.
# Replace the placeholder key with your own before running (and keep real keys
# out of version control).
os.environ.update({
    "API_KEY": "your-API-key",
    "API_VERSION": '2024-02-01',
    "RESOURCE_ENDPOINT": "https://unified-api.ucsf.edu/general",
})

In [4]:
import os
import re
import json
import base64
import requests
import urllib.parse
import pandas as pd
import pickle

In [5]:
# Read the connection settings back from the environment. The variable names must
# match the names used in the .env file; any variable that is not set becomes None.
API_KEY, API_VERSION, RESOURCE_ENDPOINT = (
    os.getenv(setting_name)
    for setting_name in ('API_KEY', 'API_VERSION', 'RESOURCE_ENDPOINT')
)

In [6]:
# Deployment registry: append new deployments here and remove deprecated ones.
# The deployment choice uniquely determines the underlying model.
completions_deployments = [
    'text-davinci-003',
    'gpt-35-turbo-instruct',
]
embeddings_deployments = [
    'text-embedding-ada-002',
]
chat_deployments = [
    'gpt-35-turbo',
    'gpt-35-turbo-0301',
    'gpt-4',
    'gpt-35-turbo-16K',
    'gpt-4-32K',
    'gpt-4-turbo',
    'gpt-4o-2024-05-13',
]


def test_key():
    assert API_KEY is not None and API_KEY.strip() != "", "API Key is missing or empty"
    try:
        redacted_key = API_KEY[0] + "*" * (len(API_KEY) - 3) + API_KEY[-2:]
        base64.b64decode(API_KEY)
        print(f"API Key is a valid base64 string with length={len(API_KEY)}: {redacted_key}")
    except Exception as e:
        assert False, f"API Key is not a valid base64 string: {redacted_key} " + str(e)


def test_version():
    assert API_VERSION is not None and API_VERSION.strip() != "", "API Version is missing or empty"
    pattern = r'\d{4}-\d{2}-\d{2}'  # matches four digits-two digits-two digits
    assert re.fullmatch(pattern,
                        API_VERSION) is not None, f"API version has invalid format, it should be like: yyyy-mm-dd: {API_VERSION}"
    print(f"API version has valid format: yyyy-mm-dd: {API_VERSION}")


def test_endpoint():
    assert RESOURCE_ENDPOINT is not None and RESOURCE_ENDPOINT.strip() != "", "Resource endpoint is missing or empty"
    url = urllib.parse.urlparse(RESOURCE_ENDPOINT)
    assert all([url.scheme, url.netloc]), f"Resource endpoint is not a valid URL: {RESOURCE_ENDPOINT}"
    print(f"Resource endpoint is a valid URL: {RESOURCE_ENDPOINT}")


def test_completions():
    for deployment_id in completions_deployments:
        print(f"\nTesting completions for deployment: {deployment_id}")
        completions_url = f"{RESOURCE_ENDPOINT}/openai/deployments/{deployment_id}/completions?api-version={API_VERSION}"
        prompt = 'The rain in Spain'
        body = json.dumps({
            "prompt": prompt,
            "max_tokens": 30,  # Limit the response
        })
        headers = {
            'Content-Type': 'application/json',
            'api-key': API_KEY
        }
        response = requests.post(completions_url, headers=headers, data=body)
        assert response.status_code == 200, f"Test failed for deployment: {deployment_id}, Response status code: {response.status_code}, Response: {response.text}"
        print('User: ', prompt)
        print('Response: ', json.loads(response.text).get('choices')[0].get('text'))


def test_embeddings():
    for deployment_id in embeddings_deployments:
        print(f"\nTesting embeddings for deployment: {deployment_id}")
        embeddings_url = f"{RESOURCE_ENDPOINT}/openai/deployments/{deployment_id}/embeddings?api-version={API_VERSION}"
        body = json.dumps({
            "input": "This is test string to embed",
        })
        headers = {
            'Content-Type': 'application/json',
            'api-key': API_KEY
        }
        response = requests.post(embeddings_url, headers=headers, data=body)
        assert response.status_code == 200, f"Test failed for deployment: {deployment_id}, Response status code: {response.status_code}, Response: {response.text}"
        embedding_len = len(json.loads(response.text)['data'][0]['embedding'])
        # print(embedding_len)

        if deployment_id == 'text-embedding-ada-002':
            assert_len = 1536
        else:
            raise ValueError(f'Deployment {deployment_id} not supported for validation. Check code')

        assert embedding_len == assert_len, f"Test failed for deployment: {deployment_id}, Response status code: {response.status_code}, Response: {response.text}"

        print('Embedding received from API')


def test_chat_completions():
    for deployment in chat_deployments:
        print(f"\nTesting chat completions for deployment: {deployment}")
        url = f'{RESOURCE_ENDPOINT}/openai/deployments/{deployment}/chat/completions?api-version={API_VERSION}'
        prompt = 'Hello, how are you today?'

        body = json.dumps({
            "messages": [{"role": "user", "content": prompt}]
        })
        headers = {'Content-Type': 'application/json', 'api-key': API_KEY}
        response = requests.post(url, headers=headers, data=body)
        print('User: ', prompt)
        print('Response: ', json.loads(response.text).get('choices')[0].get('message').get('content'))
        assert response.status_code == 200, f"Test failed for deployment: {deployment}, model: {model}, Response status code: {response.status_code}, Response: {response.text}"

In [9]:
# # Execute the tests
# # First perform some basic validation of our environment variables
# test_key()
# test_version()
# test_endpoint()

# # Most API users will only need to use chat completions
# test_chat_completions()  # Responds as an AI assistant
# test_completions()  # Continues a thought or sentence in the prompt
# test_embeddings()  # Validates a properly formed embedding

In [7]:
def get_response(prompt, model='gpt-4o-2024-05-13', temperature=0.1, timeout=120):
    """Send ``prompt`` as a single user message to a chat deployment and return the reply.

    Args:
        prompt: Text sent as the content of the one user message.
        model: Deployment name used in the request URL.
        temperature: Sampling temperature included in the request body.
        timeout: Seconds passed to ``requests.post`` as its timeout, so that an
            unresponsive endpoint raises instead of blocking the notebook forever.

    Returns:
        The ``content`` of the first choice's message in the response.

    Raises:
        requests.exceptions.HTTPError: if the API answers with an error status.
        requests.exceptions.Timeout: if the endpoint does not answer within ``timeout``.
    """
    url = f'{RESOURCE_ENDPOINT}/openai/deployments/{model}/chat/completions?api-version={API_VERSION}'

    body = json.dumps({
        "messages": [{"role": "user", "content": prompt}],
        # Forward the caller's temperature; without it the argument has no effect.
        "temperature": temperature,
    })
    headers = {'Content-Type': 'application/json', 'api-key': API_KEY}
    response = requests.post(url, headers=headers, data=body, timeout=timeout)
    # Surface HTTP errors directly instead of failing later while parsing the body.
    response.raise_for_status()
    return json.loads(response.text).get('choices')[0].get('message').get('content')

In [9]:
def clean_text(text):
    """Return ``text`` with every run of whitespace replaced by a single space.

    Leading and trailing whitespace is collapsed to one space as well; it is not
    stripped.
    """
    whitespace_run = re.compile(r"\s+")
    return whitespace_run.sub(" ", text)

In [16]:
# Load the radiology reports to classify.
# The CSV file has 2 columns: study_id, radiology_report_text
reports_csv_path = '/path/to/radiology_reports.csv'
df = pd.read_csv(reports_csv_path)

## Send to Versa

In [None]:
# Instruction text placed in front of every report. Both variants share the same
# opening and closing sentences; they differ only in whether an explanation is requested.
_prompt_opening = "Provide an AO/OTA classification label for the following fracture description. Be as precise as possible, including subgroups, universal modifiers, and qualifiers if available."
_prompt_closing = "If there are multiple fractures identified, provide a comma-separated list."

prompt_prefixes = {
    "single_prompt": " ".join([
        _prompt_opening,
        "Provide only the label in your response, with no explanation.",
        _prompt_closing,
    ]),
    "single_prompt_with_explanation": " ".join([
        _prompt_opening,
        "Provide the label in your response, followed by a brief explanation for each part of the classification.",
        _prompt_closing,
    ]),
}

# Categories are listed in the order the prompts are declared above.
prompt_categories = list(prompt_prefixes)

In [None]:
# Query the model once per (report, prompt category) pair and collect one tuple per
# call: (study_id, category, instruction text, report text, model response).
results = []
for _, report_row in df.iterrows():
    for category in prompt_categories:
        instruction = prompt_prefixes[category]
        report_text = report_row['radiology_report_text']
        answer = get_response(f"{instruction}: {report_text}")
        record = (report_row['study_id'], category, instruction, report_text, answer)
        results.append(record)

In [None]:
# Turn the collected tuples into a table with one row per model call.
result_columns = ["study_id", "prompt", "prompt_prefix", "radiology_report_text", "response"]
results = pd.DataFrame(results, columns=result_columns)

In [130]:
# Write the results table to disk as CSV.
output_csv_path = "/path/to/output_file.csv"
results.to_csv(output_csv_path)