## Example: Using an OpenAI LLM Hosted in Azure via Its REST Endpoint

### We have a list of U.S. Attorney districts, and we want to augment it with the list of counties in each district

In [None]:
import json
import requests

def get_counties_for_district(district):
    """Ask an Azure-hosted OpenAI chat deployment for the counties in a district.

    Sends a single chat-completions request whose prompt asks for a comma
    separated list of the counties in the given US Attorney district.

    Args:
        district: District name interpolated into the prompt, e.g.
            "Western District of Arkansas".

    Returns:
        The message content of the first completion choice when the service
        answers with HTTP 200. On any failure (transport error, timeout, or a
        non-200 status) the error is printed and ``None`` is returned.
    """
    # Set your LLM endpoint, deployment name, and API key
    # NOTE: these are placeholders; do not commit a real key to source control.
    azure_openai_endpoint = "https://yourendpoint.openai.azure.com"
    deployment_name = "gpt-35-turbo-0613"
    api_key = "yourapikeyxxxx"
    url = f"{azure_openai_endpoint}/openai/deployments/{deployment_name}/chat/completions?api-version=2023-05-15"

    # Azure OpenAI authenticates API keys through the "api-key" header;
    # "Authorization: Bearer ..." is reserved for Microsoft Entra ID tokens.
    headers = {
        "Content-Type": "application/json",
        "api-key": api_key,
    }

    # Define the payload (the prompt and other parameters)
    payload = {
        "messages": [{"role": "system", "content": "You are a super intelligent AI that maps counties to US Attorney Districts"},
            {"role": "user","content": f"""Please give me a comma separated list of counties that are in the US Attorney District: {district}  
                            Provide only the comma separated list without any additional text."""
            }],
        "model": "gpt-3.5-turbo-0613",
        "max_tokens": 500,
        "temperature": .2
    }

    # Send the request; the timeout keeps a stalled connection from hanging
    # the caller indefinitely.
    try:
        response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=60)
    except requests.RequestException as exc:
        print(f"Error: request failed: {exc}")
        return None

    # Check if the request was successful
    if response.status_code == 200:
        response_json = response.json()
        return response_json['choices'][0]['message']['content']

    # Print the error message
    print(f"Error: {response.status_code}")
    try:
        print(response.json())
    except ValueError:
        # Error bodies are not guaranteed to be JSON (e.g. gateway errors).
        print(response.text)
    return None

In [122]:
import pandas as pd

df = pd.read_csv("us_attorney_districts.csv")

# Look up counties only for districts in multi-district states that do not
# already have a value in the 'counties' column.
for row_label, record in df.iterrows():
    if not (record['num_districts_in_state'] > 1):
        continue
    if not pd.isna(record['counties']):
        continue

    district = record['district_name']
    county_list = get_counties_for_district(district)

    # Echo each result as it arrives so progress is visible.
    print(row_label, " -> ", district, ":   ", county_list)
    print("-------------------------------------------------------------")

    df.at[row_label, 'counties'] = county_list

6  ->  Western District of Arkansas :    Benton, Carroll, Crawford, Franklin, Johnson, Logan, Madison, Newton, Pope, Scott, Sebastian, Washington
-------------------------------------------------------------
7  ->  Central District of California :    Alameda, San Bernardino, Riverside, San Luis Obispo, Los Angeles, Santa Barbara, Santa Clara, Ventura, Orange, San Diego, Imperial, Inyo, Mono, and Kern.
-------------------------------------------------------------
8  ->  Eastern District of California :    Alpine, Amador, Butte, Calaveras, Colusa, El Dorado, Glenn, Lassen, Modoc, Mono, Nevada, Placer, Plumas, Sacramento, San Joaquin, Shasta, Sierra, Siskiyou, Solano, Sutter, Tehama, Trinity, Yolo, Yuba
-------------------------------------------------------------
9  ->  Northern District of California :    Alameda, Contra Costa, Del Norte, Humboldt, Lake, Marin, Mendocino, Napa, San Francisco, San Mateo, Santa Clara, Santa Cruz, Solano, Sonoma, Trinity
-----------------------------------

In [127]:
# Turn each comma separated county string into a list, then emit one row per
# county (rows without a county value stay as a single NaN row).
df['counties'] = df['counties'].str.split(',')
df = df.explode('counties').reset_index(drop=True)

# The lists are written as "A, B, C", so splitting on ',' leaves a leading
# space on every county after the first; strip it so names compare cleanly.
df['counties'] = df['counties'].str.strip()

In [None]:
# Switch the column to its singular name.
column_renames = {'counties': 'county'}
df = df.rename(columns=column_renames)

In [133]:
# A bare `df.head(5)` is only displayed when it is the last expression of a
# notebook cell; print it explicitly so the preview actually appears.
print(df.head(5))

# Persist the per-county table without the positional index column.
df.to_csv("us_attorney_districts_by_county.csv", index=False)