## Example of using an OpenAI LLM hosted in Azure via its REST endpoint

### We have a list of U.S. Attorney districts, and we want to augment it with the list of counties in each district

In [10]:
import json
import requests

def get_counties_for_district(district, *, timeout=60):
    """Ask the Azure-hosted chat model which counties belong to a US Attorney district.

    Sends a single chat-completions request whose prompt asks for a comma
    separated list of the counties in ``district``.

    Args:
        district: District name inserted into the prompt, e.g.
            'Western District of North Carolina'.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The message content of the first choice in the response (a string),
        or None when the request timed out or the service answered with a
        non-200 status. Those failures are printed rather than raised.
    """
    # Set your LLM endpoint, deployment name, and API key
    azure_openai_endpoint = "YOUR LLM ENDPOINT URL"
    deployment_name = "gpt-35-turbo-0613"
    api_key = "YOUR API KEY"
    url = f"{azure_openai_endpoint}/openai/deployments/{deployment_name}/chat/completions?api-version=2023-05-15"

    # Define the headers, including the API key for authentication.
    # The same key is sent both as "api-key" and as a Bearer token.
    # NOTE(review): confirm whether this endpoint actually needs both headers.
    headers = {
        "Content-Type": "application/json",
        "api-key": api_key,
        "Authorization": f"Bearer {api_key}"
    }

    # Define the payload (the prompt and other parameters)
    payload = {
        "messages": [{"role": "system", "content": "You are a super intelligent AI that maps counties to US Attorney Districts"},
            {"role": "user","content": f"""Please give me a comma separated list of counties that are in the US Attorney District: {district}  
                            Provide only the comma separated list without any additional text."""
            }],
        "model": "gpt-3.5-turbo-0613",
        "max_tokens": 500,
        "temperature": .2
    }

    # Send the request to the OpenAI API. Without a timeout a stalled
    # connection would block forever, which matters when this function is
    # called once per district in a long loop.
    try:
        response = requests.post(
            url, headers=headers, data=json.dumps(payload), timeout=timeout
        )
    except requests.exceptions.Timeout:
        # Report in the same print-and-return-None style as HTTP errors below.
        print(f"Error: request timed out after {timeout} seconds")
        return None

    # Check if the request was successful
    if response.status_code == 200:
        response_json = response.json()
        return response_json['choices'][0]['message']['content']

    # Print the error message. Error bodies are not guaranteed to be JSON
    # (for example an HTML page from a gateway), so fall back to the raw text
    # instead of letting the JSON decoder raise.
    print(f"Error: {response.status_code}")
    try:
        print(response.json())
    except ValueError:
        print(response.text)
    return None

In [4]:
import pandas as pd
# Load the list of US Attorney districts and preview the first ten rows.
districts_csv = "us_attorney_districts.csv"
df = pd.read_csv(districts_csv)
df.head(10)

Unnamed: 0,district_name,state,num_districts_in_state
0,Northern District of Alabama,Alabama,3
1,Middle District of Alabama,Alabama,3
2,Southern District of Alabama,Alabama,3
3,District of Alaska,Alaska,1
4,District of Arizona,Arizona,1
5,Eastern District of Arkansas,Arkansas,2
6,Western District of Arkansas,Arkansas,2
7,Central District of California,California,4
8,Eastern District of California,California,4
9,Northern District of California,California,4


In [15]:
# Try the helper on one district before running it over the whole table.
sample_district = 'Western District of North Carolina'
counties = get_counties_for_district(sample_district)
print(counties)

Alamance, Alexander, Alleghany, Ashe, Avery, Buncombe, Burke, Cabarrus, Caldwell, Caswell, Catawba, Cherokee, Clay, Cleveland, Gaston, Graham, Haywood, Henderson, Iredell, Jackson, Lincoln, Macon, Madison, McDowell, Mecklenburg, Mitchell, Polk, Rutherford, Stanly, Swain, Transylvania, Union, Watauga, Wilkes, Yadkin, and Yancey.


In [16]:
# Query the model once per district and store each reply in a new 'counties' column.
df['counties'] = ''
for index, district_name in df['district_name'].items():
    counties = get_counties_for_district(district_name)
    # Echo every reply so progress and odd answers are visible while the loop runs.
    print(index, " -> ", district_name, ":   ", counties)
    print("-------------------------------------------------------------")
    df.at[index, 'counties'] = counties

0  ->  Northern District of Alabama :    Blount, Cherokee, Colbert, Cullman, DeKalb, Etowah, Fayette, Franklin, Jackson, Lamar, Lauderdale, Lawrence, Limestone, Madison, Marion, Marshall, Morgan, Pickens, Randolph, St. Clair, Tuscaloosa, Walker, Winston
-------------------------------------------------------------
1  ->  Middle District of Alabama :    Autauga County, Baldwin County, Barbour County, Bullock County, Butler County, Chambers County, Cherokee County, Chilton County, Choctaw County, Clarke County, Clay County, Cleburne County, Coffee County, Colbert County, Conecuh County, Coosa County, Covington County, Crenshaw County, Cullman County, Dale County, Dallas County, DeKalb County, Elmore County, Escambia County, Etowah County, Fayette County, Franklin County, Geneva County, Greene County, Hale County, Henry County, Houston County, Jackson County, Jefferson County, Lamar County, Lauderdale County, Lawrence County, Lee County, Limestone County, Lowndes County, Macon County, Mad

In [17]:
# Preview the first ten rows of df.
df.head(10)

Unnamed: 0,district_name,state,num_districts_in_state,counties
0,Northern District of Alabama,Alabama,3,"Blount, Cherokee, Colbert, Cullman, DeKalb, Et..."
1,Middle District of Alabama,Alabama,3,"Autauga County, Baldwin County, Barbour County..."
2,Southern District of Alabama,Alabama,3,"Autauga County, Baldwin County, Barbour County..."
3,District of Alaska,Alaska,1,"Anchorage, Bethel, Dillingham, Fairbanks, June..."
4,District of Arizona,Arizona,1,"Apache, Cochise, Coconino, Gila, Graham, Green..."
5,Eastern District of Arkansas,Arkansas,2,"Arkansas, Ashley, Baxter, Benton, Boone, Bradl..."
6,Western District of Arkansas,Arkansas,2,"Benton, Boone, Carroll, Crawford, Franklin, Jo..."
7,Central District of California,California,4,"Los Angeles, Orange, Riverside, San Bernardino..."
8,Eastern District of California,California,4,"Alpine, Amador, Butte, Calaveras, Colusa, El D..."
9,Northern District of California,California,4,"Alameda, Contra Costa, Del Norte, Humboldt, La..."


In [18]:
# Perform some post-processing to get to one row per county.
# Split each reply on commas, then give every fragment its own row.
df['counties'] = df['counties'].str.split(',')
df = df.explode('counties').reset_index(drop=True)
# The replies are written as "A, B, C", so every fragment after the first keeps
# a leading space, and some replies end with "..., and Z.". Strip the
# whitespace, a leading "and ", and a trailing period so county names are clean.
df['counties'] = (
    df['counties']
    .str.strip()
    .str.replace(r'^and\s+', '', regex=True)
    .str.rstrip('.')
    .str.strip()
)
df = df.rename(columns={'counties': 'county'})

In [22]:
# Write the table to CSV, then preview it. The preview is placed last because
# a notebook cell only displays the value of its final expression; placed
# before the to_csv call, its result was silently discarded.
df.to_csv("us_attorney_districts_by_county.csv", index=False)
df.head(10)