## Cluster metacog labels using the Della-Inference API

In [None]:
import requests
import json
import os
import pandas as pd

In [None]:
# Chat-completions endpoint on localhost port 12257.
url = "http://localhost:12257/v1/chat/completions"

# HTTP headers for the endpoint: JSON body plus the access token.
# NOTE(review): the Authorization value carries no scheme prefix such as
# "Bearer " -- confirm the server accepts a bare token.
headers = {
    "Content-Type": "application/json",
    "Authorization": "token-abc123",
}

# Model identifier placed in the "model" field of request payloads.
model_name = "meta-llama/Meta-Llama-3.1-70B-Instruct"

In [None]:
def get_metacog_cluster_prompt(properties):
    """Build the chat messages asking the model to cluster question properties.

    Args:
        properties: The property labels to cluster. The value is interpolated
            into the user message through an f-string, so any object with a
            readable string form works.

    Returns:
        A two-element list of chat messages (dicts with ``"role"`` and
        ``"content"`` keys): the system message holding the clustering
        instructions, followed by the user message carrying ``properties``.
    """
    # The example output is a JSON array with one object per category. A single
    # object that repeats the same keys for every category is not usable as a
    # template: JSON parsers keep only the last occurrence of a duplicated key,
    # so all but one category would be lost when the reply is parsed.
    system_prompt = """I have a list of properties describing questions asked during Supreme Court oral arguments. Your task is to group similar properties into broader categories and provide a descriptive name for each category. The category name should be lower case letters only. If the category name has multiple words, join them with an underscore. Basically, you should be able to use the category name as a dictionary key in python.
    The output should be in JSON format for easy parsing. Each category should contain its name, a brief description and a list of the grouped properties in that category.

      ### Instructions:
        1. Group similar properties together based on shared themes or purposes.
        2. Name each category with a descriptive label.
        3. Return the result as a JSON list with one object per category, structured as shown below. The example shows two categories with two properties each; use as many categories and properties as the input requires.

        [
            {
                "category_name": "<your_category_name>",
                "description": "<A brief description of the category>",
                "properties_in_category": ["property_1", "property_2"]
            },
            {
                "category_name": "<another_category_name>",
                "description": "<A brief description of the category>",
                "properties_in_category": ["property_3", "property_4"]
            }
        ]
    """

    user_prompt = f"""### Your Task:
        Properties:
        {properties}

        ### Output:
    """

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    return messages

In [None]:
def get_model_response(messages, timeout=None):
    """POST a chat-completion request and return the HTTP response.

    Args:
        messages: Chat messages (list of dicts with ``"role"`` and
            ``"content"`` keys) placed in the payload's ``"messages"`` field.
        timeout: Optional timeout in seconds handed to ``requests.post``. The
            default ``None`` waits indefinitely, so long generations are not
            cut off; pass a number to bound the wait.

    Returns:
        The ``requests.Response`` returned by the server.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    payload = {"model": model_name, "messages": messages}
    response = requests.post(
        url, data=json.dumps(payload), headers=headers, timeout=timeout
    )
    # Fail here, with the status code in the message, instead of handing an
    # error body to the caller where indexing into it fails opaquely.
    response.raise_for_status()
    return response

def parse_response(response):
    """Extract the assistant's message text from a chat-completion response.

    Args:
        response: Object whose ``content`` attribute holds the UTF-8 encoded
            JSON body of the reply.

    Returns:
        The ``content`` field of the first choice's message, exactly as found
        in the body; no further parsing is applied to it.
    """
    body = json.loads(response.content.decode('utf-8'))
    first_choice = body['choices'][0]
    return first_choice['message']['content']

## Call model

In [None]:
def generate_clusters(property_list):
    """Ask the model to cluster ``property_list`` and return its reply text.

    Builds the clustering prompt, sends it to the model, and extracts the
    message content from the response.
    """
    prompt_messages = get_metacog_cluster_prompt(property_list)
    return parse_response(get_model_response(prompt_messages))

In [None]:
# Process each JSON file in the input directory
def process_json_files(input_directory, output_directory):
    """Cluster the properties stored in every ``.json`` file of a directory.

    Each ``.json`` file in ``input_directory`` is loaded, its contents are
    passed to ``generate_clusters``, and the returned value is written as JSON
    to ``cluster_raw_<file name>`` in ``output_directory``.

    Args:
        input_directory: Directory whose ``.json`` files are processed
            (sub-directories are not descended into).
        output_directory: Directory receiving the output files; created if it
            does not exist yet.
    """
    # Create the destination up front so the first write cannot fail on a
    # missing directory.
    os.makedirs(output_directory, exist_ok=True)

    # Sorted so files are processed (and logged) in a reproducible order.
    for file_name in sorted(os.listdir(input_directory)):
        if not file_name.endswith(".json"):
            continue

        # Read the JSON file
        input_file_path = os.path.join(input_directory, file_name)
        with open(input_file_path, 'r', encoding='utf-8') as file:
            properties = json.load(file)

        # Get the model response
        model_response = generate_clusters(properties)

        # Save the response to a new JSON file
        output_file_path = os.path.join(output_directory, f"cluster_raw_{file_name}")
        with open(output_file_path, 'w', encoding='utf-8') as output_file:
            json.dump(model_response, output_file, indent=4)

        print(f"Processed and saved response for {file_name}")



In [None]:
# Directory the input JSON files are read from, and directory the raw
# clustering responses are written to (both relative to the working directory).
input_directory = "../analysis/metacog/label_jsons/"
output_directory = "../analysis/metacog/clustering_responses/"

# Run every input file through the clustering pipeline.
process_json_files(
    input_directory=input_directory,
    output_directory=output_directory,
)
