## Setup
#### Load the API key and relevant Python libraries.

In [18]:
import io
from dotenv import dotenv_values, load_dotenv, find_dotenv
import openai
from openai import OpenAI
import os
from copy import deepcopy
import json
import time
import ast
import re

import pprint

# LLM: OpenAI

In [19]:
# Path to the dotenv-style file that stores the OpenAI credentials
env_file_key = "../../auixiliary/env_HumanRights"

# Read the file as UTF-8 text, then let python-dotenv parse it from an in-memory stream
with open(env_file_key, mode='r', encoding='utf-8') as file:
    env_content = file.read()
env_variables = dotenv_values(stream=io.StringIO(env_content))

# A missing OPENAI_API_KEY entry raises KeyError here
api_key = env_variables['OPENAI_API_KEY']

# Client object that chat_gpt() sends its requests through
client = OpenAI(api_key=api_key)

# Models

In [20]:
def chat_gpt(prompt, temperature=0, model="gpt-4", api_client=None):
    """
    Send a chat conversation to the chat-completions endpoint and return the
    text of the first choice, stripped of surrounding whitespace.

    Args:
        prompt: list of message dicts (``{'role': ..., 'content': ...}``),
            forwarded unchanged as ``messages``.
        temperature: sampling temperature forwarded to the API.
        model: model name forwarded to the API. Defaults to "gpt-4", the value
            that was previously hard-coded.
        api_client: object exposing ``chat.completions.create(...)``. When
            omitted, the module-level ``client`` is used.

    Returns:
        str: ``response.choices[0].message.content`` with leading/trailing
        whitespace removed.

    Raises:
        ValueError: if the first choice carries no text content.
    """
    active_client = client if api_client is None else api_client
    response = active_client.chat.completions.create(
        model=model,
        messages=prompt,
        temperature=temperature,
    )
    content = response.choices[0].message.content
    if content is None:
        # The message content may be absent; report that explicitly instead of
        # failing with AttributeError on None.strip().
        raise ValueError("Chat completion returned no text content")
    return content.strip()

# Iterate and Save Use Riskiness Results

# Functions

In [21]:
def replace_key(d, old_key, new_key):
    """
    Rename a key of `d` in place and hand the same dictionary back.

    When `old_key` is present, its value is removed and stored again under
    `new_key`; when it is absent, `d` is returned untouched.
    """
    if old_key not in d:
        return d
    moved_value = d.pop(old_key)
    d[new_key] = moved_value
    return d

## Read In Prompt Result

In [22]:
def read_prompt_output(file_path):
    """
    Load a JSON result file and return the parsed content.

    The file is opened explicitly as UTF-8 so that parsing does not depend on
    the platform's default text encoding.

    Args:
        file_path: path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` for the file's content.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

In [23]:
# Locations of the previously generated per-use risk assessments
SDG_RISKS_PATH = "../../results/SDGs/FULL_SDG_risks.json"
HR_RISKS_PATH = "../../results/HRs/FULL_HR_risks.json"

# Parse both files (SDG first, then HR)
SDG_risks = read_prompt_output(SDG_RISKS_PATH)
HR_risks = read_prompt_output(HR_RISKS_PATH)

In [24]:
# HR_risks

In [25]:
# Use identifiers found in each risk file, in file order
HR_risk_ids = [hr_entry["id"] for hr_entry in HR_risks]
SDG_risk_ids = [sdg_entry["id"] for sdg_entry in SDG_risks]

In [26]:
# Every use id that appears in at least one of the two risk files (duplicates collapsed)
risk_ids = set([*HR_risk_ids, *SDG_risk_ids])

# PROMPT: Categorisation

In [42]:
# Two-message chat template for the risk-categorisation prompt.
# The user message holds eight positional `{}` slots that format_prompt() fills, in order:
# UseID, Domain, Purpose, Capability, AI User, AI Subject, SDG assessment, HR assessment.
# Literal JSON braces inside the template are doubled (`{{` / `}}`) so str.format() emits single braces.
# NOTE(review): the text says "five concepts" but lists six fields (UseID plus five) - confirm the wording.
# NOTE(review): the output schema spells the key "Stakholders affected by risk", while the reply recorded
# in this notebook's cell output uses "Stakeholders affected by risk" - confirm which spelling consumers expect.

MESSAGES = [
    # System message: sets the expert persona
    {
        'role': 'system',
        'content': """As a distinguished expert in Artificial Intelligence technology, you embody the forefront of Responsible Artificial Intelligence (RAI). Your expertise is not just technical; it's deeply rooted in a conscientious approach to the ethical, social, and environmental implications of AI. With a wealth of experience, you navigate the intricate balance between harnessing AI's potential for positive impact and mitigating its risks. Your work is pivotal in aligning AI innovations with the Sustainable Development Goals (SDGs), safeguarding Human Rights (HRs), and ensuring the welfare of individuals, society, and the planet. Your decisiveness, coupled with a profound understanding of AI's capabilities and risks, positions you as a key influencer in promoting responsible AI practices that respect ethical boundaries and contribute to a sustainable future."""
    },
    # User message: task description, input slots, and required output schema
    {
        'role': 'user',
        'content': """You are provided with an AI technology use description through five concepts:

        UseID: "{}",
        Domain: "{}",
        Purpose: "{}",
        Capability: "{}",
        AI User: "{}",
        AI Subject: "{}"

        followed by a list of risks of that AI use, including Sustainable Development Goals (SDGs) risks:

        "SDG Assessment":
        "{}"

        and Human Rights (HR) risks:

        "Assessment of impact on human rights":
        "{}"

        Second, please be aware that in certain scenarios, there may be an absence of risks related to either HRs or SDGs. In such instances, you should disregard the missing risk category and proceed with the evaluation based on the available risk information, if any. If a scenario arises where no risk information is provided for a specific use case, pertaining to neither SDGs nor HRs, then you should generate an output in the form of an empty JSON structure, as illustrated below:
        {{
          "AI System Use Description": "",
          "Overall Risk Assessment": []
        }}

        ***Check your planned output before outputting it: if it contains any explanations besides the JSON string, omit the explanations. Make sure to output ONLY a correctly formatted JSON string and nothing else.***

        If there are any risks provided, you will group theses risks of the given technology use along two axes:
        * Axis 1: capability, human interaction, and systemic;
        * Axis 2: AI Subject, AI User, and Institutions, General Public and Environment;

        where for the first axis, the three evaluation layers for risks are distinguished by the target of analysis:

        (1) Capability: targets AI systems, their technical components, and the processes by which these systems and components are created, including:​
          (a) Outputs of an AI system, for example, model performance, efficiency metrics such as energy use, the extent to which an AI model reproduces harmful stereotypes, factual errors, or displays advanced capabilities that present safety hazards,​
          (b) The data on which a model is trained, for example, the diversity and representativeness of the data, the presence of sensitive data, the learned associations of a trained AI system,​
          (c) Filters and techniques for reducing system harms, for example, filters for toxic language.​

        (2) Human Interaction: targets the experience of people interacting with AI systems and their effects on these people, including:​
          (a) The system's usability, for example, whether the AI system performs its intended function at the point of use, how experiences differ between user groups, and how easy it is to use a model for malicious ends,​
          (b) Potential externalities, for example, whether human-AI interaction leads to unintended effects on the person interacting with or exposed to AI outputs, such as overreliance on AI systems, overtrust, and cognitive biases,​
          (c) Potential harms, including harms to data annotators and harms arising from different system modalities (e.g., video, image, text),​
          (d) The overall quality of outcomes in human-AI assisted tasks compared to human-human assisted tasks.​

        (3) Systemic Impact: targets the impact of an AI system on the broader systems in which it is embedded, such as society, the economy, and the natural environment, including:​
          (a) Systems of various domains, sizes, industries, or goods,​
          (b) Adoption and perception of AI across different systems,​
          (c) Distribution of benefits and risks from the AI,​
          (d) Environmental impacts of the AI on the systems, e.g., biodiversity and resilience of local ecosystems.​


        and for the second axis, they are distinguished by the stakeholder who is impacted:

        * AI User: The entity or individual in charge of deploying and managing the AI system, including individuals, organizations, corporations, public authorities, and agencies responsible for its operation and management;
        * AI Subject: The individual directly affected by the use of the AI system, experiencing its effects and consequences. They interact with or are impacted by the AI system's processes, decisions, or outcomes;
        * Institutions, General Public and Environment: includes societal and nature elements affected by the AI system use.

      NB: It is very important that each "Risk Description" you create be formatted like this: Verb + Object + [Explanation], and is concise, consisting of one clear, to-the-point sentence, with up to maximum of 20 words.
          Specifically, start with an action verb in active present tense (e.g., undermines, discriminates, infringes, reduces, increases but NOT potentially) followed by the object and the reason in case it is not obvious and requires an explanation. Since we are talking about potential risks in general, there is no need to use the word potentially anywhere. Hence, this would NOT be a good output: "Potentially undermines the right to privacy if the facial recognition data is not properly secured." while this is good: "Undermines the right to privacy if the facial recognition data is not properly secured."
          Also, we want these descriptions to be read by broad public who does not have a deep knowledge about Human Rights or Sustainable Development Goals. Hence, we want descriptions without any mention of specific Human Right Article IDs, or Sustainable Development Goal Targets and Indicators. For instance, this is NOT a good output: "Undermines target 11.3 by controlling public spaces without democratic civil society participation.", and this is, instead, good: "Controls public spaces without democratic civil society participation.".

        Other examples of well-formatted "Risk Descriptions":
        
        * Discriminates against certain players, such as women or those from certain ethnic backgrounds.
        * Leads to job losses in the transportation sector, particularly among drivers, if it results in increased automation and reduced need for human drivers.

      Ensure to include ALL the input risks, unless there are duplicates across risks that impact SDGs and human rights, in which case, merge them into a single risk. Sometimes, the input includes both risks and benefits for a use together. In that case, make sure to extract only the risks. For example:
            If in INPUT you had: "The AI system supports social, economic, and political inclusion by providing a tool that can be used by anyone, regardless of their background, to improve their personal growth. However, it could potentially undermine this by reinforcing biases in facial recognition technology.",
            You should extract and include as a risk description ONLY the following part: "Undermines social, economic, and political inclusion by reinforcing biases in facial recognition technology."

      Finally, and crucially, examine the risks you have identified for this specific AI use to ensure their uniqueness. Specifically, if a risk is initially categorized under both 'Capability' and 'Human Interaction' or any other overlapping categories from the Axis 1, select the most appropriate single category for it. Exclude it from any additional categories. By maintaining a distinct risk set for each AI use, assign unique Risk Identifiers (RIDs) accordingly (this should be only numbers). Subsequently, for comprehensive tracking, we will generate a universal Risk Identifier for all AI uses by incorporating the Use Identifier (UseID) provided in input, in the following structure: "UseID-RID".

      Your output should be in the following format:

      {{
        "AI System Use Description": "",
        "Overall Risk Assessment": [
      {{
        "Capability": [
            {{"Risk ID": "UseID-RID",
             "Risk Description": "targets AI systems, their technical components, and the processes by which these systems and components are created",
             "Stakholders affected by risk": ["one or more of AI User, AI Subject, Institutions, General Public and Environment"],
             "SDGs affected by risk": ["zero or more SDGs"],
             "Human Rights affected by risk": ["zero or more HR articles"]
            }},
            ...
            {{"Risk ID": "UseID-RID",
             "Risk Description": "targets AI systems, their technical components, and the processes by which these systems and components are created",
             "Stakholders affected by risk": ["one or more of AI User, AI Subject, Institutions, General Public and Environment"],
             "SDGs affected by risk": ["zero or more SDGs"],
             "Human Rights affected by risk": ["zero or more HR articles"]
            }},
            ],
      }},
      {{
        "Human Interaction": [
            {{"Risk ID": "UseID-RID",
             "Risk Description": "risk that targets the experience of people interacting with AI systems and their effects on these people",
             "Stakholders affected by risk": ["one or more of AI User, AI Subject, Institutions, General Public and Environment"],
             "SDGs affected by risk": ["zero or more SDGs"],
             "Human Rights affected by risk": ["zero or more HR articles"]
            }},
            ...
            {{"Risk ID": "UseID-RID",
             "Risk Description": "risk that targets the experience of people interacting with AI systems and their effects on these people",
             "Stakholders affected by risk": ["one or more of AI User, AI Subject, Institutions, General Public and Environment"],
             "SDGs affected by risk": ["zero or more SDGs"],
             "Human Rights affected by risk": ["zero or more HR articles"]
            }},
            ],
       }},
       {{
          "Systemic Impact": [
            {{"Risk ID": "UseID-RID",
             "Risk Description": "risk that the impact of an AI system on the broader systems in which it is embedded, such as society, the economy, and the natural environment",
             "Stakholders affected by risk": ["one or more of AI User, AI Subject, Institutions, General Public and Environment"],
             "SDGs affected by risk": ["zero or more SDGs"],
             "Human Rights affected by risk": ["zero or more HR articles"]
            }},
            ...
            {{"Risk ID": "UseID-RID",
             "Risk Description": "risk that the impact of an AI system on the broader systems in which it is embedded, such as society, the economy, and the natural environment",
             "Stakholders affected by risk": ["one or more of AI User, AI Subject, Institutions, General Public and Environment"],
             "SDGs affected by risk": ["zero or more SDGs"],
             "Human Rights affected by risk": ["zero or more HR articles"]
            }},
            ],
        }}
      ]
    }}

    ***Check your planned output before outputting it: if it contains any explanations besides the JSON string, omit the explanations. Make sure to output ONLY a correctly formatted JSON string and nothing else.***
    """
    }
]



def format_prompt(MESSAGES, useID, domain, purpose, aiCapability, aiUser, aiSubject, SDG_assessment, HR_assessment):
    """
    Build the chat messages for one AI use from a prompt template.

    The template is deep-copied so it is never mutated; the positional ``{}``
    slots of the second message (index 1) are then filled via ``str.format``
    with the remaining arguments, in the order they appear in the signature.

    Args:
        MESSAGES: list of message dicts; ``MESSAGES[1]['content']`` must be a
            format string with positional slots for the eight values below.
        useID, domain, purpose, aiCapability, aiUser, aiSubject: description
            of the AI use.
        SDG_assessment: text inserted in the SDG risk slot.
        HR_assessment: text inserted in the human-rights risk slot.

    Returns:
        list: a new message list in which only ``messages[1]['content']``
        differs from the template.
    """
    messages = deepcopy(MESSAGES)
    messages[1]['content'] = messages[1]['content'].format(
        useID, domain, purpose, aiCapability, aiUser, aiSubject,
        SDG_assessment, HR_assessment,
    )
    return messages


In [43]:
def _index_by_id(entries):
    """Return {str(entry['id']): entry}, keeping the first entry seen for each id."""
    index = {}
    for entry in entries:
        index.setdefault(str(entry['id']), entry)
    return index


def _parse_model_reply(raw_reply):
    """Parse the model reply as JSON, falling back to a Python-literal parse."""
    try:
        return json.loads(raw_reply)
    except json.JSONDecodeError:
        # Replies that are not strict JSON (e.g. trailing commas) can still be
        # valid Python literals, which is what this cell accepted originally.
        return ast.literal_eval(raw_reply)


# One combined record per AI use, in the order the uses are processed
FULL_RES = []

start_time = time.time()

# Index both risk lists once instead of rescanning them for every use
sdg_by_id = _index_by_id(SDG_risks)
hr_by_id = _index_by_id(HR_risks)

for riskID in risk_ids:
    riskID = str(riskID)
    print (f" Parsing use {riskID}")

    # None when this use has no entry in the corresponding risk file
    useSDG = sdg_by_id.get(riskID)
    useHR = hr_by_id.get(riskID)

    # "[]" tells the prompt that no risks of that kind were provided
    SDG_assessment = "[]" if useSDG is None else str(useSDG["SDG Assessment"])
    HR_assessment = "[]" if useHR is None else str(useHR["Assessment of impact on human rights"])

    # Take the use description from a record that belongs to THIS id; a use
    # may be present in only one of the two files.
    details_source = useSDG if useSDG is not None else useHR
    if details_source is None:
        raise LookupError(f"Use {riskID} not found in SDG_risks or HR_risks")

    # Variables for message placeholders
    useID = riskID
    domain = details_source['Details'][0]
    purpose = details_source['Details'][1]
    aiCapability = details_source['Details'][2]
    aiUser = details_source['Details'][3]
    aiSubject = details_source['Details'][4]

    # When both files describe the use, they must agree on what the use is
    if useSDG is not None and useHR is not None:
        assert domain == useHR['Details'][0], f"Domain mismatch between SDG and HR records for use {riskID}"
        assert aiSubject == useHR['Details'][4], f"AI Subject mismatch between SDG and HR records for use {riskID}"

    use_i_details = [domain, purpose, aiCapability, aiUser, aiSubject]
    print(use_i_details)

    # adapt the prompt for this use
    messages = format_prompt(MESSAGES, useID, domain, purpose, aiCapability, aiUser, aiSubject, SDG_assessment, HR_assessment)

    # run the prompt
    response = chat_gpt(messages, temperature=0)
    print(response)
    response = _parse_model_reply(response)

    combined_response = {}
    combined_response["id"] = riskID
    combined_response["Details"] = use_i_details
    combined_response["Description"] = response["AI System Use Description"]
    combined_response["Risks"] = response["Overall Risk Assessment"]
    print (combined_response)

    end_time = time.time()

    # Cumulative wall-clock time since the loop started
    print(f"Execution time: {end_time - start_time:.5f} seconds")

    FULL_RES.append(combined_response)

 Parsing use 76
['Accessibility and Inclusion', 'Assisting visually impaired individuals', 'Identifying faces and providing audio descriptions', 'Accessibility software developers', 'Visually impaired individuals']
{
  "AI System Use Description": "The AI system identifies faces and provides audio descriptions for visually impaired individuals for accessibility software developers.",
  "Overall Risk Assessment": [
    {
      "Capability": [
        {
          "Risk ID": "76-1",
          "Risk Description": "Infringes on privacy rights by identifying individuals without their consent.",
          "Stakeholders affected by risk": ["AI Subject", "Institutions", "General Public and Environment"],
          "SDGs affected by risk": [],
          "Human Rights affected by risk": ["Article 12"]
        }
      ],
      "Human Interaction": [],
      "Systemic Impact": []
    }
  ]
}
{
  "AI System Use Description": "The AI system identifies faces and provides audio descriptions for visuall

In [44]:
###############################
# save result
output_path = "../../results/risk_categories/FULL_risk_categorisation.json"
# Create the target folder when it is missing, so the collected results are
# not lost to a FileNotFoundError after the whole run has completed.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, "w", encoding="utf-8") as json_file:
    json.dump(FULL_RES, json_file, indent=4)  # 4 spaces of indentation

# THE END